From 88ba801b3ff684dfeb99b447820689ce20143ca0 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Tue, 4 Mar 2025 15:34:40 -0600 Subject: [PATCH 01/79] Add a validated field for mounting an ephemeral volume Issue: PGO-2271 --- ...ator.crunchydata.com_postgresclusters.yaml | 212 ++++++++++++++++++ .../controller/postgrescluster/instance.go | 2 +- internal/postgres/config.go | 3 + internal/postgres/reconcile.go | 39 +++- internal/postgres/reconcile_test.go | 132 ++++++++--- .../v1beta1/postgrescluster_types.go | 10 + .../v1beta1/zz_generated.deepcopy.go | 24 ++ 7 files changed, 378 insertions(+), 44 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index a116a6b8b3..d0891d05ba 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -11041,6 +11041,218 @@ spec: - whenUnsatisfiable type: object type: array + volumes: + properties: + temp: + description: |- + An ephemeral volume for temporary files. + More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. 
+ When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over volumes + to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. 
+ items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to + the PersistentVolume backing this claim. + type: string + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: missing accessModes + rule: 0 < size(self.accessModes) + - message: missing storage request + rule: has(self.resources.requests.storage) + type: object walVolumeClaimSpec: description: |- Defines a separate PersistentVolumeClaim for PostgreSQL's write-ahead log. 
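To make the new schema concrete, here is a minimal sketch of a PostgresCluster manifest exercising the field (illustrative only, not part of the patch; the cluster and instance set names are placeholders, and other required PostgresCluster fields are omitted). The CEL rules above require accessModes and a storage request, and per internal/postgres/config.go below, the resulting ephemeral volume is mounted at /pgtmp:

    apiVersion: postgres-operator.crunchydata.com/v1beta1
    kind: PostgresCluster
    metadata:
      name: hippo
    spec:
      instances:
        - name: instance1
          volumes:
            temp:
              accessModes: [ReadWriteOnce]
              resources:
                requests:
                  storage: 1Gi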
diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index d6fc6158e8..4ed6e79f24 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1188,7 +1188,7 @@ func (r *Reconciler) reconcileInstance( ctx, cluster, spec, primaryCertificate, replicationCertSecretProjection(clusterReplicationSecret), postgresDataVolume, postgresWALVolume, tablespaceVolumes, - &instance.Spec.Template.Spec) + &instance.Spec.Template) if backupsSpecFound { addPGBackRestToInstancePodSpec( diff --git a/internal/postgres/config.go b/internal/postgres/config.go index b3102b74dc..a478c0e72b 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -58,6 +58,9 @@ safelink() ( // dataMountPath is where to mount the main data volume. tablespaceMountPath = "/tablespaces" + // tmpMountPath is where to mount the optional ephemeral volume. + tmpMountPath = "/pgtmp" + // walMountPath is where to mount the optional WAL volume. walMountPath = "/pgwal" diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index fda5229792..5041140b0d 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -32,6 +32,11 @@ func TablespaceVolumeMount(tablespaceName string) corev1.VolumeMount { return corev1.VolumeMount{Name: "tablespace-" + tablespaceName, MountPath: tablespaceMountPath + "/" + tablespaceName} } +// TempVolumeMount returns the name and mount path of the ephemeral volume. +func TempVolumeMount() corev1.VolumeMount { + return corev1.VolumeMount{Name: "postgres-temp", MountPath: tmpMountPath} +} + // WALVolumeMount returns the name and mount path of the PostgreSQL WAL volume. func WALVolumeMount() corev1.VolumeMount { return corev1.VolumeMount{Name: "postgres-wal", MountPath: walMountPath} @@ -63,7 +68,7 @@ func InstancePod(ctx context.Context, inClusterCertificates, inClientCertificates *corev1.SecretProjection, inDataVolume, inWALVolume *corev1.PersistentVolumeClaim, inTablespaceVolumes []*corev1.PersistentVolumeClaim, - outInstancePod *corev1.PodSpec, + outInstancePod *corev1.PodTemplateSpec, ) { certVolumeMount := corev1.VolumeMount{ Name: naming.CertVolume, @@ -207,7 +212,7 @@ func InstancePod(ctx context.Context, VolumeMounts: []corev1.VolumeMount{certVolumeMount, dataVolumeMount}, } - outInstancePod.Volumes = []corev1.Volume{ + outInstancePod.Spec.Volumes = []corev1.Volume{ certVolume, dataVolume, downwardAPIVolume, @@ -227,7 +232,7 @@ func InstancePod(ctx context.Context, }, }, } - outInstancePod.Volumes = append(outInstancePod.Volumes, tablespaceVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, tablespaceVolume) container.VolumeMounts = append(container.VolumeMounts, tablespaceVolumeMount) startup.VolumeMounts = append(startup.VolumeMounts, tablespaceVolumeMount) } @@ -239,7 +244,7 @@ func InstancePod(ctx context.Context, Sources: append([]corev1.VolumeProjection{}, inCluster.Spec.Config.Files...), } container.VolumeMounts = append(container.VolumeMounts, additionalConfigVolumeMount) - outInstancePod.Volumes = append(outInstancePod.Volumes, additionalConfigVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, additionalConfigVolume) } // Mount the WAL PVC whenever it exists. 
The startup command will move WAL @@ -258,19 +263,37 @@ func InstancePod(ctx context.Context, container.VolumeMounts = append(container.VolumeMounts, walVolumeMount) startup.VolumeMounts = append(startup.VolumeMounts, walVolumeMount) - outInstancePod.Volumes = append(outInstancePod.Volumes, walVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, walVolume) + } + + // Mount an ephemeral volume, if specified. + if inInstanceSpec.Volumes != nil && inInstanceSpec.Volumes.Temp != nil { + tmpVolumeMount := TempVolumeMount() + tmpVolume := corev1.Volume{Name: tmpVolumeMount.Name} + tmpVolume.Ephemeral = &corev1.EphemeralVolumeSource{ + VolumeClaimTemplate: &corev1.PersistentVolumeClaimTemplate{ + Spec: inInstanceSpec.Volumes.Temp.AsPersistentVolumeClaimSpec(), + }, + } + + // Create the PVC with the same labels and annotations as the pod. + tmpVolume.Ephemeral.VolumeClaimTemplate.Annotations = outInstancePod.Annotations + tmpVolume.Ephemeral.VolumeClaimTemplate.Labels = outInstancePod.Labels + + container.VolumeMounts = append(container.VolumeMounts, tmpVolumeMount) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, tmpVolume) } - outInstancePod.Containers = []corev1.Container{container, reloader} + outInstancePod.Spec.Containers = []corev1.Container{container, reloader} // If the InstanceSidecars feature gate is enabled and instance sidecars are // defined, add the defined container to the Pod. if feature.Enabled(ctx, feature.InstanceSidecars) && inInstanceSpec.Containers != nil { - outInstancePod.Containers = append(outInstancePod.Containers, inInstanceSpec.Containers...) + outInstancePod.Spec.Containers = append(outInstancePod.Spec.Containers, inInstanceSpec.Containers...) } - outInstancePod.InitContainers = []corev1.Container{startup} + outInstancePod.Spec.InitContainers = []corev1.Container{startup} } // PodSecurityContext returns a v1.PodSecurityContext for cluster that can write diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index a36e3c5368..9903afb97c 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -115,11 +115,11 @@ func TestInstancePod(t *testing.T) { } // without WAL volume nor WAL volume spec - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Assert(t, cmp.MarshalMatches(pod, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec, ` containers: - env: - name: PGDATA @@ -384,15 +384,15 @@ volumes: walVolume := new(corev1.PersistentVolumeClaim) walVolume.Name = "walvol" - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) // Container has all mountPaths, including downwardAPI - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -402,19 +402,19 @@ volumes: name: database-containerinfo readOnly: true - mountPath: /pgwal - name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + name: postgres-wal`), "expected WAL and 
downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata name: postgres-data - mountPath: /pgwal - name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.Volumes, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Volumes, ` - name: cert-volume projected: defaultMode: 384 @@ -475,7 +475,7 @@ volumes: `), "expected WAL volume") // Startup moves WAL files to data volume. - assert.DeepEqual(t, pod.InitContainers[0].Command[4:], + assert.DeepEqual(t, pod.Spec.InitContainers[0].Command[4:], []string{"startup", "11", "/pgdata/pg11_wal"}) }) @@ -485,16 +485,16 @@ volumes: files: [{ secret: { name: keytab } }], }`) - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, clusterWithConfig, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) // Container has all mountPaths, including downwardAPI, // and the postgres-config - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -505,15 +505,15 @@ volumes: readOnly: true - mountPath: /etc/postgres name: postgres-config - readOnly: true`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + readOnly: true`), "expected WAL and downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI and additionalConfig - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata - name: postgres-data`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-data`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) }) t.Run("WithCustomSidecarContainer", func(t *testing.T) { @@ -526,7 +526,7 @@ volumes: InstancePod(ctx, cluster, sidecarInstance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Equal(t, len(pod.Containers), 2, "expected 2 containers in Pod, got %d", len(pod.Containers)) + assert.Equal(t, len(pod.Spec.Containers), 2, "expected 2 containers in Pod") }) t.Run("SidecarEnabled", func(t *testing.T) { @@ -539,11 +539,11 @@ volumes: InstancePod(ctx, cluster, sidecarInstance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Equal(t, len(pod.Containers), 3, "expected 3 containers in Pod, got %d", len(pod.Containers)) + assert.Equal(t, len(pod.Spec.Containers), 3, "expected 3 containers in Pod") var found bool - for i := range pod.Containers { - if pod.Containers[i].Name == "customsidecar1" { + for i := range pod.Spec.Containers { + if 
pod.Spec.Containers[i].Name == "customsidecar1" { found = true break } @@ -576,7 +576,7 @@ volumes: InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, pod) - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -588,10 +588,10 @@ volumes: - mountPath: /tablespaces/castle name: tablespace-castle - mountPath: /tablespaces/trial - name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Containers[0].Name) + name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI and additionalConfig - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -600,7 +600,7 @@ volumes: - mountPath: /tablespaces/castle name: tablespace-castle - mountPath: /tablespaces/trial - name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.InitContainers[0].Name) + name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Spec.InitContainers[0].Name) }) t.Run("WithWALVolumeWithWALVolumeSpec", func(t *testing.T) { @@ -610,14 +610,14 @@ volumes: instance := new(v1beta1.PostgresInstanceSetSpec) instance.WALVolumeClaimSpec = new(v1beta1.VolumeClaimSpec) - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -627,18 +627,18 @@ volumes: name: database-containerinfo readOnly: true - mountPath: /pgwal - name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata name: postgres-data - mountPath: /pgwal - name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.Volumes, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Volumes, ` - name: cert-volume projected: defaultMode: 384 @@ -699,9 +699,71 @@ volumes: `), "expected WAL volume") // Startup moves WAL files to WAL volume. 
- assert.DeepEqual(t, pod.InitContainers[0].Command[4:], + assert.DeepEqual(t, pod.Spec.InitContainers[0].Command[4:], []string{"startup", "11", "/pgwal/pg11_wal"}) }) + + t.Run("TempVolume", func(t *testing.T) { + instance := new(v1beta1.PostgresInstanceSetSpec) + require.UnmarshalInto(t, &instance, `{ + volumes: { temp: { + resources: { requests: { storage: 99Mi } }, + storageClassName: somesuch, + } }, + }`) + + pod := new(corev1.PodTemplateSpec) + InstancePod(ctx, cluster, instance, + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, cmp.MarshalContains(pod.Spec.Containers[0].VolumeMounts, ` +- mountPath: /pgtmp + name: postgres-temp +`), "expected temp mount in %q container", pod.Spec.Containers[0].Name) + + // NOTE: `creationTimestamp: null` appears in the resulting pod, + // but it does not affect the PVC or reconciliation events; + // possibly https://pr.k8s.io/100032 + assert.Assert(t, cmp.MarshalContains(pod.Spec.Volumes, ` +- ephemeral: + volumeClaimTemplate: + metadata: + creationTimestamp: null + spec: + resources: + requests: + storage: 99Mi + storageClassName: somesuch + name: postgres-temp +`), "expected definition in the pod") + + t.Run("Metadata", func(t *testing.T) { + annotated := pod.DeepCopy() + annotated.Annotations = map[string]string{"n1": "etc"} + annotated.Labels = map[string]string{"gg": "asdf"} + + InstancePod(ctx, cluster, instance, + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, annotated) + + assert.Assert(t, cmp.MarshalContains(annotated.Spec.Volumes, ` +- ephemeral: + volumeClaimTemplate: + metadata: + annotations: + n1: etc + creationTimestamp: null + labels: + gg: asdf + spec: + resources: + requests: + storage: 99Mi + storageClassName: somesuch + name: postgres-temp +`), "expected definition in the pod") + }) + }) } func TestPodSecurityContext(t *testing.T) { diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 7ee966d211..4d3be247fc 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -519,6 +519,16 @@ type PostgresInstanceSetSpec struct { // +listMapKey=name // +optional TablespaceVolumes []TablespaceVolume `json:"tablespaceVolumes,omitempty"` + + Volumes *PostgresVolumesSpec `json:"volumes,omitempty"` +} + +type PostgresVolumesSpec struct { + // An ephemeral volume for temporary files. 
+	// More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes
+	// ---
+	// +optional
+	Temp *VolumeClaimSpec `json:"temp,omitempty"`
 }
 
 type TablespaceVolume struct {
diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
index 189eebdd23..233534d39f 100644
--- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
@@ -2318,6 +2318,11 @@ func (in *PostgresInstanceSetSpec) DeepCopyInto(out *PostgresInstanceSetSpec) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.Volumes != nil {
+		in, out := &in.Volumes, &out.Volumes
+		*out = new(PostgresVolumesSpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresInstanceSetSpec.
@@ -2464,6 +2469,25 @@ func (in *PostgresUserSpec) DeepCopy() *PostgresUserSpec {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PostgresVolumesSpec) DeepCopyInto(out *PostgresVolumesSpec) {
+	*out = *in
+	if in.Temp != nil {
+		in, out := &in.Temp, &out.Temp
+		*out = (*in).DeepCopy()
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresVolumesSpec.
+func (in *PostgresVolumesSpec) DeepCopy() *PostgresVolumesSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(PostgresVolumesSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RegistrationRequirementStatus) DeepCopyInto(out *RegistrationRequirementStatus) {
 	*out = *in

From 2c760d829c868067723d0db808f65a9aa5a1dfba Mon Sep 17 00:00:00 2001
From: Benjamin Blattberg
Date: Wed, 12 Mar 2025 14:29:54 -0500
Subject: [PATCH 02/79] Add util func for adding collector logic (#4128)

Check feature gates and check spec

In our original execution, we had a mix of logic to enable OTel: some
logic required just the feature gate, and some required both the feature
gate AND the instrumentation spec. This PR regularizes the logic: every
check now requires both the gate and the spec to indicate that the user
wants instrumentation; specific checks for logs/metrics within larger
checks can be left as is.

Note: This PR also removes the instrumentation check from
ExporterEnabled. We may want to re-add logic like that and be clear
about which check takes precedence.
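As a sketch of the convention, using the helpers this patch adds in
internal/collector/util.go (`cluster` stands for any
*v1beta1.PostgresCluster; the surrounding reconcile logic is elided):

    // Before: the feature gate alone decided, even when the spec
    // requested no instrumentation.
    if feature.Enabled(ctx, feature.OpenTelemetryLogs) {
        // ...enable OTel log collection...
    }

    // After: the gate must be enabled AND spec.instrumentation must
    // be present.
    if collector.OpenTelemetryLogsEnabled(ctx, cluster) {
        // ...enable OTel log collection...
    }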
--- internal/collector/instance.go | 4 +- internal/collector/naming.go | 1 + internal/collector/patroni.go | 5 +- internal/collector/patroni_test.go | 9 ++- internal/collector/pgadmin.go | 5 +- internal/collector/pgadmin_test.go | 19 ++++--- internal/collector/pgbackrest.go | 3 +- internal/collector/pgbackrest_test.go | 7 ++- internal/collector/pgbouncer.go | 11 ++-- internal/collector/pgbouncer_test.go | 10 +++- internal/collector/postgres.go | 3 +- internal/collector/postgres_metrics.go | 10 ++-- internal/collector/postgres_test.go | 6 ++ internal/collector/util.go | 56 +++++++++++++++++++ .../controller/postgrescluster/cluster.go | 4 +- .../postgrescluster/cluster_test.go | 5 ++ .../controller/postgrescluster/instance.go | 6 +- .../controller/postgrescluster/pgbackrest.go | 2 +- .../controller/postgrescluster/pgbouncer.go | 7 +-- .../controller/postgrescluster/pgmonitor.go | 17 +++--- .../standalone_pgadmin/configmap.go | 3 +- .../controller/standalone_pgadmin/pod_test.go | 11 ++-- .../standalone_pgadmin/statefulset.go | 3 +- internal/pgbackrest/config.go | 8 +-- internal/pgbouncer/config.go | 6 +- internal/pgbouncer/reconcile.go | 2 +- internal/pgmonitor/postgres.go | 11 ++-- internal/pgmonitor/util.go | 4 -- internal/pgmonitor/util_test.go | 9 --- 29 files changed, 157 insertions(+), 90 deletions(-) create mode 100644 internal/collector/util.go diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 9c83f11f3a..54081b2684 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -50,9 +50,7 @@ func AddToPod( includeLogrotate bool, thisPodServesMetrics bool, ) { - if spec == nil || - !(feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + if !OpenTelemetryLogsOrMetricsEnabled(ctx, spec) { return } diff --git a/internal/collector/naming.go b/internal/collector/naming.go index c8db6d6f21..801d61e8ce 100644 --- a/internal/collector/naming.go +++ b/internal/collector/naming.go @@ -15,6 +15,7 @@ const PGBouncerMetrics = "metrics/pgbouncer" const PostgresMetrics = "metrics/postgres" const PatroniMetrics = "metrics/patroni" const ResourceDetectionProcessor = "resourcedetection" +const MonitoringUser = "ccp_monitoring" const SqlQuery = "sqlquery" diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index 532d103db7..6b22df6a09 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -9,7 +9,6 @@ import ( "slices" "strconv" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -23,7 +22,7 @@ func EnablePatroniLogging(ctx context.Context, spec = inCluster.Spec.Instrumentation.Logs } - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := naming.PatroniPGDataLogPath // Keep track of what log records and files have been processed. 
@@ -134,7 +133,7 @@ func EnablePatroniMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, outConfig *Config, ) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if OpenTelemetryMetricsEnabled(ctx, inCluster) { // Add Prometheus exporter outConfig.Exporters[Prometheus] = map[string]any{ "endpoint": "0.0.0.0:" + strconv.Itoa(PrometheusPort), diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index e2d3a84e58..2f73374109 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -11,6 +11,7 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -23,8 +24,14 @@ func TestEnablePatroniLogging(t *testing.T) { ctx := feature.NewContext(context.Background(), gate) config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) - EnablePatroniLogging(ctx, new(v1beta1.PostgresCluster), config) + EnablePatroniLogging(ctx, cluster, config) result, err := config.ToYAML() assert.NilError(t, err) diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go index e22ed621f0..1f82115703 100644 --- a/internal/collector/pgadmin.go +++ b/internal/collector/pgadmin.go @@ -10,7 +10,6 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -18,9 +17,10 @@ import ( func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec, configmap *corev1.ConfigMap, ) error { - if !feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if !OpenTelemetryLogsEnabled(ctx, spec) { return nil } + otelConfig := NewConfig(spec) otelConfig.Extensions["file_storage/pgadmin_data_logs"] = map[string]any{ @@ -125,5 +125,6 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec if err == nil { configmap.Data["collector.yaml"] = otelYAML } + return err } diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index c4d5acfab6..e5db11f587 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -12,7 +12,6 @@ import ( corev1 "k8s.io/api/core/v1" "github.com/crunchydata/postgres-operator/internal/collector" - pgadmin "github.com/crunchydata/postgres-operator/internal/controller/standalone_pgadmin" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/testing/cmp" @@ -31,7 +30,11 @@ func TestEnablePgAdminLogging(t *testing.T) { configmap := new(corev1.ConfigMap) initialize.Map(&configmap.Data) - err := collector.EnablePgAdminLogging(ctx, nil, configmap) + var instrumentation *v1beta1.InstrumentationSpec + require.UnmarshalInto(t, &instrumentation, `{ + logs: { retentionPeriod: 12h }, + }`) + err := collector.EnablePgAdminLogging(ctx, instrumentation, configmap) assert.NilError(t, err) assert.Assert(t, cmp.MarshalMatches(configmap.Data, ` @@ -44,7 +47,7 @@ collector.yaml: | extensions: file_storage/pgadmin_data_logs: create_directory: false - directory: 
`+pgadmin.LogDirectoryAbsolutePath+`/receiver + directory: /var/lib/pgadmin/logs/receiver fsync: true processors: batch/1s: @@ -90,11 +93,11 @@ collector.yaml: | receivers: filelog/gunicorn: include: - - `+pgadmin.GunicornLogFileAbsolutePath+` + - /var/lib/pgadmin/logs/gunicorn.log storage: file_storage/pgadmin_data_logs filelog/pgadmin: include: - - `+pgadmin.LogFileAbsolutePath+` + - /var/lib/pgadmin/logs/pgadmin.log storage: file_storage/pgadmin_data_logs service: extensions: @@ -165,7 +168,7 @@ collector.yaml: | extensions: file_storage/pgadmin_data_logs: create_directory: false - directory: `+pgadmin.LogDirectoryAbsolutePath+`/receiver + directory: /var/lib/pgadmin/logs/receiver fsync: true processors: batch/1s: @@ -211,11 +214,11 @@ collector.yaml: | receivers: filelog/gunicorn: include: - - `+pgadmin.GunicornLogFileAbsolutePath+` + - /var/lib/pgadmin/logs/gunicorn.log storage: file_storage/pgadmin_data_logs filelog/pgadmin: include: - - `+pgadmin.LogFileAbsolutePath+` + - /var/lib/pgadmin/logs/pgadmin.log storage: file_storage/pgadmin_data_logs service: extensions: diff --git a/internal/collector/pgbackrest.go b/internal/collector/pgbackrest.go index 569748ed9c..009ec0c825 100644 --- a/internal/collector/pgbackrest.go +++ b/internal/collector/pgbackrest.go @@ -11,7 +11,6 @@ import ( "fmt" "slices" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -29,7 +28,7 @@ func NewConfigForPgBackrestRepoHostPod( ) *Config { config := NewConfig(spec) - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, spec) { var directory string for _, repo := range repos { diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index f1ebf14e4f..e8a5a4d2dd 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -11,6 +11,7 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -27,8 +28,12 @@ func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { Volume: new(v1beta1.RepoPVC), }, } + var instrumentation *v1beta1.InstrumentationSpec + require.UnmarshalInto(t, &instrumentation, `{ + logs: { retentionPeriod: 12h }, + }`) - config := NewConfigForPgBackrestRepoHostPod(ctx, nil, repos) + config := NewConfigForPgBackrestRepoHostPod(ctx, instrumentation, repos) result, err := config.ToYAML() assert.NilError(t, err) diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 9133bd6813..375d2b9bab 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -12,7 +12,6 @@ import ( "slices" "strconv" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -40,7 +39,7 @@ func NewConfigForPgBouncerPod( config := NewConfig(cluster.Spec.Instrumentation) EnablePgBouncerLogging(ctx, cluster, config) - EnablePgBouncerMetrics(ctx, config, sqlQueryUsername) + EnablePgBouncerMetrics(ctx, cluster, config, sqlQueryUsername) return config } @@ -56,7 +55,7 @@ func EnablePgBouncerLogging(ctx context.Context, spec = 
inCluster.Spec.Instrumentation.Logs } - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := naming.PGBouncerLogPath // Keep track of what log records and files have been processed. @@ -171,8 +170,10 @@ func EnablePgBouncerLogging(ctx context.Context, // EnablePgBouncerMetrics adds necessary configuration to the collector config to scrape // metrics from pgBouncer when the OpenTelemetryMetrics feature flag is enabled. -func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsername string) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { +func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, + config *Config, sqlQueryUsername string) { + + if OpenTelemetryMetricsEnabled(ctx, inCluster) { // Add Prometheus exporter config.Exporters[Prometheus] = map[string]any{ "endpoint": "0.0.0.0:" + strconv.Itoa(PrometheusPort), diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index df8427fbbd..74aed710da 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -11,6 +11,7 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -23,8 +24,13 @@ func TestEnablePgBouncerLogging(t *testing.T) { ctx := feature.NewContext(context.Background(), gate) config := NewConfig(nil) - - EnablePgBouncerLogging(ctx, new(v1beta1.PostgresCluster), config) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) + EnablePgBouncerLogging(ctx, cluster, config) result, err := config.ToYAML() assert.NilError(t, err) diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index cfc0b88245..5d419f85ea 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -15,7 +15,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -86,7 +85,7 @@ func EnablePostgresLogging( spec = inCluster.Spec.Instrumentation.Logs } - if inCluster != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := postgres.LogDirectory() version := inCluster.Spec.PostgresVersion diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index b6bd39cd87..4530c431a3 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -12,9 +12,7 @@ import ( "slices" "strconv" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/pgmonitor" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -59,7 +57,7 @@ type metric struct { } func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, config *Config) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if OpenTelemetryMetricsEnabled(ctx, inCluster) { log := 
logging.FromContext(ctx) var err error @@ -131,7 +129,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "driver": "postgres", "datasource": fmt.Sprintf( `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), + MonitoringUser), "collection_interval": "5s", // Give Postgres time to finish setup. "initial_delay": "10s", @@ -142,7 +140,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "driver": "postgres", "datasource": fmt.Sprintf( `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), + MonitoringUser), "collection_interval": "300s", // Give Postgres time to finish setup. "initial_delay": "10s", @@ -172,7 +170,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "driver": "postgres", "datasource": fmt.Sprintf( `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), + MonitoringUser), "collection_interval": querySet.CollectionInterval, // Give Postgres time to finish setup. "initial_delay": "10s", diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index a6736d66cc..3bdf33c61a 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -12,6 +12,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -25,6 +26,11 @@ func TestEnablePostgresLogging(t *testing.T) { cluster := new(v1beta1.PostgresCluster) cluster.Spec.PostgresVersion = 99 + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) config := NewConfig(nil) params := postgres.NewParameterSet() diff --git a/internal/collector/util.go b/internal/collector/util.go new file mode 100644 index 0000000000..72cf8641ef --- /dev/null +++ b/internal/collector/util.go @@ -0,0 +1,56 @@ +// Copyright 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +type CrunchyCRD interface { + *v1beta1.PostgresCluster | *v1beta1.PGAdmin | *v1beta1.InstrumentationSpec +} + +func OpenTelemetrySpecPresent[T CrunchyCRD](object T) bool { + + switch v := any(object).(type) { + case *v1beta1.InstrumentationSpec: + return v != nil + case *v1beta1.PostgresCluster: + return v.Spec.Instrumentation != nil + case *v1beta1.PGAdmin: + return v.Spec.Instrumentation != nil + default: + return false + } + +} + +func OpenTelemetryLogsOrMetricsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) +} + +func OpenTelemetryLogsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + feature.Enabled(ctx, feature.OpenTelemetryLogs) +} + +func OpenTelemetryMetricsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + feature.Enabled(ctx, feature.OpenTelemetryMetrics) +} diff --git a/internal/controller/postgrescluster/cluster.go b/internal/controller/postgrescluster/cluster.go index ead4881b1e..2ceb30453a 100644 --- a/internal/controller/postgrescluster/cluster.go +++ b/internal/controller/postgrescluster/cluster.go @@ -15,7 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/patroni" @@ -75,7 +75,7 @@ func (r *Reconciler) patroniLogSize(ctx context.Context, cluster *v1beta1.Postgr sizeInBytes = 25000000 } return sizeInBytes - } else if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + } else if collector.OpenTelemetryLogsEnabled(ctx, cluster) { return 25000000 } return 0 diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index 6882cfa27b..a38a128086 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -870,6 +870,11 @@ func TestPatroniLogSize(t *testing.T) { reconciler := &Reconciler{Recorder: recorder} cluster.Spec.Patroni = nil + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) size := reconciler.patroniLogSize(ctx, &cluster) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 4ed6e79f24..85f23d960b 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1202,7 +1202,7 @@ func (r *Reconciler) reconcileInstance( // If either OpenTelemetry feature is enabled, we want to add the collector config to the pod if err == nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { // If the OpenTelemetryMetrics feature is enabled, we need to get the pgpassword from the 
// monitoring user secret @@ -1428,8 +1428,8 @@ func (r *Reconciler) reconcileInstanceConfigMap( // If OTel logging or metrics is enabled, add collector config if err == nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { + err = collector.AddToConfigMap(ctx, otelConfig, instanceConfigMap) // Add pgbackrest logrotate if OpenTelemetryLogs is enabled and diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index 41d1b942a1..b7de247a5d 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -694,7 +694,7 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster // If OpenTelemetryLogs is enabled, we want to add the collector to the pod // and also add the RepoVolumes to the container. - if postgresCluster.Spec.Instrumentation != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, &repo.Spec.Template, []corev1.VolumeMount{}, "", diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 660572005a..671b284299 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -19,7 +19,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/crunchydata/postgres-operator/internal/collector" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" @@ -99,13 +98,11 @@ func (r *Reconciler) reconcilePGBouncerConfigMap( pgbouncer.ConfigMap(ctx, cluster, configmap) } // If OTel logging or metrics is enabled, add collector config - if otelConfig != nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { err = collector.AddToConfigMap(ctx, otelConfig, configmap) } // If OTel logging is enabled, add logrotate config - if err == nil && otelConfig != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if err == nil && collector.OpenTelemetryLogsEnabled(ctx, cluster) { logrotateConfig := collector.LogrotateConfig{ LogFiles: []string{naming.PGBouncerFullLogPath}, PostrotateScript: collector.PGBouncerPostRotateScript, diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index 84b955559a..48d15d1e6d 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -16,6 +16,7 @@ import ( corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" @@ -62,7 +63,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // the `EnableExporterInPostgreSQL` funcs; that way we are 
always running // that function against an updated and running pod. - if pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), cluster.Spec.PostgresVersion)) if err != nil { return err @@ -99,7 +100,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return pgmonitor.EnableExporterInPostgreSQL(ctx, exec, monitoringSecret, pgmonitor.ExporterDB, setup) } - if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) && !collector.OpenTelemetryMetricsEnabled(ctx, cluster) { action = func(ctx context.Context, exec postgres.Executor) error { return pgmonitor.DisableMonitoringUserInPostgres(ctx, exec) } @@ -161,7 +162,7 @@ func (r *Reconciler) reconcileMonitoringSecret( // is enabled to determine when monitoring secret should be created, // since our implementation of the SqlQuery receiver in the OTel Collector // uses the monitoring user as well. - if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) && !collector.OpenTelemetryMetricsEnabled(ctx, cluster) { if err == nil { err = errors.WithStack(r.deleteControlled(ctx, cluster, existing)) } @@ -234,7 +235,7 @@ func addPGMonitorExporterToInstancePodSpec( template *corev1.PodTemplateSpec, exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap) { - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { return } @@ -374,7 +375,7 @@ func addPGMonitorExporterToInstancePodSpec( func (r *Reconciler) reconcileExporterWebConfig(ctx context.Context, cluster *v1beta1.PostgresCluster) (*corev1.ConfigMap, error) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { return nil, nil } @@ -384,7 +385,9 @@ func (r *Reconciler) reconcileExporterWebConfig(ctx context.Context, return nil, err } - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) || cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { + if !pgmonitor.ExporterEnabled(ctx, cluster) || + collector.OpenTelemetryMetricsEnabled(ctx, cluster) || + cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { // We could still have a NotFound error here so check the err. // If no error that means the configmap is found and needs to be deleted if err == nil { @@ -441,7 +444,7 @@ func (r *Reconciler) reconcileExporterQueriesConfig(ctx context.Context, return nil, err } - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { // We could still have a NotFound error here so check the err. 
// If no error that means the configmap is found and needs to be deleted if err == nil { diff --git a/internal/controller/standalone_pgadmin/configmap.go b/internal/controller/standalone_pgadmin/configmap.go index 72a95b14db..5078e0e9fa 100644 --- a/internal/controller/standalone_pgadmin/configmap.go +++ b/internal/controller/standalone_pgadmin/configmap.go @@ -19,7 +19,6 @@ import ( "github.com/pkg/errors" "github.com/crunchydata/postgres-operator/internal/collector" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -73,7 +72,7 @@ func configmap(ctx context.Context, pgadmin *v1beta1.PGAdmin, gunicornRetentionPeriod = "D" ) // If OTel logs feature gate is enabled, we want to change the pgAdmin/gunicorn logging - if feature.Enabled(ctx, feature.OpenTelemetryLogs) && pgadmin.Spec.Instrumentation != nil { + if collector.OpenTelemetryLogsEnabled(ctx, pgadmin) { logRetention = true // If the user has set a retention period, we will use those values for log rotation, diff --git a/internal/controller/standalone_pgadmin/pod_test.go b/internal/controller/standalone_pgadmin/pod_test.go index b414a7bab0..bc8a32da49 100644 --- a/internal/controller/standalone_pgadmin/pod_test.go +++ b/internal/controller/standalone_pgadmin/pod_test.go @@ -16,6 +16,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -211,13 +212,9 @@ volumes: pgadmin.Spec.Resources.Requests = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("100m"), } - retentionPeriod, err := v1beta1.NewDuration("12 hours") - assert.NilError(t, err) - pgadmin.Spec.Instrumentation = &v1beta1.InstrumentationSpec{ - Logs: &v1beta1.InstrumentationLogsSpec{ - RetentionPeriod: retentionPeriod, - }, - } + require.UnmarshalInto(t, &pgadmin.Spec.Instrumentation, `{ + logs: { retentionPeriod: 12h }, + }`) call() diff --git a/internal/controller/standalone_pgadmin/statefulset.go b/internal/controller/standalone_pgadmin/statefulset.go index 6e606b0867..6783780eae 100644 --- a/internal/controller/standalone_pgadmin/statefulset.go +++ b/internal/controller/standalone_pgadmin/statefulset.go @@ -17,7 +17,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/controller/postgrescluster" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -122,7 +121,7 @@ func statefulset( pod(pgadmin, configmap, &sts.Spec.Template.Spec, dataVolume) - if pgadmin.Spec.Instrumentation != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if collector.OpenTelemetryLogsEnabled(ctx, pgadmin) { // Logs for gunicorn and pgadmin write to /var/lib/pgadmin/logs // so the collector needs access to that that path. 
dataVolumeMount := corev1.VolumeMount{ diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index c14a264ce3..498be32d3b 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -17,7 +17,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" @@ -131,8 +130,8 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet ).String() if RepoHostVolumeDefined(postgresCluster) && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + collector.OpenTelemetryLogsOrMetricsEnabled(ctx, postgresCluster) { + err = collector.AddToConfigMap(ctx, collector.NewConfigForPgBackrestRepoHostPod( ctx, postgresCluster.Spec.Instrumentation, @@ -141,8 +140,7 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet // If OTel logging is enabled, add logrotate config for the RepoHost if err == nil && - postgresCluster.Spec.Instrumentation != nil && - feature.Enabled(ctx, feature.OpenTelemetryLogs) { + collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { var pgBackRestLogPath string for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { if repo.Volume != nil { diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index 257dc63dbd..99bcac0399 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -12,7 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -127,13 +127,13 @@ func clusterINI(ctx context.Context, cluster *v1beta1.PostgresCluster) string { } // If OpenTelemetryLogs feature is enabled, enable logging to file - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if collector.OpenTelemetryLogsEnabled(ctx, cluster) { global["logfile"] = naming.PGBouncerLogPath + "/pgbouncer.log" } // When OTel metrics are enabled, allow pgBouncer's postgres user // to run read-only console queries on pgBouncer's virtual db - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { global["stats_users"] = PostgresqlUser } diff --git a/internal/pgbouncer/reconcile.go b/internal/pgbouncer/reconcile.go index b663596ed7..8eed54a3b6 100644 --- a/internal/pgbouncer/reconcile.go +++ b/internal/pgbouncer/reconcile.go @@ -207,7 +207,7 @@ func Pod( template.Spec.Volumes = []corev1.Volume{configVolume} - if feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, inCluster) { collector.AddToPod(ctx, inCluster.Spec.Instrumentation, inCluster.Spec.ImagePullPolicy, inConfigMap, template, []corev1.VolumeMount{configVolumeMount}, string(inSecret.Data["pgbouncer-password"]), []string{naming.PGBouncerLogPath}, true, true) diff --git a/internal/pgmonitor/postgres.go b/internal/pgmonitor/postgres.go index 1d7817c9a3..3ef83cd2e0 100644 --- a/internal/pgmonitor/postgres.go +++ 
b/internal/pgmonitor/postgres.go @@ -10,7 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -24,7 +24,8 @@ const ( // PostgreSQLHBAs provides the Postgres HBA rules for allowing the monitoring // exporter to be accessible func PostgreSQLHBAs(ctx context.Context, inCluster *v1beta1.PostgresCluster, outHBAs *postgres.HBAs) { - if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if ExporterEnabled(ctx, inCluster) || + collector.OpenTelemetryMetricsEnabled(ctx, inCluster) { // Limit the monitoring user to local connections using SCRAM. outHBAs.Mandatory = append(outHBAs.Mandatory, postgres.NewHBA().TCP().Users(MonitoringUser).Method("scram-sha-256").Network("127.0.0.0/8"), @@ -34,9 +35,11 @@ func PostgreSQLHBAs(ctx context.Context, inCluster *v1beta1.PostgresCluster, out } // PostgreSQLParameters provides additional required configuration parameters -// that Postgres needs to support monitoring +// that Postgres needs to support monitoring for both pgMonitor and OTel func PostgreSQLParameters(ctx context.Context, inCluster *v1beta1.PostgresCluster, outParameters *postgres.Parameters) { - if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if ExporterEnabled(ctx, inCluster) || + collector.OpenTelemetryMetricsEnabled(ctx, inCluster) { + // Exporter expects that shared_preload_libraries are installed // pg_stat_statements: https://access.crunchydata.com/documentation/pgmonitor/latest/exporter/ // pgnodemx: https://github.com/CrunchyData/pgnodemx diff --git a/internal/pgmonitor/util.go b/internal/pgmonitor/util.go index 32cf222448..72f528ffa3 100644 --- a/internal/pgmonitor/util.go +++ b/internal/pgmonitor/util.go @@ -8,7 +8,6 @@ import ( "context" "os" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -37,8 +36,5 @@ func ExporterEnabled(ctx context.Context, cluster *v1beta1.PostgresCluster) bool if cluster.Spec.Monitoring.PGMonitor.Exporter == nil { return false } - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { - return false - } return true } diff --git a/internal/pgmonitor/util_test.go b/internal/pgmonitor/util_test.go index e83bbb3730..a7758d0da4 100644 --- a/internal/pgmonitor/util_test.go +++ b/internal/pgmonitor/util_test.go @@ -10,7 +10,6 @@ import ( "gotest.tools/v3/assert" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -27,12 +26,4 @@ func TestExporterEnabled(t *testing.T) { cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{} assert.Assert(t, ExporterEnabled(ctx, cluster)) - - gate := feature.NewGate() - assert.NilError(t, gate.SetFromMap(map[string]bool{ - feature.OpenTelemetryMetrics: true, - })) - ctx = feature.NewContext(ctx, gate) - cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{} - assert.Assert(t, !ExporterEnabled(ctx, cluster)) } From 02402ed5fa3e544658b0a8fc493dafe98a43003c Mon Sep 17 00:00:00 2001 From: Philip Hurst Date: Wed, 12 Mar 2025 
16:46:20 -0400 Subject: [PATCH 03/79] Pgadmin oauth secrets (#4123) * preliminary work on OAUTH2 configuration Secrets with pgAdmin * update description * update comment * add logic to configSystem script * check OAuth Secrets and ConfigMap for changes and schedule rollout when needed * update test for new Python logic handling OAuth Secret JSON files * update test * updated typo in code comments * updated comments for clarification * rebase * updated description in CRD * Change oauth2 to mount rather than load secrets * FIXUP: dots-only filenames --------- Co-authored-by: Chris Bandy --- ...res-operator.crunchydata.com_pgadmins.yaml | 51 ++++++++++++++++++- internal/controller/standalone_pgadmin/pod.go | 40 +++++++++++++-- .../controller/standalone_pgadmin/pod_test.go | 42 ++++++++++++++- internal/shell/paths.go | 17 +++++++ internal/shell/paths_test.go | 30 +++++++++++ .../v1beta1/standalone_pgadmin_types.go | 39 +++++++++++++- .../v1beta1/zz_generated.deepcopy.go | 23 +++++++++ 7 files changed, 234 insertions(+), 8 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index 4871e399fd..d26b968d41 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -1320,7 +1320,7 @@ spec: type: array gunicorn: description: |- - Settings for the gunicorn server. + Settings for the Gunicorn server. More info: https://docs.gunicorn.org/en/latest/settings.html type: object x-kubernetes-preserve-unknown-fields: true @@ -1353,12 +1353,61 @@ spec: - name type: object x-kubernetes-map-type: atomic + oauthConfigurations: + description: |- + Secrets for the `OAUTH2_CONFIG` setting. If there are `OAUTH2_CONFIG` values + in the settings field, they will be combined with the values loaded here. + More info: https://www.pgadmin.org/docs/pgadmin4/latest/oauth2.html + items: + properties: + name: + description: The OAUTH2_NAME of this configuration. + maxLength: 20 + minLength: 1 + pattern: ^[A-Za-z0-9]+$ + type: string + secret: + description: A Secret containing the settings of one OAuth2 + provider as a JSON object. + properties: + key: + description: Name of the data field within the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ + type: string + x-kubernetes-validations: + - message: cannot be "." or start with ".." + rule: self != "." && !self.startsWith("..") + name: + description: Name of the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - key + - name + type: object + x-kubernetes-map-type: atomic + required: + - name + - secret + type: object + x-kubernetes-map-type: atomic + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map settings: description: |- Settings for the pgAdmin server process. Keys should be uppercase and values must be constants. 
More info: https://www.pgadmin.org/docs/pgadmin4/latest/config_py.html type: object + x-kubernetes-map-type: granular x-kubernetes-preserve-unknown-fields: true type: object dataVolumeClaimSpec: diff --git a/internal/controller/standalone_pgadmin/pod.go b/internal/controller/standalone_pgadmin/pod.go index ab6f8679f4..88f483c570 100644 --- a/internal/controller/standalone_pgadmin/pod.go +++ b/internal/controller/standalone_pgadmin/pod.go @@ -28,6 +28,8 @@ const ( configDatabaseURIPath = "~postgres-operator/config-database-uri" ldapFilePath = "~postgres-operator/ldap-bind-password" gunicornConfigFilePath = "~postgres-operator/" + gunicornConfigKey + oauthConfigDir = "~postgres-operator/oauth-config" + oauthAbsolutePath = configMountPath + "/" + oauthConfigDir // scriptMountPath is where to mount a temporary directory that is only // writable during Pod initialization. @@ -212,6 +214,17 @@ func podConfigFiles(configmap *corev1.ConfigMap, pgadmin v1beta1.PGAdmin) []core }, }...) + for i, oauth := range pgadmin.Spec.Config.OAuthConfigurations { + // Safely encode the OAUTH2_NAME in the file name. Prepend the index so + // the files can be loaded in the order they are defined in the spec. + mountPath := fmt.Sprintf( + "%s/%02d-%s.json", oauthConfigDir, i, shell.CleanFileName(oauth.Name), + ) + config = append(config, corev1.VolumeProjection{ + Secret: initialize.Pointer(oauth.Secret.AsProjection(mountPath)), + }) + } + if pgadmin.Spec.Config.ConfigDatabaseURI != nil { config = append(config, corev1.VolumeProjection{ Secret: initialize.Pointer( @@ -311,15 +324,17 @@ loadServerCommand // descriptor and uses the timeout of the builtin `read` to wait. That same // descriptor gets closed and reopened to use the builtin `[ -nt` to check mtimes. // - https://unix.stackexchange.com/a/407383 - // In order to get gunicorn to reload the logging config - // we need to send a KILL rather than a HUP signal. + // + // Gunicorn needs a SIGTERM rather than SIGHUP to reload its logging config. + // This also causes pgAdmin to restart when its configuration changes. // - https://github.com/benoitc/gunicorn/issues/3353 + // // Right now the config file is on the same configMap as the cluster file // so if the mtime changes for any of those files, it will change for all. 
var reloadScript = ` exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -375,12 +390,31 @@ with open('` + configMountPath + `/` + configFilePath + `') as _f: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) +if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] +for _f in reversed(glob.glob('` + oauthAbsolutePath + `/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('` + ldapPasswordAbsolutePath + `'): with open('` + ldapPasswordAbsolutePath + `') as _f: LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('` + configDatabaseURIPathAbsolutePath + `'): with open('` + configDatabaseURIPathAbsolutePath + `') as _f: CONFIG_DATABASE_URI = _f.read() +del _conf, _data, _f ` // Gunicorn reads from the `/etc/pgadmin/gunicorn_config.py` file during startup diff --git a/internal/controller/standalone_pgadmin/pod_test.go b/internal/controller/standalone_pgadmin/pod_test.go index bc8a32da49..84f6e56cdc 100644 --- a/internal/controller/standalone_pgadmin/pod_test.go +++ b/internal/controller/standalone_pgadmin/pod_test.go @@ -75,7 +75,7 @@ containers: exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -149,12 +149,31 @@ initContainers: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] + for _f in reversed(glob.glob('/etc/pgadmin/conf.d/~postgres-operator/oauth-config/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password'): with open('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password') as _f: 
LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + del _conf, _data, _f - | import json, re, gunicorn gunicorn.SERVER_SOFTWARE = 'Python' @@ -257,7 +276,7 @@ containers: exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -335,12 +354,31 @@ initContainers: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] + for _f in reversed(glob.glob('/etc/pgadmin/conf.d/~postgres-operator/oauth-config/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password'): with open('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password') as _f: LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + del _conf, _data, _f - | import json, re, gunicorn gunicorn.SERVER_SOFTWARE = 'Python' diff --git a/internal/shell/paths.go b/internal/shell/paths.go index 3455ff8fe4..d1df635e68 100644 --- a/internal/shell/paths.go +++ b/internal/shell/paths.go @@ -14,6 +14,23 @@ import ( "strings" ) +// CleanFileName returns the suffix of path after its last slash U+002F. +// This is similar to "basename" except this returns empty string when: +// - The final character of path is slash U+002F, or +// - The result would be "." or ".." +// +// See: +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/basename.html +func CleanFileName(path string) string { + if i := strings.LastIndexByte(path, '/'); i >= 0 { + path = path[i+1:] + } + if path != "." && path != ".." { + return path + } + return "" +} + // MakeDirectories returns a list of POSIX shell commands that ensure each path // exists. It creates every directory leading to path from (but not including) // base and sets their permissions to exactly perms, regardless of umask. 
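[Editorial illustration, not part of this patch: a minimal, runnable Go sketch of how the two-digit index from podConfigFiles combines with CleanFileName to produce the projected file names that the Python loader globs as [0-9][0-9]-*.json. The provider names below are assumed example values for spec.config.oauthConfigurations; the helper simply mirrors shell.CleanFileName above.]

package main

import (
	"fmt"
	"strings"
)

// cleanFileName mirrors shell.CleanFileName: keep only the text after the
// final slash, and return "" when the result would be "." or "..".
func cleanFileName(path string) string {
	if i := strings.LastIndexByte(path, '/'); i >= 0 {
		path = path[i+1:]
	}
	if path != "." && path != ".." {
		return path
	}
	return ""
}

func main() {
	// Hypothetical OAUTH2_NAME values from the spec, in declaration order.
	names := []string{"google", "github"}
	for i, name := range names {
		// Same shape as podConfigFiles: "%s/%02d-%s.json". Zero-padding the
		// index keeps lexicographic glob order equal to spec order.
		fmt.Printf("~postgres-operator/oauth-config/%02d-%s.json\n", i, cleanFileName(name))
	}
	// Output:
	// ~postgres-operator/oauth-config/00-google.json
	// ~postgres-operator/oauth-config/01-github.json
}

[End of illustration.]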
diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index 273f672b79..8af16a73c0 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -17,6 +17,36 @@ import ( "github.com/crunchydata/postgres-operator/internal/testing/require" ) +func TestCleanFileName(t *testing.T) { + t.Parallel() + + t.Run("Empty", func(t *testing.T) { + assert.Equal(t, CleanFileName(""), "") + }) + + t.Run("Dots", func(t *testing.T) { + assert.Equal(t, CleanFileName("."), "") + assert.Equal(t, CleanFileName(".."), "") + assert.Equal(t, CleanFileName("..."), "...") + assert.Equal(t, CleanFileName("././/.././../."), "") + assert.Equal(t, CleanFileName("././/.././../.."), "") + assert.Equal(t, CleanFileName("././/.././../../x.j"), "x.j") + }) + + t.Run("Directories", func(t *testing.T) { + assert.Equal(t, CleanFileName("/"), "") + assert.Equal(t, CleanFileName("//"), "") + assert.Equal(t, CleanFileName("asdf/"), "") + assert.Equal(t, CleanFileName("asdf//12.3"), "12.3") + assert.Equal(t, CleanFileName("//////"), "") + assert.Equal(t, CleanFileName("//////gg"), "gg") + }) + + t.Run("NoSeparators", func(t *testing.T) { + assert.Equal(t, CleanFileName("asdf12.3.ssgg"), "asdf12.3.ssgg") + }) +} + func TestMakeDirectories(t *testing.T) { t.Parallel() diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go index 9042245b2f..534d792c4f 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go @@ -21,7 +21,7 @@ type StandalonePGAdminConfiguration struct { // +optional ConfigDatabaseURI *OptionalSecretKeyRef `json:"configDatabaseURI,omitempty"` - // Settings for the gunicorn server. + // Settings for the Gunicorn server. // More info: https://docs.gunicorn.org/en/latest/settings.html // +optional // +kubebuilder:pruning:PreserveUnknownFields @@ -37,11 +37,46 @@ type StandalonePGAdminConfiguration struct { // Settings for the pgAdmin server process. Keys should be uppercase and // values must be constants. // More info: https://www.pgadmin.org/docs/pgadmin4/latest/config_py.html - // +optional + // --- // +kubebuilder:pruning:PreserveUnknownFields // +kubebuilder:validation:Schemaless // +kubebuilder:validation:Type=object + // + // +mapType=granular + // +optional Settings SchemalessObject `json:"settings,omitempty"` + + // Secrets for the `OAUTH2_CONFIG` setting. If there are `OAUTH2_CONFIG` values + // in the settings field, they will be combined with the values loaded here. + // More info: https://www.pgadmin.org/docs/pgadmin4/latest/oauth2.html + // --- + // The controller expects this number to be no more than two digits. + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=10 + // + // +listType=map + // +listMapKey=name + // +optional + OAuthConfigurations []PGAdminOAuthConfig `json:"oauthConfigurations,omitempty"` +} + +// +structType=atomic +type PGAdminOAuthConfig struct { + // The OAUTH2_NAME of this configuration. + // --- + // This goes into a filename, so let's keep it short and simple. + // The Secret is allowed to contain OAUTH2_NAME and deviate from this. 
+ // +kubebuilder:validation:Pattern=`^[A-Za-z0-9]+$`
+ //
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=20
+ // +required
+ Name string `json:"name"`
+
+ // A Secret containing the settings of one OAuth2 provider as a JSON object.
+ // ---
+ // +required
+ Secret SecretKeyRef `json:"secret"`
}
// PGAdminSpec defines the desired state of PGAdmin
diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
index 233534d39f..58281cb921 100644
--- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go
@@ -846,6 +846,22 @@ func (in *PGAdminList) DeepCopyObject() runtime.Object {
return nil
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PGAdminOAuthConfig) DeepCopyInto(out *PGAdminOAuthConfig) {
+ *out = *in
+ in.Secret.DeepCopyInto(&out.Secret)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PGAdminOAuthConfig.
+func (in *PGAdminOAuthConfig) DeepCopy() *PGAdminOAuthConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(PGAdminOAuthConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PGAdminPodSpec) DeepCopyInto(out *PGAdminPodSpec) {
*out = *in
@@ -2721,6 +2737,13 @@ func (in *StandalonePGAdminConfiguration) DeepCopyInto(out *StandalonePGAdminCon
(*in).DeepCopyInto(*out)
}
out.Settings = in.Settings.DeepCopy()
+ if in.OAuthConfigurations != nil {
+ in, out := &in.OAuthConfigurations, &out.OAuthConfigurations
+ *out = make([]PGAdminOAuthConfig, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StandalonePGAdminConfiguration.
From 89da2a2ed92a7e2cab550bf99a88785904148daa Mon Sep 17 00:00:00 2001
From: Drew Sessler
Date: Mon, 17 Mar 2025 15:39:34 -0700
Subject: [PATCH 04/79] Tell collector to watch the .log.1 files to avoid
 missing records around rotation time. Add comments around each component's
 filelog receiver settings.
---
 internal/collector/patroni.go | 9 ++++++++-
 internal/collector/patroni_test.go | 2 ++
 internal/collector/pgadmin.go | 5 +++++
 internal/collector/pgbackrest.go | 7 ++++++-
 internal/collector/pgbackrest_test.go | 2 ++
 internal/collector/postgres.go | 13 ++++++++++++-
 internal/collector/postgres_test.go | 2 ++
 7 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go
index 6b22df6a09..aa6a7a85e3 100644
--- a/internal/collector/patroni.go
+++ b/internal/collector/patroni.go
@@ -39,7 +39,14 @@ func EnablePatroniLogging(ctx context.Context,
// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme
outConfig.Receivers["filelog/patroni_jsonlog"] = map[string]any{
// Read the JSON files and keep track of what has been processed.
- "include": []string{directory + "/*.log"},
+ // When patroni rotates its log files, it renames the old .log file
+ // to .log.1. We want the collector to ingest logs from both files
+ // as it is possible that patroni will continue to write a log
+ // record or two to the old file while rotation is occurring.
The + // collector knows not to create duplicate logs. + "include": []string{ + directory + "/*.log", directory + "/*.log.1", + }, "storage": "file_storage/patroni_logs", "operators": []map[string]any{ diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index 2f73374109..01f28d1b36 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -88,6 +88,7 @@ receivers: filelog/patroni_jsonlog: include: - /pgdata/patroni/log/*.log + - /pgdata/patroni/log/*.log.1 operators: - from: body to: body.original @@ -183,6 +184,7 @@ receivers: filelog/patroni_jsonlog: include: - /pgdata/patroni/log/*.log + - /pgdata/patroni/log/*.log.1 operators: - from: body to: body.original diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go index 1f82115703..85fb43408e 100644 --- a/internal/collector/pgadmin.go +++ b/internal/collector/pgadmin.go @@ -29,6 +29,11 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec "fsync": true, } + // PgAdmin/gunicorn logs are rotated by python -- python tries to emit a log + // and if the file needs to rotate, it rotates first and then emits the log. + // The collector therefore only needs to watch the single active log for + // each component. + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme otelConfig.Receivers["filelog/pgadmin"] = map[string]any{ "include": []string{"/var/lib/pgadmin/logs/pgadmin.log"}, "storage": "file_storage/pgadmin_data_logs", diff --git a/internal/collector/pgbackrest.go b/internal/collector/pgbackrest.go index 009ec0c825..4fa6f5c1fc 100644 --- a/internal/collector/pgbackrest.go +++ b/internal/collector/pgbackrest.go @@ -55,8 +55,13 @@ func NewConfigForPgBackrestRepoHostPod( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme config.Receivers["filelog/pgbackrest_log"] = map[string]any{ // Read the files and keep track of what has been processed. + // We use logrotate to rotate the pgbackrest logs which renames the + // old .log file to .log.1. We want the collector to ingest logs from + // both files as it is possible that pgbackrest will continue to write + // a log record or two to the old file while rotation is occurring. + // The collector knows not to create duplicate logs. 
"include": []string{ - directory + "/*.log", + directory + "/*.log", directory + "/*.log.1", }, "storage": "file_storage/pgbackrest_logs", // pgBackRest prints logs with a log prefix, which includes a timestamp diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index e8a5a4d2dd..347599692f 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -95,6 +95,7 @@ receivers: filelog/pgbackrest_log: include: - /pgbackrest/repo1/log/*.log + - /pgbackrest/repo1/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs @@ -195,6 +196,7 @@ receivers: filelog/pgbackrest_log: include: - /pgbackrest/repo1/log/*.log + - /pgbackrest/repo1/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index 5d419f85ea..c98ba4e98b 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -142,6 +142,7 @@ func EnablePostgresLogging( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/postgres_csvlog"] = map[string]any{ // Read the CSV files and keep track of what has been processed. + // The wildcard covers all potential log file names. "include": []string{directory + "/*.csv"}, "storage": "file_storage/postgres_logs", @@ -173,6 +174,7 @@ func EnablePostgresLogging( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/postgres_jsonlog"] = map[string]any{ // Read the JSON files and keep track of what has been processed. + // The wildcard covers all potential log file names. "include": []string{directory + "/*.json"}, "storage": "file_storage/postgres_logs", @@ -238,8 +240,17 @@ func EnablePostgresLogging( "fsync": true, } + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/pgbackrest_log"] = map[string]any{ - "include": []string{naming.PGBackRestPGDataLogPath + "/*.log"}, + // We use logrotate to rotate the pgbackrest logs which renames the + // old .log file to .log.1. We want the collector to ingest logs from + // both files as it is possible that pgbackrest will continue to write + // a log record or two to the old file while rotation is occurring. + // The collector knows not to create duplicate logs. 
+ "include": []string{ + naming.PGBackRestPGDataLogPath + "/*.log", + naming.PGBackRestPGDataLogPath + "/*.log.1", + }, "storage": "file_storage/pgbackrest_logs", // pgBackRest prints logs with a log prefix, which includes a timestamp diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 3bdf33c61a..d9bb161b9d 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -197,6 +197,7 @@ receivers: filelog/pgbackrest_log: include: - /pgdata/pgbackrest/log/*.log + - /pgdata/pgbackrest/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs @@ -438,6 +439,7 @@ receivers: filelog/pgbackrest_log: include: - /pgdata/pgbackrest/log/*.log + - /pgdata/pgbackrest/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs From b38a3cabc8036d139dbca105832a1104664e5099 Mon Sep 17 00:00:00 2001 From: Tony Landreth <56887169+tony-landreth@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:30:44 -0400 Subject: [PATCH 05/79] Bumps golang.org/x/net to v0.36.0 (#695) --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0db97ac83d..f2a0d0c756 100644 --- a/go.mod +++ b/go.mod @@ -104,7 +104,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.33.0 // indirect + golang.org/x/net v0.36.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect golang.org/x/sync v0.11.0 // indirect golang.org/x/sys v0.30.0 // indirect diff --git a/go.sum b/go.sum index 0fa2adc5a3..f0a720aad2 100644 --- a/go.sum +++ b/go.sum @@ -224,6 +224,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA= +golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From a869e964cc4ff4223ef9798d37f1b5248b381d7a Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 21 Mar 2025 11:33:27 -0500 Subject: [PATCH 06/79] Run go mod tidy in 5.8 See: b38a3cabc8036d139dbca105832a1104664e5099 --- go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.sum b/go.sum index f0a720aad2..2822ed5e1e 100644 --- a/go.sum +++ b/go.sum @@ -222,8 +222,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= -golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/net 
v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA= golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= From 8db626040d37f8a0257a00e70801ae2bed1a477e Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Wed, 19 Mar 2025 11:25:59 -0500 Subject: [PATCH 07/79] Prefer OTEL to exporter if both enabled (#4137) If both Exporter and OTel Metrics are enabled, CPK prefers OTel. See: 46ee186790703381717f46d2c1915f14225edd7a --- internal/pgmonitor/util.go | 6 ++++++ internal/pgmonitor/util_test.go | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/internal/pgmonitor/util.go b/internal/pgmonitor/util.go index 72f528ffa3..76a8a6adae 100644 --- a/internal/pgmonitor/util.go +++ b/internal/pgmonitor/util.go @@ -8,6 +8,7 @@ import ( "context" "os" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -27,6 +28,11 @@ func GetQueriesConfigDir(ctx context.Context) string { // ExporterEnabled returns true if the monitoring exporter is enabled func ExporterEnabled(ctx context.Context, cluster *v1beta1.PostgresCluster) bool { + // If OpenTelemetry metrics are enabled for this cluster, that takes precedence + // over the postgres_exporter metrics. + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { + return false + } if cluster.Spec.Monitoring == nil { return false } diff --git a/internal/pgmonitor/util_test.go b/internal/pgmonitor/util_test.go index a7758d0da4..e862e87a67 100644 --- a/internal/pgmonitor/util_test.go +++ b/internal/pgmonitor/util_test.go @@ -10,6 +10,8 @@ import ( "gotest.tools/v3/assert" + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -26,4 +28,19 @@ func TestExporterEnabled(t *testing.T) { cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{} assert.Assert(t, ExporterEnabled(ctx, cluster)) + + // Enabling the OpenTelemetryMetrics is not sufficient to disable the exporter + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx = feature.NewContext(ctx, gate) + assert.Assert(t, ExporterEnabled(ctx, cluster)) + + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) + assert.Assert(t, !ExporterEnabled(ctx, cluster)) } From 7eab4a9192a25e35c298ac57b396f410c419bc50 Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Thu, 20 Mar 2025 12:41:50 -0500 Subject: [PATCH 08/79] Modify context path of OTEL transformers (#4134) * Modify context path of OTEL transformers As of 0.119.0, the parser collector modifies certain paths of transforms. While this is not an error, the resulting log can look a little alarming, so we rewrote some of our transforms to match. * Handle pgBouncer 1.24.0 During testing, some pgbouncer metrics were seen to cause errors when accessing PG through pgbouncer; in addition, we found that 1.24.0 introduced a change to the `show databases` columns. This PR addresses those errors and that change. NOTE: Even when not using NULL values, sqlqueryreceiver will still warn when it finds a NULL value in some row that it scans. 
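[Editorial illustration, not part of this commit: a minimal, runnable Go sketch of the shape of the context rewrite, written as map literals like the processor configs in internal/collector/patroni.go. The single OTTL statement shown is an assumed example, not taken from the generated configs.]

package main

import "fmt"

func main() {
	// Before: the statement group declares a "log" context, and paths such
	// as severity_text and cache are resolved relative to it.
	before := map[string]any{
		"context":    "log",
		"statements": []string{`set(severity_text, cache["levelname"])`},
	}

	// After: no "context" key; every path names its context explicitly
	// (log.severity_text, log.cache), leaving the collector nothing to
	// infer or warn about at startup.
	after := map[string]any{
		"statements": []string{`set(log.severity_text, log.cache["levelname"])`},
	}

	fmt.Println(before)
	fmt.Println(after)
}

[End of illustration.]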
Issue: PGO-2268 See: 7a006019b63199fbd88161c0dce67a13669556ff --- .../generated/pgbackrest_logs_transforms.json | 2 +- .../generated/pgbouncer_metrics_queries.json | 2 +- .../generated/postgres_logs_transforms.json | 2 +- internal/collector/patroni.go | 21 +- internal/collector/patroni_test.go | 56 +-- internal/collector/pgadmin.go | 25 +- internal/collector/pgadmin_test.go | 64 ++-- .../collector/pgbackrest_logs_transforms.yaml | 33 +- internal/collector/pgbackrest_test.go | 72 ++-- internal/collector/pgbouncer.go | 23 +- .../collector/pgbouncer_metrics_queries.yaml | 20 +- internal/collector/pgbouncer_test.go | 62 ++-- .../collector/postgres_logs_transforms.yaml | 141 ++++---- internal/collector/postgres_test.go | 328 ++++++++++-------- 14 files changed, 458 insertions(+), 393 deletions(-) diff --git a/internal/collector/generated/pgbackrest_logs_transforms.json b/internal/collector/generated/pgbackrest_logs_transforms.json index adf3b09af9..3f8cf5137a 100644 --- a/internal/collector/generated/pgbackrest_logs_transforms.json +++ b/internal/collector/generated/pgbackrest_logs_transforms.json @@ -1 +1 @@ -[{"context":"log","statements":["set(instrumentation_scope.name, \"pgbackrest\")","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","merge_maps(cache, ExtractPatterns(body, \"^(?\u003ctimestamp\u003e\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?\u003cprocess_id\u003eP\\\\d{2,3})\\\\s*(?\u003cerror_severity\u003e\\\\S*): (?\u003cmessage\u003e(?s).*)$\"), \"insert\") where Len(body) \u003e 0","set(severity_text, cache[\"error_severity\"]) where IsString(cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"TRACE\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG\"","set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == \"DETAIL\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARN\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(time, Time(cache[\"timestamp\"], \"%Y-%m-%d %H:%M:%S.%L\")) where IsString(cache[\"timestamp\"])","set(attributes[\"process.pid\"], cache[\"process_id\"])","set(attributes[\"log.record.original\"], body)","set(body, cache[\"message\"])"]}] +[{"statements":["set(instrumentation_scope.name, \"pgbackrest\")","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","merge_maps(log.cache, ExtractPatterns(log.body, \"^(?\u003ctimestamp\u003e\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?\u003cprocess_id\u003eP\\\\d{2,3})\\\\s*(?\u003cerror_severity\u003e\\\\S*): (?\u003cmessage\u003e(?s).*)$\"), \"insert\") where Len(log.body) \u003e 0","set(log.severity_text, log.cache[\"error_severity\"]) where IsString(log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"TRACE\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == \"DETAIL\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == \"WARN\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.time, Time(log.cache[\"timestamp\"], \"%Y-%m-%d %H:%M:%S.%L\")) where 
IsString(log.cache[\"timestamp\"])","set(log.attributes[\"process.pid\"], log.cache[\"process_id\"])","set(log.attributes[\"log.record.original\"], log.body)","set(log.body, log.cache[\"message\"])"]}] diff --git a/internal/collector/generated/pgbouncer_metrics_queries.json b/internal/collector/generated/pgbouncer_metrics_queries.json index 0248051d94..78260bcf44 100644 --- a/internal/collector/generated/pgbouncer_metrics_queries.json +++ b/internal/collector/generated/pgbouncer_metrics_queries.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS"},{"metrics":[{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Minimum number of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool","value_column":"reserve_pool"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for 
more than server_check_delay, so they need server_check_query to run on them before they can be used again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS"}] +[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS"},{"metrics":[{"attribute_columns":["name","port","database"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database"],"description":"Minimum number of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool","value_column":"reserve_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used 
again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS"}] diff --git a/internal/collector/generated/postgres_logs_transforms.json b/internal/collector/generated/postgres_logs_transforms.json index d3a2dbe47f..f7409174eb 100644 --- a/internal/collector/generated/postgres_logs_transforms.json +++ b/internal/collector/generated/postgres_logs_transforms.json @@ -1 +1 @@ -[{"conditions":["body[\"format\"] == \"csv\""],"context":"log","statements":["set(cache, ParseCSV(body[\"original\"], body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(cache, ExtractPatterns(cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(cache[\"connection_from\"]) \u003e 0","set(cache[\"remote_host\"], Substring(cache[\"connection_from\"], 0, Len(cache[\"connection_from\"]) - Len(cache[\"remote_port\"]) - 1)) where Len(cache[\"connection_from\"]) \u003e 0 and IsString(cache[\"remote_port\"])","set(cache[\"remote_host\"], cache[\"connection_from\"]) where Len(cache[\"connection_from\"]) \u003e 0 and not IsString(cache[\"remote_host\"])","merge_maps(cache, ExtractPatterns(cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(cache[\"location\"]) \u003e 0","set(cache[\"cursor_position\"], Double(cache[\"cursor_position\"])) where IsMatch(cache[\"cursor_position\"], \"^[0-9.]+$\")","set(cache[\"file_line_num\"], Double(cache[\"file_line_num\"])) where IsMatch(cache[\"file_line_num\"], \"^[0-9.]+$\")","set(cache[\"internal_position\"], Double(cache[\"internal_position\"])) where IsMatch(cache[\"internal_position\"], \"^[0-9.]+$\")","set(cache[\"leader_pid\"], Double(cache[\"leader_pid\"])) where IsMatch(cache[\"leader_pid\"], \"^[0-9.]+$\")","set(cache[\"line_num\"], Double(cache[\"line_num\"])) where IsMatch(cache[\"line_num\"], \"^[0-9.]+$\")","set(cache[\"pid\"], Double(cache[\"pid\"])) where IsMatch(cache[\"pid\"], \"^[0-9.]+$\")","set(cache[\"query_id\"], Double(cache[\"query_id\"])) where IsMatch(cache[\"query_id\"], \"^[0-9.]+$\")","set(cache[\"remote_port\"], Double(cache[\"remote_port\"])) where IsMatch(cache[\"remote_port\"], \"^[0-9.]+$\")","set(body[\"parsed\"], cache)"]},{"context":"log","statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(cache, body[\"parsed\"]) where body[\"format\"] == \"csv\"","set(cache, ParseJSON(body[\"original\"])) where body[\"format\"] == \"json\"","set(severity_text, cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"DEBUG5\"","set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == \"DEBUG4\"","set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == \"DEBUG3\"","set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == \"DEBUG2\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG1\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\" or severity_text == 
\"LOG\"","set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == \"NOTICE\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARNING\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == \"FATAL\"","set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == \"PANIC\"","set(time, Time(cache[\"timestamp\"], \"%F %T.%L %Z\"))","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(attributes[\"log.record.original\"], body[\"original\"])","set(body, cache)","set(attributes[\"client.address\"], body[\"remote_host\"]) where IsString(body[\"remote_host\"])","set(attributes[\"client.port\"], Int(body[\"remote_port\"])) where IsDouble(body[\"remote_port\"])","set(attributes[\"code.filepath\"], body[\"file_name\"]) where IsString(body[\"file_name\"])","set(attributes[\"code.function\"], body[\"func_name\"]) where IsString(body[\"func_name\"])","set(attributes[\"code.lineno\"], Int(body[\"file_line_num\"])) where IsDouble(body[\"file_line_num\"])","set(attributes[\"db.namespace\"], body[\"dbname\"]) where IsString(body[\"dbname\"])","set(attributes[\"db.response.status_code\"], body[\"state_code\"]) where IsString(body[\"state_code\"])","set(attributes[\"process.creation.time\"], Concat([ Substring(body[\"session_start\"], 0, 10), \"T\", Substring(body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(attributes[\"process.pid\"], Int(body[\"pid\"])) where IsDouble(body[\"pid\"])","set(attributes[\"process.title\"], body[\"ps\"]) where IsString(body[\"ps\"])","set(attributes[\"user.name\"], body[\"user\"]) where IsString(body[\"user\"])"]},{"conditions":["Len(body[\"message\"]) \u003e 7 and Substring(body[\"message\"], 0, 7) == \"AUDIT: \""],"context":"log","statements":["set(body[\"pgaudit\"], ParseCSV(Substring(body[\"message\"], 7, Len(body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(body[\"pgaudit\"]) \u003e 0"]}] +[{"conditions":["body[\"format\"] == \"csv\""],"statements":["set(log.cache, ParseCSV(log.body[\"original\"], log.body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(log.cache, ExtractPatterns(log.cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(log.cache[\"connection_from\"]) \u003e 0","set(log.cache[\"remote_host\"], Substring(log.cache[\"connection_from\"], 0, Len(log.cache[\"connection_from\"]) - Len(log.cache[\"remote_port\"]) - 1)) where Len(log.cache[\"connection_from\"]) \u003e 0 and IsString(log.cache[\"remote_port\"])","set(log.cache[\"remote_host\"], log.cache[\"connection_from\"]) where Len(log.cache[\"connection_from\"]) \u003e 0 and not IsString(log.cache[\"remote_host\"])","merge_maps(log.cache, ExtractPatterns(log.cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(log.cache[\"location\"]) \u003e 0","set(log.cache[\"cursor_position\"], Double(log.cache[\"cursor_position\"])) where IsMatch(log.cache[\"cursor_position\"], \"^[0-9.]+$\")","set(log.cache[\"file_line_num\"], Double(log.cache[\"file_line_num\"])) where 
IsMatch(log.cache[\"file_line_num\"], \"^[0-9.]+$\")","set(log.cache[\"internal_position\"], Double(log.cache[\"internal_position\"])) where IsMatch(log.cache[\"internal_position\"], \"^[0-9.]+$\")","set(log.cache[\"leader_pid\"], Double(log.cache[\"leader_pid\"])) where IsMatch(log.cache[\"leader_pid\"], \"^[0-9.]+$\")","set(log.cache[\"line_num\"], Double(log.cache[\"line_num\"])) where IsMatch(log.cache[\"line_num\"], \"^[0-9.]+$\")","set(log.cache[\"pid\"], Double(log.cache[\"pid\"])) where IsMatch(log.cache[\"pid\"], \"^[0-9.]+$\")","set(log.cache[\"query_id\"], Double(log.cache[\"query_id\"])) where IsMatch(log.cache[\"query_id\"], \"^[0-9.]+$\")","set(log.cache[\"remote_port\"], Double(log.cache[\"remote_port\"])) where IsMatch(log.cache[\"remote_port\"], \"^[0-9.]+$\")","set(log.body[\"parsed\"], log.cache)"]},{"statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(log.cache, log.body[\"parsed\"]) where log.body[\"format\"] == \"csv\"","set(log.cache, ParseJSON(log.body[\"original\"])) where log.body[\"format\"] == \"json\"","set(log.severity_text, log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"DEBUG5\"","set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == \"DEBUG4\"","set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == \"DEBUG3\"","set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == \"DEBUG2\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG1\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\" or log.severity_text == \"LOG\"","set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == \"NOTICE\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == \"WARNING\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == \"FATAL\"","set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == \"PANIC\"","set(log.time, Time(log.cache[\"timestamp\"], \"%F %T.%L %Z\")) where IsString(log.cache[\"timestamp\"])","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(log.attributes[\"log.record.original\"], log.body[\"original\"])","set(log.body, log.cache)","set(log.attributes[\"client.address\"], log.body[\"remote_host\"]) where IsString(log.body[\"remote_host\"])","set(log.attributes[\"client.port\"], Int(log.body[\"remote_port\"])) where IsDouble(log.body[\"remote_port\"])","set(log.attributes[\"code.filepath\"], log.body[\"file_name\"]) where IsString(log.body[\"file_name\"])","set(log.attributes[\"code.function\"], log.body[\"func_name\"]) where IsString(log.body[\"func_name\"])","set(log.attributes[\"code.lineno\"], Int(log.body[\"file_line_num\"])) where IsDouble(log.body[\"file_line_num\"])","set(log.attributes[\"db.namespace\"], log.body[\"dbname\"]) where IsString(log.body[\"dbname\"])","set(log.attributes[\"db.response.status_code\"], log.body[\"state_code\"]) where IsString(log.body[\"state_code\"])","set(log.attributes[\"process.creation.time\"], Concat([ Substring(log.body[\"session_start\"], 0, 10), \"T\", Substring(log.body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(log.body[\"session_start\"], \"^[^ ]{10} [^ ]{8} 
UTC$\")","set(log.attributes[\"process.pid\"], Int(log.body[\"pid\"])) where IsDouble(log.body[\"pid\"])","set(log.attributes[\"process.title\"], log.body[\"ps\"]) where IsString(log.body[\"ps\"])","set(log.attributes[\"user.name\"], log.body[\"user\"]) where IsString(log.body[\"user\"])"]},{"conditions":["Len(body[\"message\"]) \u003e 7 and Substring(body[\"message\"], 0, 7) == \"AUDIT: \""],"statements":["set(log.body[\"pgaudit\"], ParseCSV(Substring(log.body[\"message\"], 7, Len(log.body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(log.body[\"pgaudit\"]) \u003e 0"]}] diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index aa6a7a85e3..2e0edb0d15 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -71,16 +71,15 @@ func EnablePatroniLogging(ctx context.Context, // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme outConfig.Processors["transform/patroni_logs"] = map[string]any{ "log_statements": []map[string]any{{ - "context": "log", "statements": []string{ `set(instrumentation_scope.name, "patroni")`, // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson - `set(cache, ParseJSON(body["original"]))`, + `set(log.cache, ParseJSON(log.body["original"]))`, // The log severity is in the "levelname" field. // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["levelname"])`, + `set(log.severity_text, log.cache["levelname"])`, // Map Patroni (python) "logging levels" to OpenTelemetry severity levels. // @@ -88,11 +87,11 @@ func EnablePatroniLogging(ctx context.Context, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber // https://github.com/open-telemetry/opentelemetry-python/blob/v1.29.0/opentelemetry-api/src/opentelemetry/_logs/severity/__init__.py // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums - `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, - `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, - `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, - `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, - `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + `set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG"`, + `set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO"`, + `set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`, + `set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`, + `set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "CRITICAL"`, // Parse the "asctime" field into the record timestamp. 
// The format is neither RFC 3339 nor ISO 8601: @@ -102,14 +101,14 @@ func EnablePatroniLogging(ctx context.Context, // // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md // https://docs.python.org/3.6/library/logging.html#logging.LogRecord - `set(time, Time(cache["asctime"], "%F %T,%L"))`, + `set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"])`, // Keep the unparsed log record in a standard attribute, and replace // the log record body with the message field. // // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - `set(attributes["log.record.original"], body["original"])`, - `set(body, cache["message"])`, + `set(log.attributes["log.record.original"], log.body["original"])`, + `set(log.body, log.cache["message"])`, }, }}, } diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index 01f28d1b36..20dd8096eb 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -71,19 +71,23 @@ processors: timeout: 30s transform/patroni_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "patroni") - - set(cache, ParseJSON(body["original"])) - - set(severity_text, cache["levelname"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" - - set(time, Time(cache["asctime"], "%F %T,%L")) - - set(attributes["log.record.original"], body["original"]) - - set(body, cache["message"]) + - set(log.cache, ParseJSON(log.body["original"])) + - set(log.severity_text, log.cache["levelname"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" + - set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache["message"]) receivers: filelog/patroni_jsonlog: include: @@ -167,19 +171,23 @@ processors: timeout: 30s transform/patroni_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "patroni") - - set(cache, ParseJSON(body["original"])) - - set(severity_text, cache["levelname"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" - - set(time, Time(cache["asctime"], "%F %T,%L")) - - set(attributes["log.record.original"], body["original"]) - - set(body, cache["message"]) + - set(log.cache, ParseJSON(log.body["original"])) + - 
set(log.severity_text, log.cache["levelname"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" + - set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache["message"]) receivers: filelog/patroni_jsonlog: include: diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go index 85fb43408e..c5cd147df8 100644 --- a/internal/collector/pgadmin.go +++ b/internal/collector/pgadmin.go @@ -60,34 +60,33 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec otelConfig.Processors["transform/pgadmin_log"] = map[string]any{ "log_statements": []map[string]any{ { - "context": "log", "statements": []string{ // Keep the unparsed log record in a standard attribute, and replace // the log record body with the message field. // // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - `set(attributes["log.record.original"], body)`, - `set(cache, ParseJSON(body))`, - `merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert")`, - `set(body, cache["message"])`, + `set(log.attributes["log.record.original"], log.body)`, + `set(log.cache, ParseJSON(log.body))`, + `merge_maps(log.attributes, ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert")`, + `set(log.body, log.cache["message"])`, // Set instrumentation scope to the "name" from each log record. - `set(instrumentation_scope.name, cache["name"])`, + `set(instrumentation_scope.name, log.cache["name"])`, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["level"])`, - `set(time_unix_nano, Int(cache["time"]*1000000000))`, + `set(log.severity_text, log.cache["level"])`, + `set(log.time_unix_nano, Int(log.cache["time"]*1000000000))`, // Map pgAdmin "logging levels" to OpenTelemetry severity levels. 
// // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber // https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums - `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, - `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, - `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, - `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, - `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + `set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG"`, + `set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO"`, + `set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`, + `set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`, + `set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "CRITICAL"`, }, }, }, diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index e5db11f587..b856baab0c 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -75,21 +75,25 @@ collector.yaml: | timeout: 30s transform/pgadmin_log: log_statements: - - context: log - statements: - - set(attributes["log.record.original"], body) - - set(cache, ParseJSON(body)) - - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + - statements: + - set(log.attributes["log.record.original"], log.body) + - set(log.cache, ParseJSON(log.body)) + - merge_maps(log.attributes, ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert") - - set(body, cache["message"]) - - set(instrumentation_scope.name, cache["name"]) - - set(severity_text, cache["level"]) - - set(time_unix_nano, Int(cache["time"]*1000000000)) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + - set(log.body, log.cache["message"]) + - set(instrumentation_scope.name, log.cache["name"]) + - set(log.severity_text, log.cache["level"]) + - set(log.time_unix_nano, Int(log.cache["time"]*1000000000)) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" receivers: filelog/gunicorn: include: @@ -196,21 +200,25 @@ collector.yaml: | timeout: 30s transform/pgadmin_log: log_statements: - - context: log - statements: - - set(attributes["log.record.original"], body) - - set(cache, ParseJSON(body)) - - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + - statements: + - set(log.attributes["log.record.original"], log.body) + - set(log.cache, ParseJSON(log.body)) + - 
merge_maps(log.attributes, ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"),
    "insert")
-    - set(body, cache["message"])
-    - set(instrumentation_scope.name, cache["name"])
-    - set(severity_text, cache["level"])
-    - set(time_unix_nano, Int(cache["time"]*1000000000))
-    - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"
-    - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"
-    - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"
-    - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
-    - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"
+    - set(log.body, log.cache["message"])
+    - set(instrumentation_scope.name, log.cache["name"])
+    - set(log.severity_text, log.cache["level"])
+    - set(log.time_unix_nano, Int(log.cache["time"]*1000000000))
+    - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text ==
+      "DEBUG"
+    - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text ==
+      "INFO"
+    - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text ==
+      "WARNING"
+    - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text ==
+      "ERROR"
+    - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text ==
+      "CRITICAL"
 receivers:
   filelog/gunicorn:
     include:
diff --git a/internal/collector/pgbackrest_logs_transforms.yaml b/internal/collector/pgbackrest_logs_transforms.yaml
index 31f4a48f94..389f9d0a2c 100644
--- a/internal/collector/pgbackrest_logs_transforms.yaml
+++ b/internal/collector/pgbackrest_logs_transforms.yaml
@@ -3,8 +3,7 @@
 #
 # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme

-- context: log
-  statements:
+- statements:
   - set(instrumentation_scope.name, "pgbackrest")
   - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0")
@@ -14,30 +13,30 @@
   # 3) the log level (from INFO, WARN, etc.)
   # 4) the message (anything else, including newline -- we can do this because we have a multiline block on the receiver)
   - >-
-    merge_maps(cache,
-    ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P<process_id>\\d{2,3})\\s*(?<error_severity>\\S*): (?<message>(?s).*)$"),
+    merge_maps(log.cache,
+    ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P<process_id>\\d{2,3})\\s*(?<error_severity>\\S*): (?<message>(?s).*)$"),
     "insert")
-    where Len(body) > 0
+    where Len(log.body) > 0

   # The log severity is the "error_severity" field.
  # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext
  # https://pgbackrest.org/configuration.html#section-log/option-log-level-file
-  - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"])
-  - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE"
-  - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"
-  - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL"
-  - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"
-  - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN"
-  - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
+  - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"])
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == "TRACE"
+  - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG"
+  - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == "DETAIL"
+  - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO"
+  - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARN"
+  - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"

   # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-timestamp
-  - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"])
+  - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(log.cache["timestamp"])

   # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md
-  - set(attributes["process.pid"], cache["process_id"])
+  - set(log.attributes["process.pid"], log.cache["process_id"])

   # Keep the unparsed log record in a standard attribute,
-  # and replace the log record body with the message field.
+  # and replace the log record body (log.body) with the message field.
# https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index 347599692f..66e180ef1f 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -73,24 +73,30 @@ processors: timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): - (?(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) receivers: filelog/pgbackrest_log: include: @@ -174,24 +180,30 @@ processors: timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): - (?(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == 
"DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) receivers: filelog/pgbackrest_log: include: diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 375d2b9bab..700b9a3725 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -96,18 +96,17 @@ func EnablePgBouncerLogging(ctx context.Context, // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme outConfig.Processors["transform/pgbouncer_logs"] = map[string]any{ "log_statements": []map[string]any{{ - "context": "log", "statements": []string{ // Set instrumentation scope `set(instrumentation_scope.name, "pgbouncer")`, // Extract timestamp, pid, log level, and message and store in cache. - `merge_maps(cache, ExtractPatterns(body, ` + + `merge_maps(log.cache, ExtractPatterns(log.body, ` + `"^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) ` + `\\[(?\\d+)\\] (?[A-Z]+) (?.*$)"), "insert")`, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["log_level"])`, + `set(log.severity_text, log.cache["log_level"])`, // Map pgBouncer (libusual) "logging levels" to OpenTelemetry severity levels. 
 				//
@@ -115,11 +114,11 @@ func EnablePgBouncerLogging(ctx context.Context,
 				// https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
 				// https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings
 				// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums
-				`set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" or severity_text == "DEBUG"`,
-				`set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG"`,
-				`set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`,
-				`set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`,
-				`set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"`,
+				`set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "NOISE" or log.severity_text == "DEBUG"`,
+				`set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "LOG"`,
+				`set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`,
+				`set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`,
+				`set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "FATAL"`,

 				// Parse the timestamp.
 				// The format is neither RFC 3339 nor ISO 8601:
@@ -129,19 +128,19 @@ func EnablePgBouncerLogging(ctx context.Context,
 				// then a timezone abbreviation.
 				//
 				// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md
-				`set(time, Time(cache["timestamp"], "%F %T.%L %Z"))`,
+				`set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"])`,

 				// Keep the unparsed log record in a standard attribute, and replace
 				// the log record body with the message field.
 				//
 				// https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md
-				`set(attributes["log.record.original"], body)`,
+				`set(log.attributes["log.record.original"], log.body)`,

 				// Set pid as attribute
-				`set(attributes["process.pid"], cache["pid"])`,
+				`set(log.attributes["process.pid"], log.cache["pid"])`,

 				// Set the log message to body.
-				`set(body, cache["msg"])`,
+				`set(log.body, log.cache["msg"])`,
 			},
 		}},
 	}
diff --git a/internal/collector/pgbouncer_metrics_queries.yaml b/internal/collector/pgbouncer_metrics_queries.yaml
index 228fef1cc0..a4e3a918fb 100644
--- a/internal/collector/pgbouncer_metrics_queries.yaml
+++ b/internal/collector/pgbouncer_metrics_queries.yaml
@@ -11,45 +11,47 @@
       attribute_columns: ["database", "user", "state", "application_name", "link"]
       description: "Current waiting time in seconds"

-  # NOTE: Avoid collecting "host" column because it can be null; the collector will warn against null.
+  # NOTE: Avoid collecting or using the "host", "force_user", and "pool_mode" columns because they
+  # can be NULL; the collector logs a warning for NULL values even in columns that are not used,
+  # and it emits an error log when one of these columns is used.
   # The host column should always point either to pgBouncer's virtual database (the null case) or to the primary.
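[Editor's note] For context on the NOTE above: each entry in pgbouncer_metrics_queries.yaml pairs a pgBouncer admin command with metric definitions, and this patch simply keeps the nullable columns out of attribute_columns rather than coalescing them. A minimal sketch of that shape in Go, mirroring the map[string]any style used by the collector code in this series; the variable name and the printing are illustrative only, not part of the patch:

	package main

	import "fmt"

	func main() {
		// One "SHOW DATABASES" metric entry. "force_user" and "pool_mode"
		// are omitted from attribute_columns because they can be NULL,
		// which would make the collector emit error logs when a NULL
		// column is used as an attribute.
		query := map[string]any{
			"sql": "SHOW DATABASES",
			"metrics": []map[string]any{{
				"metric_name":       "ccp_pgbouncer_databases_pool_size",
				"value_column":      "pool_size",
				"attribute_columns": []string{"name", "port", "database"},
				"description":       "Maximum number of server connections",
			}},
		}
		fmt.Println(query)
	}

Trimming the attributes to name, port, and database (as the hunks below do for every metric) keeps the NULL-capable columns out of the attribute path entirely.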
- sql: "SHOW DATABASES" metrics: - metric_name: ccp_pgbouncer_databases_pool_size value_column: pool_size - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Maximum number of server connections" - metric_name: ccp_pgbouncer_databases_min_pool_size value_column: min_pool_size - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Minimum number of server connections" - metric_name: ccp_pgbouncer_databases_reserve_pool - value_column: reserve_pool - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + value_column: reserve_pool_size + attribute_columns: ["name", "port", "database"] description: "Maximum number of additional connections for this database" - metric_name: ccp_pgbouncer_databases_max_connections value_column: max_connections - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: >- Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database - metric_name: ccp_pgbouncer_databases_current_connections value_column: current_connections - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Current number of connections for this database" - metric_name: ccp_pgbouncer_databases_paused value_column: paused - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "1 if this database is currently paused, else 0" - metric_name: ccp_pgbouncer_databases_disabled value_column: disabled - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "1 if this database is currently disabled, else 0" - sql: "SHOW LISTS" diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index 74aed710da..cbd69cbd03 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -70,23 +70,26 @@ processors: timeout: 30s transform/pgbouncer_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbouncer") - - merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?\\d+)\\] (?[A-Z]+) (?.*$)"), "insert") - - set(severity_text, cache["log_level"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" - or severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) - - set(attributes["log.record.original"], body) - - set(attributes["process.pid"], cache["pid"]) - - set(body, cache["msg"]) + - set(log.severity_text, log.cache["log_level"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "NOISE" or log.severity_text == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + 
"LOG" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.attributes["process.pid"], log.cache["pid"]) + - set(log.body, log.cache["msg"]) receivers: filelog/pgbouncer_log: include: @@ -167,23 +170,26 @@ processors: timeout: 30s transform/pgbouncer_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbouncer") - - merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?\\d+)\\] (?[A-Z]+) (?.*$)"), "insert") - - set(severity_text, cache["log_level"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" - or severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) - - set(attributes["log.record.original"], body) - - set(attributes["process.pid"], cache["pid"]) - - set(body, cache["msg"]) + - set(log.severity_text, log.cache["log_level"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "NOISE" or log.severity_text == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "LOG" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.attributes["process.pid"], log.cache["pid"]) + - set(log.body, log.cache["msg"]) receivers: filelog/pgbouncer_log: include: diff --git a/internal/collector/postgres_logs_transforms.yaml b/internal/collector/postgres_logs_transforms.yaml index f397b996e8..c8178f2d6e 100644 --- a/internal/collector/postgres_logs_transforms.yaml +++ b/internal/collector/postgres_logs_transforms.yaml @@ -7,12 +7,11 @@ # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme -- context: log - conditions: +- conditions: - body["format"] == "csv" statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) + - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict")) # Extract the optional "remote_port" value from the "connection_from" field. It is either: # 1. 
a Unix socket starting with "[local]:" or
@@ -24,77 +23,76 @@
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l108
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/common/ip.c#l224
   - >-
-    merge_maps(cache,
-    ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"),
+    merge_maps(log.cache,
+    ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"),
     "insert")
-    where Len(cache["connection_from"]) > 0
+    where Len(log.cache["connection_from"]) > 0

   # When there is a "remote_port" value, everything before it is the "remote_host" value.
   - >-
-    set(cache["remote_host"],
-    Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - Len(cache["remote_port"]) - 1))
-    where Len(cache["connection_from"]) > 0 and IsString(cache["remote_port"])
+    set(log.cache["remote_host"],
+    Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"]) - Len(log.cache["remote_port"]) - 1))
+    where Len(log.cache["connection_from"]) > 0 and IsString(log.cache["remote_port"])

   # When there is still no "remote_host" value, copy the "connection_from" value, if any.
   - >-
-    set(cache["remote_host"], cache["connection_from"])
-    where Len(cache["connection_from"]) > 0 and not IsString(cache["remote_host"])
+    set(log.cache["remote_host"], log.cache["connection_from"])
+    where Len(log.cache["connection_from"]) > 0 and not IsString(log.cache["remote_host"])

   # Extract the values encoded in the "location" field.
   #
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2805
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l207
   - >-
-    merge_maps(cache,
-    ExtractPatterns(cache["location"], "^(?:(?<func_name>[^,]+), )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"),
+    merge_maps(log.cache,
+    ExtractPatterns(log.cache["location"], "^(?:(?<func_name>[^,]+), )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"),
     "insert")
-    where Len(cache["location"]) > 0
+    where Len(log.cache["location"]) > 0

   # These values are numeric in JSON logs.
- >- - set(cache["cursor_position"], Double(cache["cursor_position"])) - where IsMatch(cache["cursor_position"], "^[0-9.]+$") + set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) + where IsMatch(log.cache["cursor_position"], "^[0-9.]+$") - >- - set(cache["file_line_num"], Double(cache["file_line_num"])) - where IsMatch(cache["file_line_num"], "^[0-9.]+$") + set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) + where IsMatch(log.cache["file_line_num"], "^[0-9.]+$") - >- - set(cache["internal_position"], Double(cache["internal_position"])) - where IsMatch(cache["internal_position"], "^[0-9.]+$") + set(log.cache["internal_position"], Double(log.cache["internal_position"])) + where IsMatch(log.cache["internal_position"], "^[0-9.]+$") - >- - set(cache["leader_pid"], Double(cache["leader_pid"])) - where IsMatch(cache["leader_pid"], "^[0-9.]+$") + set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) + where IsMatch(log.cache["leader_pid"], "^[0-9.]+$") - >- - set(cache["line_num"], Double(cache["line_num"])) - where IsMatch(cache["line_num"], "^[0-9.]+$") + set(log.cache["line_num"], Double(log.cache["line_num"])) + where IsMatch(log.cache["line_num"], "^[0-9.]+$") - >- - set(cache["pid"], Double(cache["pid"])) - where IsMatch(cache["pid"], "^[0-9.]+$") + set(log.cache["pid"], Double(log.cache["pid"])) + where IsMatch(log.cache["pid"], "^[0-9.]+$") - >- - set(cache["query_id"], Double(cache["query_id"])) - where IsMatch(cache["query_id"], "^[0-9.]+$") + set(log.cache["query_id"], Double(log.cache["query_id"])) + where IsMatch(log.cache["query_id"], "^[0-9.]+$") - >- - set(cache["remote_port"], Double(cache["remote_port"])) - where IsMatch(cache["remote_port"], "^[0-9.]+$") + set(log.cache["remote_port"], Double(log.cache["remote_port"])) + where IsMatch(log.cache["remote_port"], "^[0-9.]+$") # Pass the results to the next set of statements. - - set(body["parsed"], cache) + - set(log.body["parsed"], log.cache) # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme -- context: log - statements: +- statements: - set(instrumentation_scope.name, "postgres") - set(instrumentation_scope.version, resource.attributes["db.version"]) # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. - - set(cache, body["parsed"]) where body["format"] == "csv" + - set(log.cache, log.body["parsed"]) where log.body["format"] == "csv" # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson - - set(cache, ParseJSON(body["original"])) where body["format"] == "json" + - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] == "json" # The log severity is in the "error_severity" field. # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - - set(severity_text, cache["error_severity"]) + - set(log.severity_text, log.cache["error_severity"]) # Map severity text to OpenTelemetry severity levels. 
  # Postgres has levels beyond the typical ones:
@@ -106,17 +104,17 @@
   # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
   # https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings
   # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums
-  - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5"
-  - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4"
-  - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3"
-  - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2"
-  - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1"
-  - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" or severity_text == "LOG"
-  - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE"
-  - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"
-  - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
-  - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"
-  - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == "DEBUG5"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == "DEBUG4"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == "DEBUG3"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == "DEBUG2"
+  - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG1"
+  - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO" or log.severity_text == "LOG"
+  - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == "NOTICE"
+  - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"
+  - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"
+  - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "FATAL"
+  - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == "PANIC"

   # Parse the "timestamp" field into the record timestamp.
   # The format is neither RFC 3339 nor ISO 8601:
@@ -128,7 +126,7 @@
   #
   # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#time
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2246
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/elog.c#l2671
-  - set(time, Time(cache["timestamp"], "%F %T.%L %Z"))
+  - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"])

   # Rename fields emitted by Postgres to align with OpenTelemetry semantic conventions.
   #
@@ -140,27 +138,27 @@
   - set(resource.attributes["db.system"], "postgresql")

   # Keep the unparsed log record in a standard attribute,
-  # and replace the log record body with the parsed fields.
+  # and replace the log record body (log.body) with the parsed fields.
# # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - - set(attributes["log.record.original"], body["original"]) - - set(body, cache) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/client.md - - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) - - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) + - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"]) + - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"]) # These values are populated when the "log_error_verbosity" parameter is VERBOSE. # # https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-ERROR-VERBOSITY # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/code.md - - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) - - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) - - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) + - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"]) + - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"]) + - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where IsDouble(log.body["file_line_num"]) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/db.md - - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) - - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) + - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"]) + - set(log.attributes["db.response.status_code"], log.body["state_code"]) where IsString(log.body["state_code"]) # Postgres is multiprocess so some client/backend details align here. 
# @@ -170,29 +168,28 @@ # https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/error/elog.c;hb=REL_17_0#l2697 # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md - >- - set(attributes["process.creation.time"], Concat([ - Substring(body["session_start"], 0, 10), "T", - Substring(body["session_start"], 11, 8), "Z"], "")) - where IsMatch(body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + set(log.attributes["process.creation.time"], Concat([ + Substring(log.body["session_start"], 0, 10), "T", + Substring(log.body["session_start"], 11, 8), "Z"], "")) + where IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") - >- - set(attributes["process.pid"], Int(body["pid"])) - where IsDouble(body["pid"]) + set(log.attributes["process.pid"], Int(log.body["pid"])) + where IsDouble(log.body["pid"]) - >- - set(attributes["process.title"], body["ps"]) - where IsString(body["ps"]) + set(log.attributes["process.title"], log.body["ps"]) + where IsString(log.body["ps"]) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/user.md - >- - set(attributes["user.name"], body["user"]) - where IsString(body["user"]) + set(log.attributes["user.name"], log.body["user"]) + where IsString(log.body["user"]) # Look for and parse the CSV of a pgAudit message. # # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme # https://github.com/pgaudit/pgaudit#format -- context: log - conditions: +- conditions: # Messages from pgAudit have always been prefixed with "AUDIT:", but that # could change in the future. # @@ -203,9 +200,9 @@ statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - >- - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) - 7), + set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", delimiter=",", mode="strict")) - >- set(instrumentation_scope.name, "pgaudit") - where Len(body["pgaudit"]) > 0 + where Len(log.body["pgaudit"]) > 0 diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index d9bb161b9d..83deb349ad 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -96,103 +96,121 @@ processors: timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): - (?(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, 
Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) transform/postgres_logs: log_statements: - conditions: - body["format"] == "csv" - context: log statements: - - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) - - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), - "insert") where Len(cache["connection_from"]) > 0 - - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0 - and IsString(cache["remote_port"]) - - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"]) - > 0 and not IsString(cache["remote_host"]) - - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?[^,]+), - )?(?[^:]+):(?\\d+)$"), "insert") where Len(cache["location"]) + - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", + mode="strict")) + - merge_maps(log.cache, ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), + "insert") where Len(log.cache["connection_from"]) > 0 + - set(log.cache["remote_host"], Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"]) + - Len(log.cache["remote_port"]) - 1)) where Len(log.cache["connection_from"]) + > 0 and IsString(log.cache["remote_port"]) + - set(log.cache["remote_host"], log.cache["connection_from"]) where Len(log.cache["connection_from"]) + > 0 and not IsString(log.cache["remote_host"]) + - merge_maps(log.cache, ExtractPatterns(log.cache["location"], "^(?:(?[^,]+), + )?(?[^:]+):(?\\d+)$"), "insert") where Len(log.cache["location"]) > 0 - - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"], - "^[0-9.]+$") - - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"], + - set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) where + IsMatch(log.cache["cursor_position"], "^[0-9.]+$") + - set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) where + IsMatch(log.cache["file_line_num"], "^[0-9.]+$") + - set(log.cache["internal_position"], Double(log.cache["internal_position"])) + where IsMatch(log.cache["internal_position"], "^[0-9.]+$") + - set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) where 
IsMatch(log.cache["leader_pid"], "^[0-9.]+$") - - set(cache["internal_position"], Double(cache["internal_position"])) where - IsMatch(cache["internal_position"], "^[0-9.]+$") - - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"], + - set(log.cache["line_num"], Double(log.cache["line_num"])) where IsMatch(log.cache["line_num"], "^[0-9.]+$") - - set(cache["line_num"], Double(cache["line_num"])) where IsMatch(cache["line_num"], + - set(log.cache["pid"], Double(log.cache["pid"])) where IsMatch(log.cache["pid"], "^[0-9.]+$") - - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$") - - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"], + - set(log.cache["query_id"], Double(log.cache["query_id"])) where IsMatch(log.cache["query_id"], "^[0-9.]+$") - - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"], + - set(log.cache["remote_port"], Double(log.cache["remote_port"])) where IsMatch(log.cache["remote_port"], "^[0-9.]+$") - - set(body["parsed"], cache) - - context: log - statements: + - set(log.body["parsed"], log.cache) + - statements: - set(instrumentation_scope.name, "postgres") - set(instrumentation_scope.version, resource.attributes["db.version"]) - - set(cache, body["parsed"]) where body["format"] == "csv" - - set(cache, ParseJSON(body["original"])) where body["format"] == "json" - - set(severity_text, cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" - - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" - - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" - - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - or severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(log.cache, log.body["parsed"]) where log.body["format"] == "csv" + - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] == + "json" + - set(log.severity_text, log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "DEBUG5" + - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == + "DEBUG4" + - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == + "DEBUG3" + - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == + "DEBUG2" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG1" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" or log.severity_text == "LOG" + - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == + "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) 
where log.severity_text == + "FATAL" + - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == + "PANIC" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - set(resource.attributes["db.system"], "postgresql") - - set(attributes["log.record.original"], body["original"]) - - set(body, cache) - - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) - - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) - - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) - - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) - - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) - - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) - - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) - - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"], - 0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"], - "^[^ ]{10} [^ ]{8} UTC$") - - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"]) - - set(attributes["process.title"], body["ps"]) where IsString(body["ps"]) - - set(attributes["user.name"], body["user"]) where IsString(body["user"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache) + - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"]) + - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"]) + - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"]) + - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"]) + - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where IsDouble(log.body["file_line_num"]) + - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"]) + - set(log.attributes["db.response.status_code"], log.body["state_code"]) where + IsString(log.body["state_code"]) + - set(log.attributes["process.creation.time"], Concat([ Substring(log.body["session_start"], + 0, 10), "T", Substring(log.body["session_start"], 11, 8), "Z"], "")) where + IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + - set(log.attributes["process.pid"], Int(log.body["pid"])) where IsDouble(log.body["pid"]) + - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"]) + - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"]) - conditions: - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: "' - context: log statements: - - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) + - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", delimiter=",", mode="strict")) - - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0 + - set(instrumentation_scope.name, "pgaudit") where Len(log.body["pgaudit"]) + > 0 receivers: filelog/pgbackrest_log: include: @@ 
-338,103 +356,121 @@ processors: timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P<process_id>\\d{2,3})\\s*(?<error_severity>\\S*): - (?<message>(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?<message>(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) transform/postgres_logs: log_statements: - conditions: - body["format"] == "csv" - context: log statements: - - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) - - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"), - "insert") where Len(cache["connection_from"]) > 0 - - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0 - and IsString(cache["remote_port"]) - - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"]) - > 0 and not IsString(cache["remote_host"]) - - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?<func_name>[^,]+), - )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), "insert") where Len(cache["location"]) + - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", + mode="strict")) + - merge_maps(log.cache, ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"), + "insert") where Len(log.cache["connection_from"]) > 0 + - set(log.cache["remote_host"], Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"]) + - Len(log.cache["remote_port"]) - 1)) where
Len(log.cache["connection_from"]) + > 0 and IsString(log.cache["remote_port"]) + - set(log.cache["remote_host"], log.cache["connection_from"]) where Len(log.cache["connection_from"]) + > 0 and not IsString(log.cache["remote_host"]) + - merge_maps(log.cache, ExtractPatterns(log.cache["location"], "^(?:(?<func_name>[^,]+), + )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), "insert") where Len(log.cache["location"]) > 0 - - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"], - "^[0-9.]+$") - - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"], + - set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) where + IsMatch(log.cache["cursor_position"], "^[0-9.]+$") + - set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) where + IsMatch(log.cache["file_line_num"], "^[0-9.]+$") + - set(log.cache["internal_position"], Double(log.cache["internal_position"])) + where IsMatch(log.cache["internal_position"], "^[0-9.]+$") + - set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) where IsMatch(log.cache["leader_pid"], "^[0-9.]+$") - - set(cache["internal_position"], Double(cache["internal_position"])) where - IsMatch(cache["internal_position"], "^[0-9.]+$") - - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"], + - set(log.cache["line_num"], Double(log.cache["line_num"])) where IsMatch(log.cache["line_num"], "^[0-9.]+$") - - set(cache["line_num"], Double(cache["line_num"])) where IsMatch(cache["line_num"], + - set(log.cache["pid"], Double(log.cache["pid"])) where IsMatch(log.cache["pid"], "^[0-9.]+$") - - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$") - - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"], + - set(log.cache["query_id"], Double(log.cache["query_id"])) where IsMatch(log.cache["query_id"], "^[0-9.]+$") - - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"], + - set(log.cache["remote_port"], Double(log.cache["remote_port"])) where IsMatch(log.cache["remote_port"], "^[0-9.]+$") - - set(body["parsed"], cache) - - context: log - statements: + - set(log.body["parsed"], log.cache) + - statements: - set(instrumentation_scope.name, "postgres") - set(instrumentation_scope.version, resource.attributes["db.version"]) - - set(cache, body["parsed"]) where body["format"] == "csv" - - set(cache, ParseJSON(body["original"])) where body["format"] == "json" - - set(severity_text, cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" - - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" - - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" - - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - or severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(log.cache,
log.body["parsed"]) where log.body["format"] == "csv" + - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] == + "json" + - set(log.severity_text, log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "DEBUG5" + - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == + "DEBUG4" + - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == + "DEBUG3" + - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == + "DEBUG2" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG1" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" or log.severity_text == "LOG" + - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == + "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == + "PANIC" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - set(resource.attributes["db.system"], "postgresql") - - set(attributes["log.record.original"], body["original"]) - - set(body, cache) - - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) - - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) - - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) - - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) - - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) - - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) - - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) - - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"], - 0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"], - "^[^ ]{10} [^ ]{8} UTC$") - - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"]) - - set(attributes["process.title"], body["ps"]) where IsString(body["ps"]) - - set(attributes["user.name"], body["user"]) where IsString(body["user"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache) + - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"]) + - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"]) + - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"]) + - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"]) + - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where IsDouble(log.body["file_line_num"]) + - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"]) + - set(log.attributes["db.response.status_code"], log.body["state_code"]) where + IsString(log.body["state_code"]) + - set(log.attributes["process.creation.time"], Concat([ 
Substring(log.body["session_start"], + 0, 10), "T", Substring(log.body["session_start"], 11, 8), "Z"], "")) where + IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + - set(log.attributes["process.pid"], Int(log.body["pid"])) where IsDouble(log.body["pid"]) + - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"]) + - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"]) - conditions: - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: "' - context: log statements: - - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) + - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", delimiter=",", mode="strict")) - - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0 + - set(instrumentation_scope.name, "pgaudit") where Len(log.body["pgaudit"]) + > 0 receivers: filelog/pgbackrest_log: include: From be9bd318e917817b7324d4d8a55c75ba5d33b701 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Thu, 20 Mar 2025 10:37:36 -0500 Subject: [PATCH 09/79] Ensure required LDAP HBA options are present Issue: PGO-2263 See: 797fdf17698ae42eb02b4915a60a856938e73734 --- ...ator.crunchydata.com_postgresclusters.yaml | 22 +++- .../validation/postgrescluster_test.go | 115 ++++++++++++++++++ .../v1beta1/postgres_types.go | 15 ++- 3 files changed, 146 insertions(+), 6 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index d0891d05ba..3834ebf654 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -110,11 +110,25 @@ spec: x-kubernetes-map-type: atomic x-kubernetes-validations: - message: '"hba" cannot be combined with other fields' - rule: 'has(self.hba) ? !has(self.connection) && !has(self.databases) - && !has(self.method) && !has(self.options) && !has(self.users) - : true' + rule: '[has(self.hba), has(self.connection) || has(self.databases) + || has(self.method) || has(self.options) || has(self.users)].exists_one(b,b)' - message: '"connection" and "method" are required' - rule: 'has(self.hba) ? 
true : has(self.connection) && has(self.method)' + rule: has(self.hba) || (has(self.connection) && has(self.method)) + - message: the "ldap" method requires an "ldapbasedn", "ldapprefix", + or "ldapsuffix" option + rule: has(self.hba) || self.method != "ldap" || (has(self.options) + && ["ldapbasedn","ldapprefix","ldapsuffix"].exists(k, k + in self.options)) + - message: cannot use "ldapbasedn", "ldapbinddn", "ldapbindpasswd", + "ldapsearchattribute", or "ldapsearchfilter" options with + "ldapprefix" or "ldapsuffix" options + rule: has(self.hba) || self.method != "ldap" || !has(self.options) + || [["ldapprefix","ldapsuffix"], ["ldapbasedn","ldapbinddn","ldapbindpasswd","ldapsearchattribute","ldapsearchfilter"]].exists_one(a, + a.exists(k, k in self.options)) + - message: the "radius" method requires "radiusservers" and + "radiussecrets" options + rule: has(self.hba) || self.method != "radius" || (has(self.options) + && ["radiusservers","radiussecrets"].all(k, k in self.options)) maxItems: 10 type: array x-kubernetes-list-type: atomic diff --git a/internal/testing/validation/postgrescluster_test.go b/internal/testing/validation/postgrescluster_test.go index 18a17de069..f10fbe8023 100644 --- a/internal/testing/validation/postgrescluster_test.go +++ b/internal/testing/validation/postgrescluster_test.go @@ -118,6 +118,121 @@ func TestPostgresAuthenticationRules(t *testing.T) { assert.Assert(t, cmp.Contains(cause.Message, "unsafe")) } }) + + t.Run("LDAP", func(t *testing.T) { + t.Run("Required", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap }, + { connection: hostssl, method: ldap, options: {} }, + { connection: hostssl, method: ldap, options: { ldapbinddn: any } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 3)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Contains(cause.Message, `"ldap" method requires`)) + } + + // These are valid. + + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapbasedn: any } }, + { connection: hostssl, method: ldap, options: { ldapprefix: any } }, + { connection: hostssl, method: ldap, options: { ldapsuffix: any } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + + t.Run("Mixed", func(t *testing.T) { + // Some options cannot be combined with others. + + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapbinddn: any, ldapprefix: other } }, + { connection: hostssl, method: ldap, options: { ldapbasedn: any, ldapsuffix: other } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 2)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Regexp(`cannot use .+? options with .+? 
options`, cause.Message)) + } + + // These combinations are allowed. + + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapprefix: one, ldapsuffix: two } }, + { connection: hostssl, method: ldap, options: { ldapbasedn: one, ldapbinddn: two } }, + { connection: hostssl, method: ldap, options: { + ldapbasedn: one, ldapsearchattribute: two, ldapsearchfilter: three, + } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + }) + + t.Run("RADIUS", func(t *testing.T) { + t.Run("Required", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: radius }, + { connection: hostssl, method: radius, options: {} }, + { connection: hostssl, method: radius, options: { radiusidentifiers: any } }, + { connection: hostssl, method: radius, options: { radiusservers: any } }, + { connection: hostssl, method: radius, options: { radiussecrets: any } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 5)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Contains(cause.Message, `"radius" method requires`)) + } + + // These are valid. + + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: radius, options: { radiusservers: one, radiussecrets: two } }, + { connection: hostssl, method: radius, options: { + radiusservers: one, radiussecrets: two, radiusports: three, + } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + }) } func TestPostgresConfigParameters(t *testing.T) { diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index 8f950dbfa9..b70a21a88d 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -124,8 +124,19 @@ type PostgresHBARule struct { // --- // Emulate OpenAPI "anyOf" aka Kubernetes union. -// +kubebuilder:validation:XValidation:rule=`has(self.hba) ? !has(self.connection) && !has(self.databases) && !has(self.method) && !has(self.options) && !has(self.users) : true`,message=`"hba" cannot be combined with other fields` -// +kubebuilder:validation:XValidation:rule=`has(self.hba) ? true : has(self.connection) && has(self.method)`,message=`"connection" and "method" are required` +// +kubebuilder:validation:XValidation:rule=`[has(self.hba), has(self.connection) || has(self.databases) || has(self.method) || has(self.options) || has(self.users)].exists_one(b,b)`,message=`"hba" cannot be combined with other fields` +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || (has(self.connection) && has(self.method))`,message=`"connection" and "method" are required` +// +// Some authentication methods *must* be further configured via options. 
+// +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/backend/libpq/hba.c#l1501 +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/backend/libpq/hba.c#l1886 +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "ldap" || (has(self.options) && ["ldapbasedn","ldapprefix","ldapsuffix"].exists(k, k in self.options))`,message=`the "ldap" method requires an "ldapbasedn", "ldapprefix", or "ldapsuffix" option` +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "ldap" || !has(self.options) || [["ldapprefix","ldapsuffix"], ["ldapbasedn","ldapbinddn","ldapbindpasswd","ldapsearchattribute","ldapsearchfilter"]].exists_one(a, a.exists(k, k in self.options))`,message=`cannot use "ldapbasedn", "ldapbinddn", "ldapbindpasswd", "ldapsearchattribute", or "ldapsearchfilter" options with "ldapprefix" or "ldapsuffix" options` +// +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/backend/libpq/hba.c#l1539 +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/backend/libpq/hba.c#l1945 +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "radius" || (has(self.options) && ["radiusservers","radiussecrets"].all(k, k in self.options))`,message=`the "radius" method requires "radiusservers" and "radiussecrets" options` // // +structType=atomic type PostgresHBARuleSpec struct { From 24ce37ebc9a2f4a5ccd81d82f0582d20c0927009 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Thu, 20 Mar 2025 11:01:47 -0500 Subject: [PATCH 10/79] Rename PostgresConfig struct to PostgresConfigSpec See: ef726823bbee85fa88af8ed22bb8692f64ab3926 --- internal/pgbackrest/reconcile_test.go | 2 +- internal/testing/validation/postgrescluster_test.go | 4 ++-- .../v1beta1/postgres_types.go | 2 +- .../v1beta1/postgrescluster_types.go | 2 +- .../v1beta1/zz_generated.deepcopy.go | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index 0c9aece2b1..6104a4e2a2 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -522,7 +522,7 @@ func TestAddConfigToRestorePod(t *testing.T) { custom.Name = "custom-configmap-files" cluster := cluster.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Files: []corev1.VolumeProjection{ {ConfigMap: &custom}, }, diff --git a/internal/testing/validation/postgrescluster_test.go b/internal/testing/validation/postgrescluster_test.go index f10fbe8023..ca4160b520 100644 --- a/internal/testing/validation/postgrescluster_test.go +++ b/internal/testing/validation/postgrescluster_test.go @@ -367,7 +367,7 @@ func TestPostgresConfigParameters(t *testing.T) { t.Run("Valid", func(t *testing.T) { cluster := base.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Parameters: map[string]intstr.IntOrString{ "wal_level": intstr.FromString("logical"), }, @@ -378,7 +378,7 @@ func TestPostgresConfigParameters(t *testing.T) { t.Run("Invalid", func(t *testing.T) { cluster := base.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Parameters: map[string]intstr.IntOrString{ "wal_level": intstr.FromString("minimal"), }, diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go 
b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index b70a21a88d..ccf3368a2a 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -18,7 +18,7 @@ type PostgresAuthenticationSpec struct { Rules []PostgresHBARuleSpec `json:"rules,omitempty"` } -type PostgresConfig struct { +type PostgresConfigSpec struct { // Files to mount under "/etc/postgres". // --- // +optional diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 4d3be247fc..6ca3c96814 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -29,7 +29,7 @@ type PostgresClusterSpec struct { Backups Backups `json:"backups,omitempty"` // +optional - Config *PostgresConfig `json:"config,omitempty"` + Config *PostgresConfigSpec `json:"config,omitempty"` // The secret containing the Certificates and Keys to encrypt PostgreSQL // traffic will need to contain the server TLS certificate, TLS key and the diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 58281cb921..8ee494d5f8 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -2019,7 +2019,7 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { in.Backups.DeepCopyInto(&out.Backups) if in.Config != nil { in, out := &in.Config, &out.Config - *out = new(PostgresConfig) + *out = new(PostgresConfigSpec) (*in).DeepCopyInto(*out) } if in.CustomTLSSecret != nil { @@ -2191,7 +2191,7 @@ func (in *PostgresClusterStatus) DeepCopy() *PostgresClusterStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PostgresConfig) DeepCopyInto(out *PostgresConfig) { +func (in *PostgresConfigSpec) DeepCopyInto(out *PostgresConfigSpec) { *out = *in if in.Files != nil { in, out := &in.Files, &out.Files @@ -2209,12 +2209,12 @@ func (in *PostgresConfig) DeepCopyInto(out *PostgresConfig) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresConfig. -func (in *PostgresConfig) DeepCopy() *PostgresConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresConfigSpec. 
+func (in *PostgresConfigSpec) DeepCopy() *PostgresConfigSpec { if in == nil { return nil } - out := new(PostgresConfig) + out := new(PostgresConfigSpec) in.DeepCopyInto(out) return out } From 705ca140da712e617c2b4d008bdfa874898ed2d3 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Thu, 20 Mar 2025 11:25:08 -0500 Subject: [PATCH 11/79] Explain HBA rules in their field description Issue: PGO-2263 See: bc023792a491bdc3f062c77c5f1cfbb8a268576d --- ...perator.crunchydata.com_postgresclusters.yaml | 16 +++++++++++++++- .../v1beta1/postgres_types.go | 10 ++++++++++ .../v1beta1/postgrescluster_types.go | 2 ++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 3834ebf654..bfa7d99c8b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -40,9 +40,19 @@ spec: description: PostgresClusterSpec defines the desired state of PostgresCluster properties: authentication: + description: Authentication settings for the PostgreSQL server properties: rules: - description: 'More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html' + description: |- + Postgres compares every new connection to these rules in the order they are + defined. The first rule that matches determines if and how the connection + must then authenticate. Connections that match no rules are disconnected. + + When this is omitted or empty, Postgres accepts encrypted connections to any + database from users that have a password. To refuse all network connections, + set this to one rule that matches "host" connections to the "reject" method. + + More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html items: properties: connection: @@ -79,6 +89,7 @@ spec: description: |- The authentication method to use when a connection matches this rule. The special value "reject" refuses connections that match this rule. + More info: https://www.postgresql.org/docs/current/auth-methods.html maxLength: 20 minLength: 1 @@ -93,6 +104,8 @@ spec: - type: integer - type: string x-kubernetes-int-or-string: true + description: Additional settings for this rule or its authentication + method. maxProperties: 20 type: object x-kubernetes-map-type: atomic @@ -4461,6 +4474,7 @@ spec: type: object type: object config: + description: General configuration of the PostgreSQL server properties: files: description: Files to mount under "/etc/postgres". diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index ccf3368a2a..47f7382671 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -10,6 +10,14 @@ import ( ) type PostgresAuthenticationSpec struct { + // Postgres compares every new connection to these rules in the order they are + // defined. The first rule that matches determines if and how the connection + // must then authenticate. Connections that match no rules are disconnected. + // + // When this is omitted or empty, Postgres accepts encrypted connections to any + // database from users that have a password. To refuse all network connections, + // set this to one rule that matches "host" connections to the "reject" method. 
+ // // More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html // --- // +kubebuilder:validation:MaxItems=10 @@ -99,6 +107,7 @@ type PostgresHBARule struct { // The authentication method to use when a connection matches this rule. // The special value "reject" refuses connections that match this rule. + // // More info: https://www.postgresql.org/docs/current/auth-methods.html // --- // +kubebuilder:validation:MinLength=1 @@ -108,6 +117,7 @@ type PostgresHBARule struct { // +optional Method string `json:"method,omitempty"` + // Additional settings for this rule or its authentication method. // --- // +kubebuilder:validation:MaxProperties=20 // +mapType=atomic diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 6ca3c96814..59029958f4 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -21,6 +21,7 @@ type PostgresClusterSpec struct { // +optional DataSource *DataSource `json:"dataSource,omitempty"` + // Authentication settings for the PostgreSQL server // +optional Authentication *PostgresAuthenticationSpec `json:"authentication,omitempty"` @@ -28,6 +29,7 @@ type PostgresClusterSpec struct { // +optional Backups Backups `json:"backups,omitempty"` + // General configuration of the PostgreSQL server // +optional Config *PostgresConfigSpec `json:"config,omitempty"` From eac563f7d5a0c4a9f37b3f8babfbfd4bd6287584 Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Mon, 24 Mar 2025 11:30:23 -0500 Subject: [PATCH 12/79] Change logic to includeLogrotate (#697) Only logrotate if - boolean is true and - OTel log gate is on --- internal/collector/instance.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 54081b2684..f37eb7f4c3 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -54,6 +54,11 @@ func AddToPod( return } + // We only want to include log rotation if this type of pod requires it + // (indicated by the includeLogrotate boolean) AND if logging is enabled + // for this PostgresCluster/PGAdmin + includeLogrotate = includeLogrotate && OpenTelemetryLogsEnabled(ctx, spec) + // Create volume and volume mount for otel collector config configVolumeMount := corev1.VolumeMount{ Name: "collector-config", From 4a2eaaab1e6d86d2d9abb4101e25fb5c2fbefb03 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 24 Mar 2025 12:00:40 -0500 Subject: [PATCH 13/79] Bump github.com/golang-jwt/jwt/v5 to v5.2.2 Issue: CVE-2025-22870 Issue: CVE-2025-30204 Issue: GHSA-mh63-6h87-95cp Issue: GHSA-qxp5-gwg8-xv66 Issue: GO-2025-3503 See: 427faa0523a662f20b6d4198c29eddf638b96ce0 --- go.mod | 14 +++++++------- go.sum | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/go.mod b/go.mod index f2a0d0c756..8500880c23 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.23.0 require ( github.com/go-logr/logr v1.4.2 - github.com/golang-jwt/jwt/v5 v5.2.1 + github.com/golang-jwt/jwt/v5 v5.2.2 github.com/google/go-cmp v0.6.0 github.com/google/uuid v1.6.0 github.com/kubernetes-csi/external-snapshotter/client/v8 v8.0.0 @@ -21,7 +21,7 @@ require ( go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/sdk v1.32.0 go.opentelemetry.io/otel/trace v1.32.0 - golang.org/x/crypto v0.35.0 + golang.org/x/crypto v0.36.0 golang.org/x/tools
v0.28.0 gotest.tools/v3 v3.5.1 k8s.io/api v0.31.0 @@ -104,12 +104,12 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.36.0 // indirect + golang.org/x/net v0.37.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sync v0.11.0 // indirect - golang.org/x/sys v0.30.0 // indirect - golang.org/x/term v0.29.0 // indirect - golang.org/x/text v0.22.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/sys v0.31.0 // indirect + golang.org/x/term v0.30.0 // indirect + golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect diff --git a/go.sum b/go.sum index 2822ed5e1e..83d6065d7f 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= -github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= @@ -210,8 +210,8 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= -golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= +golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= +golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 h1:LoYXNGAShUG3m/ehNk4iFctuhGX/+R1ZpfJ4/ia80JM= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -224,26 +224,28 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA= golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I= +golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= +golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.27.0 
h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= -golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= -golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= +golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= -golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= From 960405c6da369bcd877169638d29cd2d4ac48619 Mon Sep 17 00:00:00 2001 From: tony-landreth Date: Mon, 24 Mar 2025 15:18:00 -0400 Subject: [PATCH 14/79] Runs tidy --- go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.sum b/go.sum index 83d6065d7f..03fbcbf0f1 100644 --- a/go.sum +++ b/go.sum @@ -222,8 +222,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA= -golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I= golang.org/x/net v0.37.0 
h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= From 6f90f8a959bca5c3f4ac1d8d927756476e7fa0ef Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Wed, 26 Mar 2025 10:13:06 -0500 Subject: [PATCH 15/79] Use collector.OpenTelemetryMetricsEnabled when config'ing exporter setup (#701) --- internal/controller/postgrescluster/pgmonitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index 48d15d1e6d..a08e182158 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -69,7 +69,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return err } - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { setup = metricsSetupForOTelCollector } else { // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. From 35d0fa6b2260c1e0d5deb084d0c593ef18813b2f Mon Sep 17 00:00:00 2001 From: Tony Landreth <56887169+tony-landreth@users.noreply.github.com> Date: Fri, 4 Apr 2025 10:42:16 -0400 Subject: [PATCH 16/79] Updates manager.yaml, removes admin 4.30 tests (#702) The latest version discontinues pgadmin4 v4.30. This commit removes it from related images, updates to the latest developer images, and removes tests that rely on the old pgadmin image. --- config/manager/manager.yaml | 24 ++++---- testing/kuttl/e2e/pgadmin/01--cluster.yaml | 40 ------------- testing/kuttl/e2e/pgadmin/01-assert.yaml | 32 ----------- .../kuttl/e2e/pgadmin/02--check-settings.yaml | 56 ------------------- 4 files changed, 11 insertions(+), 141 deletions(-) delete mode 100644 testing/kuttl/e2e/pgadmin/01--cluster.yaml delete mode 100644 testing/kuttl/e2e/pgadmin/01-assert.yaml delete mode 100644 testing/kuttl/e2e/pgadmin/02--check-settings.yaml diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 7e5c21a7b4..4e00818d89 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -23,29 +23,27 @@ spec: - name: CRUNCHY_DEBUG value: "true" - name: RELATED_IMAGE_POSTGRES_16 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.3 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513" - name: RELATED_IMAGE_POSTGRES_17 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513" - name: RELATED_IMAGE_POSTGRES_17_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0" - - name: RELATED_IMAGE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513" - 
name: RELATED_IMAGE_PGBACKREST - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513" - name: RELATED_IMAGE_PGBOUNCER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513" - name: RELATED_IMAGE_PGEXPORTER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513" - name: RELATED_IMAGE_PGUPGRADE - value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513" - name: RELATED_IMAGE_STANDALONE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-7.1-2513" - name: RELATED_IMAGE_COLLECTOR - value: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0" + value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0" securityContext: allowPrivilegeEscalation: false capabilities: { drop: [ALL] } diff --git a/testing/kuttl/e2e/pgadmin/01--cluster.yaml b/testing/kuttl/e2e/pgadmin/01--cluster.yaml deleted file mode 100644 index d1afb7be04..0000000000 --- a/testing/kuttl/e2e/pgadmin/01--cluster.yaml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: test-cm -data: - configMap: config ---- -apiVersion: v1 -kind: Secret -metadata: - name: test-secret -type: Opaque -stringData: - password: myPassword ---- -# Create a cluster with a configured pgAdmin UI. 
-apiVersion: postgres-operator.crunchydata.com/v1beta1 -kind: PostgresCluster -metadata: - name: interfaced - labels: { postgres-operator-test: kuttl } -spec: - postgresVersion: ${KUTTL_PG_VERSION} - instances: - - name: instance1 - replicas: 1 - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } - userInterface: - pgAdmin: - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } - config: - files: - - secret: - name: test-secret - - configMap: - name: test-cm - settings: - SHOW_GRAVATAR_IMAGE: False - LOGIN_BANNER: | - Custom KUTTL Login Banner diff --git a/testing/kuttl/e2e/pgadmin/01-assert.yaml b/testing/kuttl/e2e/pgadmin/01-assert.yaml deleted file mode 100644 index e4192a1217..0000000000 --- a/testing/kuttl/e2e/pgadmin/01-assert.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: postgres-operator.crunchydata.com/v1beta1 -kind: PostgresCluster -metadata: - name: interfaced -status: - instances: - - name: instance1 - replicas: 1 - readyReplicas: 1 - updatedReplicas: 1 - ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: interfaced-pgadmin -status: - replicas: 1 - readyReplicas: 1 - updatedReplicas: 1 - ---- -apiVersion: v1 -kind: Secret -metadata: - name: test-secret ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: test-cm diff --git a/testing/kuttl/e2e/pgadmin/02--check-settings.yaml b/testing/kuttl/e2e/pgadmin/02--check-settings.yaml deleted file mode 100644 index c68d032d1e..0000000000 --- a/testing/kuttl/e2e/pgadmin/02--check-settings.yaml +++ /dev/null @@ -1,56 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Log the amount of space on the startup volume. Assert that 4KiB are used. - - script: | - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- df --block-size=1K /etc/pgadmin | - awk '{ print } END { exit ($3 != "4") }' - - # Assert that current settings contain values from the spec. - - script: | - SETTINGS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/~postgres-operator/pgadmin.json - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${SETTINGS}" '"LOGIN_BANNER": "Custom KUTTL Login Banner\n"' && - contains "${SETTINGS}" '"SHOW_GRAVATAR_IMAGE": false' - } || { - echo >&2 'Wrong settings!' - echo "${SETTINGS}" - exit 1 - } - - - script: | - CONTENTS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/configMap - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${CONTENTS}" 'config' - } || { - echo >&2 'Wrong settings!' - echo "${CONTENTS}" - exit 1 - } - - - script: | - CONTENTS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/password - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${CONTENTS}" 'myPassword' - } || { - echo >&2 'Wrong settings!' - echo "${CONTENTS}" - exit 1 - } From fcf1baedfe5068ad54cadc4609b193b29a4b3b8f Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 4 Apr 2025 11:28:32 -0700 Subject: [PATCH 17/79] Add kuttl tests for OTel features. 
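
These tests exercise the OpenTelemetryLogs and OpenTelemetryMetrics feature gates end to end across an instrumented cluster, pgBouncer, pgAdmin, and the pgBackRest repo host. A rough sketch of running only this suite locally, assuming the kuttl kubectl plugin is installed, the operator is deployed with the feature gates shown in the workflow change below, and the harness expands KUTTL_PG_VERSION the way the other e2e suites do (the exact invocation and paths may differ):

    export PGO_FEATURE_GATES='TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true'
    export KUTTL_PG_VERSION=16   # any supported Postgres major version
    kubectl kuttl test testing/kuttl/e2e --test otel-logging-and-metrics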
--- .github/workflows/test.yaml | 9 +- .../otel-logging-and-metrics/00--cluster.yaml | 6 + .../01--add-instrumentation.yaml | 6 + .../02-assert-instance.yaml | 63 ++++++ .../03-assert-pgbouncer.yaml | 34 +++ .../04-assert-pgadmin.yaml | 30 +++ .../05-assert-repo-host-does-not-logs.yaml | 28 +++ .../otel-logging-and-metrics/06--backup.yaml | 6 + .../07-assert-repo-host-contains-logs.yaml | 26 +++ .../08--add-custom-queries.yaml | 6 + .../09-assert-custom-queries.yaml | 41 ++++ .../10--add-logs-exporter.yaml | 6 + .../11-assert-logs-exported.yaml | 46 ++++ .../e2e/otel-logging-and-metrics/README.md | 29 +++ .../files/00--create-cluster.yaml | 60 +++++ .../files/00-cluster-created.yaml | 112 ++++++++++ .../files/01--add-instrumentation.yaml | 62 ++++++ .../files/01-instrumentation-added.yaml | 119 ++++++++++ .../files/06--annotate-cluster.yaml | 8 + .../files/06-backup-completed.yaml | 8 + .../files/08--add-custom-queries.yaml | 75 +++++++ .../files/08-custom-queries-added.yaml | 123 +++++++++++ .../files/10--add-logs-exporter.yaml | 205 ++++++++++++++++++ .../files/10-logs-exporter-added.yaml | 154 +++++++++++++ 24 files changed, 1259 insertions(+), 3 deletions(-) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/README.md create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f4a8ba0e39..b8d3d68299 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -2,9 +2,11 @@ name: Tests on: pull_request: + branches: + - REL_5_8 push: branches: - - main + - 
REL_5_8 env: # Use the Go toolchain installed by setup-go @@ -111,6 +113,7 @@ jobs: registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0 registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0 registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0 + registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0 - run: go mod download - name: Build executable run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator @@ -143,8 +146,8 @@ jobs: --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0' \ --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0' \ --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2' \ - --env 'RELATED_IMAGE_COLLECTOR=ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0' \ - --env 'PGO_FEATURE_GATES=TablespaceVolumes=true' \ + --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0' \ + --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ --name 'postgres-operator' ubuntu \ postgres-operator - name: Install kuttl diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml new file mode 100644 index 0000000000..5957e0fed6 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/00--create-cluster.yaml +assert: +- files/00-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml new file mode 100644 index 0000000000..ddf7a754b4 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/01--add-instrumentation.yaml +assert: +- files/01-instrumentation-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml new file mode 100644 index 0000000000..235d07e47e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml @@ -0,0 +1,63 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that metrics from both the 5m +# and 5s queries are present, as well as patroni metrics. +# Then, check the collector logs for patroni, pgbackrest, and postgres logs. +# Finally, ensure the monitoring user exists and is configured.
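+# For reference, a healthy scrape is expected to contain lines shaped like the
+# following; the metric names are the ones asserted below, while the label sets
+# and values here are only illustrative:
+#   ccp_connection_stats_active 3
+#   ccp_database_size_bytes{dbname="postgres"} 7654321
+#   patroni_postgres_running 1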
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { + retry "5 second metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || { + retry "5 minute metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { + retry "patroni metric not found" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + + kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \ + -- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL' + DO $$ + DECLARE + result record; + BEGIN + SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring'; + ASSERT FOUND, 'user not found'; + END $$ + SQL diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml new file mode 100644 index 0000000000..87188b6f62 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml @@ -0,0 +1,34 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the pgbouncer pod are ready. +# Then, scrape the collector metrics and check that pgbouncer metrics are present. +# Then, check the collector logs for pgbouncer logs. 
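+# For reference, the asserted scrape contains a line shaped like this one
+# (labels and value illustrative):
+#   ccp_pgbouncer_clients_wait_seconds{database="pgbouncer"} 0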
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/role=pgbouncer) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_pgbouncer_clients_wait_seconds'; } || { + retry "pgbouncer metric not found" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgbouncer'; } || { + retry "pgbouncer logs not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml new file mode 100644 index 0000000000..71434397e1 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml @@ -0,0 +1,30 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the pgadmin pod are ready. +# Then, check the collector logs for pgadmin and gunicorn logs. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/pgadmin=otel-pgadmin) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgadmin'; } || { + retry "pgadmin logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope gunicorn.access'; } || { + retry "gunicorn logs not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml new file mode 100644 index 0000000000..31c077d540 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml @@ -0,0 +1,28 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the repo host pod are ready. +# Then, ensure that the collector logs for the repo-host do not contain any +# pgbackrest logs as the backup completed before the collector started up and we +# have the collector configured to only ingest new log records on start up. 
+
+- script: |
+    retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
+    check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
+    contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
+
+    pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
+      -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest)
+    [ "$pod" = "" ] && retry "Pod not found" && exit 1
+
+    condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
+    [ "$condition_json" = "" ] && retry "conditions not found" && exit 1
+    { check_containers_ready "$condition_json"; } || {
+      retry "containers not ready"
+      exit 1
+    }
+
+    logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
+    { ! contains "${logs}" 'InstrumentationScope pgbackrest'; } || {
+      retry "pgbackrest logs were found when we did not expect any"
+      exit 1
+    }
diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml
new file mode 100644
index 0000000000..cd4e92f32c
--- /dev/null
+++ b/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml
@@ -0,0 +1,6 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+apply:
+- files/06--annotate-cluster.yaml
+assert:
+- files/06-backup-completed.yaml
diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml
new file mode 100644
index 0000000000..a6cb86fb22
--- /dev/null
+++ b/testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml
@@ -0,0 +1,26 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+# First, check that all containers in the repo host pod are ready.
+# Then, ensure that the repo-host collector logs have pgbackrest logs.
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs were not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml new file mode 100644 index 0000000000..290090e129 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/08--add-custom-queries.yaml +assert: +- files/08-custom-queries-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml new file mode 100644 index 0000000000..9476bb564e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml @@ -0,0 +1,41 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the two metrics that we +# checked for earlier are no longer there. +# Then, check that the two custom metrics that we added are present. 
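+# (The removed metrics correspond to the 'remove' list under
+# instrumentation.metrics.customQueries in files/08--add-custom-queries.yaml;
+# the custom metrics come from the my-custom-queries ConfigMap added there.)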
+
+- script: |
+    retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
+    check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
+    contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
+
+    pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
+      -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres)
+    [ "$pod" = "" ] && retry "Pod not found" && exit 1
+
+    condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
+    [ "$condition_json" = "" ] && retry "conditions not found" && exit 1
+    { check_containers_ready "$condition_json"; } || {
+      retry "containers not ready"
+      exit 1
+    }
+
+    scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
+      curl --insecure --silent http://localhost:9187/metrics)
+    { ! contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || {
+      retry "5 second metric still present"
+      exit 1
+    }
+    { ! contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || {
+      retry "5 minute metric still present"
+      exit 1
+    }
+    { contains "${scrape_metrics}" 'custom_table_count'; } || {
+      retry "fast custom metric not found"
+      exit 1
+    }
+    { contains "${scrape_metrics}" 'custom_pg_stat_statements_row_count'; } || {
+      retry "slow custom metric not found"
+      exit 1
+    }
diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml
new file mode 100644
index 0000000000..55f43815dd
--- /dev/null
+++ b/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml
@@ -0,0 +1,6 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+apply:
+- files/10--add-logs-exporter.yaml
+assert:
+- files/10-logs-exporter-added.yaml
diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml
new file mode 100644
index 0000000000..8b86743cc0
--- /dev/null
+++ b/testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml
@@ -0,0 +1,46 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+# First, check that the standalone otel-collector container is ready.
+# Then, check the standalone collector logs for logs from all six potential
+# sources: patroni, pgbackrest, postgres, pgbouncer, pgadmin, and gunicorn.
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" -l app=opentelemetry) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbouncer'; } || { + retry "pgbouncer logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgadmin'; } || { + retry "pgadmin logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope gunicorn.access'; } || { + retry "gunicorn logs not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/README.md b/testing/kuttl/e2e/otel-logging-and-metrics/README.md new file mode 100644 index 0000000000..069a17f089 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/README.md @@ -0,0 +1,29 @@ +# Test OTel Logging and Metrics + +## Assumptions + +This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetryMetrics feature gates turned on and that you are using an operator versioned 5.8 or greater. + +## Process + +1. Create a basic cluster with pgbouncer and pgadmin in place. + 1. Ensure cluster comes up, that all containers are running and ready, and that the initial backup is complete. +2. Add the `instrumentation` spec to both PostgresCluster and PGAdmin manifests. + 1. Ensure that OTel collector containers and `crunchy-otel-collector` labels are added to the four pods (postgres instance, repo-host, pgbouncer, & pgadmin) and that the collector containers are running and ready. + 2. Assert that the instance pod collector is getting postgres and patroni metrics and postgres, patroni, and pgbackrest logs. + 3. Assert that the pgbouncer pod collector is getting pgbouncer metrics and logs. + 4. Assert that the pgAdmin pod collector is getting pgAdmin and gunicorn logs. + 5. Assert that the repo-host pod collector is NOT getting pgbackrest logs. We do not expect logs yet as the initial backup completed and created a log file; however, we configure the collector to only ingest new logs after it has started up. + 6. Create a manual backup and ensure that it completes successfully. + 7. Ensure that the repo-host pod collector is now getting pgbackrest logs. +3. Add both "add" and "remove" custom queries to the PostgresCluster `instrumentation` spec and create a ConfigMap that holds the custom queries to add. + 1. Ensure that the ConfigMap is created. + 2. Assert that the metrics that were removed (which we checked for earlier) are in fact no longer present in the collector metrics. + 3. Assert that the custom metrics that were added are present in the collector metrics. +4. 
Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors. + 1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready. + 2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn). + +### NOTES + +It is possible this test could flake if for some reason a component is not producing any logs. If we start to see this happen, we could either create some test steps that execute some actions that should trigger logs or turn up the log levels (although the latter option could create more problems as we have seen issues with the collector when the stream of logs is too voluminous). diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml new file mode 100644 index 0000000000..3345bef5f9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml new file mode 100644 index 0000000000..97bd3e2b97 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml @@ -0,0 +1,112 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + 
postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create +status: + succeeded: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +status: + containerStatuses: + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml new file mode 100644 index 0000000000..f02c09d380 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: {} +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml new file mode 100644 index 0000000000..b9bbe130bd --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml @@ -0,0 +1,119 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 
1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml new file mode 100644 index 0000000000..1133b7fe15 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml @@ -0,0 +1,8 @@ +--- +# Annotate the cluster to trigger a backup. 
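+# The operator watches this annotation: applying a new value requests a one-off
+# backup using the spec.backups.pgbackrest.manual settings (repo1, --type=diff).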
+apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster + annotations: + postgres-operator.crunchydata.com/pgbackrest-backup: do-it diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml new file mode 100644 index 0000000000..fed1f745b9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml @@ -0,0 +1,8 @@ +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/pgbackrest-backup: manual +status: + succeeded: 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml new file mode 100644 index 0000000000..ed133fc26a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml @@ -0,0 +1,75 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: + metrics: + customQueries: + add: + - name: slow-custom-queries + queries: + name: my-custom-queries + key: my-slow-custom-queries.yaml + collectionInterval: 300s + - name: 2fast2furious + queries: + name: my-custom-queries + key: my-fast-custom-queries.yaml + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries +data: + my-fast-custom-queries.yaml: | + - sql: > + SELECT count(*) FROM information_schema.tables; + metrics: + - metric_name: custom_table_count + value_column: count + description: Number of tables in the database + static_attributes: + server: "localhost:5432" + my-slow-custom-queries.yaml: | + - sql: > + SELECT count(*) FROM pg_stat_statements; + metrics: + - metric_name: custom_pg_stat_statements_row_count + value_column: count + description: Number of rows in pg_stat_statements + static_attributes: + server: "localhost:5432" diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml new file mode 100644 index 0000000000..344d52158e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml @@ -0,0 +1,123 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + 
ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml new file mode 100644 index 0000000000..9943f61341 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml @@ -0,0 +1,205 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: + metrics: + customQueries: + add: + - name: slow-custom-queries + queries: + name: my-custom-queries + key: my-slow-custom-queries.yaml + collectionInterval: 300s + - name: 2fast2furious + queries: + name: my-custom-queries + key: my-fast-custom-queries.yaml + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes + config: + exporters: + otlp: + endpoint: otel-collector:4317 + tls: + insecure: true + logs: + exporters: ['otlp'] + retentionPeriod: 1h +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: 
pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] + instrumentation: + config: + exporters: + otlp: + endpoint: otel-collector:4317 + tls: + insecure: true + logs: + exporters: ['otlp'] + retentionPeriod: 1h +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + labels: + app: opentelemetry + component: otel-collector-conf +data: + otel-collector-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: ${env:MY_POD_IP}:4317 + http: + endpoint: ${env:MY_POD_IP}:4318 + extensions: + zpages: {} + exporters: + debug: + verbosity: detailed + service: + extensions: [zpages] + pipelines: + logs/1: + receivers: [otlp] + exporters: [debug] +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector +spec: + ports: + - name: otlp-grpc # Default endpoint for OpenTelemetry gRPC receiver. + port: 4317 + protocol: TCP + targetPort: 4317 + - name: otlp-http # Default endpoint for OpenTelemetry HTTP receiver. + port: 4318 + protocol: TCP + targetPort: 4318 + - name: metrics # Default endpoint for querying metrics. + port: 8888 + selector: + component: otel-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector +spec: + selector: + matchLabels: + app: opentelemetry + component: otel-collector + minReadySeconds: 5 + progressDeadlineSeconds: 120 + replicas: 1 #TODO - adjust this to your own requirements + template: + metadata: + labels: + app: opentelemetry + component: otel-collector + spec: + containers: + - command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + image: otel/opentelemetry-collector:latest + name: otel-collector + resources: + limits: + cpu: 1000m + memory: 2Gi + requests: + cpu: 200m + memory: 400Mi + ports: + - containerPort: 55679 # Default endpoint for ZPages. + - containerPort: 4317 # Default endpoint for OpenTelemetry receiver. + - containerPort: 14250 # Default endpoint for Jaeger gRPC receiver. + - containerPort: 14268 # Default endpoint for Jaeger HTTP receiver. + - containerPort: 9411 # Default endpoint for Zipkin receiver. + - containerPort: 8888 # Default endpoint for querying metrics. 
+ env: + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: GOMEMLIMIT + value: 1600MiB + volumeMounts: + - name: otel-collector-config-vol + mountPath: /conf + volumes: + - configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml + name: otel-collector-config-vol diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml new file mode 100644 index 0000000000..47a28ee418 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml @@ -0,0 +1,154 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector +status: + readyReplicas: 1 + replicas: 1 + 
updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + app: opentelemetry +status: + containerStatuses: + - name: otel-collector + ready: true + started: true + phase: Running From 96d3f58987152ad85194d6d5a0d258e6dccde0a6 Mon Sep 17 00:00:00 2001 From: Tony Landreth <56887169+tony-landreth@users.noreply.github.com> Date: Tue, 8 Apr 2025 13:58:41 -0400 Subject: [PATCH 18/79] Updates GH workflows test (#703) The latest version discontinues pgadmin4 v4.30. This commit removes it from related images, updates to the latest developer images, and removes tests that rely on the old pgadmin image. --- .github/workflows/test.yaml | 48 +++++++++---------- Makefile | 6 +-- .../e2e/security-context/00--cluster.yaml | 3 -- .../kuttl/e2e/security-context/00-assert.yaml | 32 ------------- 4 files changed, 26 insertions(+), 63 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b8d3d68299..8d0657eb2c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -69,9 +69,9 @@ jobs: with: k3s-channel: "${{ matrix.kubernetes }}" prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513 - run: make createnamespaces check-envtest-existing env: @@ -103,16 +103,15 @@ jobs: with: k3s-channel: "${{ matrix.kubernetes }}" prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest - registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513 registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0 - run: go mod download - name: Build executable @@ -135,17 +134,16 @@ jobs: --env 'CHECK_FOR_UPGRADES=false' \ --env 
'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \ --env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \ - --env 'RELATED_IMAGE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35' \ - --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1' \ - --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4' \ - --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest' \ - --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest' \ - --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0' \ - --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0' \ - --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0' \ - --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2' \ + --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513' \ + --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513' \ + --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513' \ + --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513' \ + --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513' \ + --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513' \ + --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513' \ + --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513' \ + --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513' \ + --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.1-2513' \ --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0' \ --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ --name 'postgres-operator' ubuntu \ @@ -161,7 +159,7 @@ jobs: KUTTL_PG_UPGRADE_TO_VERSION: '17' KUTTL_PG_VERSION: '16' KUTTL_POSTGIS_VERSION: '3.4' - KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.6-2' + KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513' - run: | make check-kuttl && exit failed=$? 
diff --git a/Makefile b/Makefile index 5b291d7f66..744c747f3a 100644 --- a/Makefile +++ b/Makefile @@ -225,11 +225,11 @@ check-kuttl: ## example command: make check-kuttl KUTTL_TEST=' --config testing/kuttl/kuttl-test.yaml .PHONY: generate-kuttl -generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 15 -generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 16 +generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 16 +generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 17 generate-kuttl: export KUTTL_PG_VERSION ?= 16 generate-kuttl: export KUTTL_POSTGIS_VERSION ?= 3.4 -generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 +generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513 generate-kuttl: export KUTTL_TEST_DELETE_NAMESPACE ?= kuttl-test-delete-namespace generate-kuttl: ## Generate kuttl tests [ ! -d testing/kuttl/e2e-generated ] || rm -r testing/kuttl/e2e-generated diff --git a/testing/kuttl/e2e/security-context/00--cluster.yaml b/testing/kuttl/e2e/security-context/00--cluster.yaml index 5155eb4fc6..d754eedec6 100644 --- a/testing/kuttl/e2e/security-context/00--cluster.yaml +++ b/testing/kuttl/e2e/security-context/00--cluster.yaml @@ -18,9 +18,6 @@ spec: proxy: pgBouncer: replicas: 1 - userInterface: - pgAdmin: - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } monitoring: pgmonitor: exporter: {} diff --git a/testing/kuttl/e2e/security-context/00-assert.yaml b/testing/kuttl/e2e/security-context/00-assert.yaml index a6a5f48b6a..6df19c6608 100644 --- a/testing/kuttl/e2e/security-context/00-assert.yaml +++ b/testing/kuttl/e2e/security-context/00-assert.yaml @@ -92,38 +92,6 @@ spec: readOnlyRootFilesystem: true runAsNonRoot: true --- -# pgAdmin -apiVersion: v1 -kind: Pod -metadata: - labels: - postgres-operator.crunchydata.com/cluster: security-context - postgres-operator.crunchydata.com/data: pgadmin - postgres-operator.crunchydata.com/role: pgadmin - statefulset.kubernetes.io/pod-name: security-context-pgadmin-0 - name: security-context-pgadmin-0 -spec: - containers: - - name: pgadmin - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - initContainers: - - name: pgadmin-startup - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - - name: nss-wrapper-init - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true ---- # pgBouncer apiVersion: v1 kind: Pod From e3b88aff5c91cea5c09630cfef8cad8b0762660f Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 9 Apr 2025 13:26:04 -0700 Subject: [PATCH 19/79] Fix issue where the presence of --target-timeline was adding --target-action. Adjust tests and add more test cases. 
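For reviewers, a quick standalone sketch of the intended behavior (not part of
the diff itself): the anchored expression "--target[ =]" matches the bare
--target flag in either its "--target=..." or "--target ..." form, but not
flags that merely begin with "--target", so a restore that only sets
--target-timeline no longer has "--target-action=promote" appended for it.

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		// The same expression the fix compiles: "--target" followed by '=' or a space.
		targetRegex := regexp.MustCompile(`--target[ =]`)

		for _, opt := range []string{
			"--target=some-date",  // matches: an explicit recovery target
			"--target immediate",  // matches: space-separated form
			"--target-timeline=1", // no match: a different flag entirely
		} {
			fmt.Printf("%-22s %v\n", opt, targetRegex.MatchString(opt))
		}
		// Prints true, true, false under these assumptions.
	}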
--- .../controller/postgrescluster/pgbackrest.go | 8 +- .../postgrescluster/pgbackrest_test.go | 88 ++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index b7de247a5d..454c308bc9 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -1166,10 +1166,16 @@ func (r *Reconciler) reconcileRestoreJob(ctx context.Context, "--pg1-path=" + pgdata, "--repo=" + regexRepoIndex.FindString(repoName)}...) + // Look specifically for the "--target" flag, NOT flags that contain + // "--target" (e.g. "--target-timeline") + targetRegex, err := regexp.Compile("--target[ =]") + if err != nil { + return err + } var deltaOptFound, foundTarget bool for _, opt := range opts { switch { - case strings.Contains(opt, "--target"): + case targetRegex.Match([]byte(opt)): foundTarget = true case strings.Contains(opt, "--delta"): deltaOptFound = true diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index b63120b719..582fda0773 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -1777,6 +1777,9 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount, jobCount, pvcCount int invalidSourceRepo, invalidSourceCluster, invalidOptions bool expectedClusterCondition *metav1.Condition + expectedEventMessage string + expectedCommandPieces []string + missingCommandPieces []string } for _, dedicated := range []bool{true, false} { @@ -1799,6 +1802,8 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 1, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta"}, + missingCommandPieces: []string{"--target-action"}, }, }, { desc: "invalid source cluster", @@ -1812,6 +1817,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 0, jobCount: 0, pvcCount: 0, invalidSourceRepo: false, invalidSourceCluster: true, invalidOptions: false, expectedClusterCondition: nil, + expectedEventMessage: "does not exist", }, }, { desc: "invalid source repo", @@ -1825,6 +1831,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 0, invalidSourceRepo: true, invalidSourceCluster: false, invalidOptions: false, expectedClusterCondition: nil, + expectedEventMessage: "does not have a repo named", }, }, { desc: "invalid option: --repo=", @@ -1839,6 +1846,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--repo' is not allowed: please use the 'repoName' field instead.", }, }, { desc: "invalid option: --repo ", @@ -1853,6 +1861,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--repo' is not allowed: please use the 'repoName' field instead.", }, }, { desc: "invalid option: stanza", @@ -1867,6 +1876,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) 
{ configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--stanza' is not allowed: the operator will automatically set this option", }, }, { desc: "invalid option: pg1-path", @@ -1881,6 +1891,68 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--pg1-path' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "invalid option: target-action", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "invalid-target-action-option", RepoName: "repo1", + Options: []string{"--target-action"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "invalid-target-action-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 0, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, + expectedClusterCondition: nil, + expectedEventMessage: "Option '--target-action' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "invalid option: link-map", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "invalid-link-map-option", RepoName: "repo1", + Options: []string{"--link-map"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "invalid-link-map-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 0, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, + expectedClusterCondition: nil, + expectedEventMessage: "Option '--link-map' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "valid option: target-timeline", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "valid-target-timeline-option", RepoName: "repo1", + Options: []string{"--target-timeline=1"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "valid-target-timeline-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 1, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, + expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta", "--target-timeline=1"}, + missingCommandPieces: []string{"--target=", "--target-action=promote"}, + }, + }, { + desc: "valid option: target", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "valid-target-option", RepoName: "repo1", + Options: []string{"--target=some-date"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "valid-target-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 1, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, + expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta", "--target=some-date", "--target-action=promote"}, }, }, { desc: "cluster bootstrapped init condition missing", @@ -2003,6 +2075,16 @@ func 
TestReconcilePostgresClusterDataSource(t *testing.T) { if len(restoreJobs.Items) == 1 { assert.Assert(t, restoreJobs.Items[0].Labels[naming.LabelStartupInstance] != "") assert.Assert(t, restoreJobs.Items[0].Annotations[naming.PGBackRestConfigHash] != "") + for _, cmd := range tc.result.expectedCommandPieces { + assert.Assert(t, cmp.Contains( + strings.Join(restoreJobs.Items[0].Spec.Template.Spec.Containers[0].Command, " "), + cmd)) + } + for _, cmd := range tc.result.missingCommandPieces { + assert.Assert(t, !strings.Contains( + strings.Join(restoreJobs.Items[0].Spec.Template.Spec.Containers[0].Command, " "), + cmd)) + } } dataPVCs := &corev1.PersistentVolumeClaimList{} @@ -2040,7 +2122,11 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { "involvedObject.namespace": namespace, "reason": "InvalidDataSource", }) - return len(events.Items) == 1, err + eventExists := len(events.Items) > 0 + if eventExists { + assert.Assert(t, cmp.Contains(events.Items[0].Message, tc.result.expectedEventMessage)) + } + return eventExists, err })) } }) From 576c0693a081311d522fa239471235ae68b7aa8f Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Tue, 22 Apr 2025 14:12:43 -0500 Subject: [PATCH 20/79] Update images (#713) * Update images * Update x/net --- .github/workflows/test.yaml | 50 ++++++++++++++++++------------------- Makefile | 2 +- config/manager/manager.yaml | 22 ++++++++-------- go.mod | 2 +- go.sum | 4 +-- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8d0657eb2c..2ef4f1c6e6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -69,9 +69,9 @@ jobs: with: k3s-channel: "${{ matrix.kubernetes }}" prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516 - run: make createnamespaces check-envtest-existing env: @@ -103,16 +103,16 @@ jobs: with: k3s-channel: "${{ matrix.kubernetes }}" prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513 - registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516 + 
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516 + registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0 - run: go mod download - name: Build executable run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator @@ -134,17 +134,17 @@ jobs: --env 'CHECK_FOR_UPGRADES=false' \ --env 'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \ --env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \ - --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513' \ - --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513' \ - --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513' \ - --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513' \ - --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513' \ - --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513' \ - --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513' \ - --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.1-2513' \ - --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0' \ + --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516' \ + --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516' \ + --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516' \ + --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516' \ + --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516' \ + --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516' \ + --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516' \ + --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516' \ + --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516' \ + --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2516' \ + --env 
'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0' \ --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ --name 'postgres-operator' ubuntu \ postgres-operator @@ -159,7 +159,7 @@ jobs: KUTTL_PG_UPGRADE_TO_VERSION: '17' KUTTL_PG_VERSION: '16' KUTTL_POSTGIS_VERSION: '3.4' - KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513' + KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516' - run: | make check-kuttl && exit failed=$? diff --git a/Makefile b/Makefile index 744c747f3a..d50834deb8 100644 --- a/Makefile +++ b/Makefile @@ -229,7 +229,7 @@ generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 16 generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 17 generate-kuttl: export KUTTL_PG_VERSION ?= 16 generate-kuttl: export KUTTL_POSTGIS_VERSION ?= 3.4 -generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513 +generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516 generate-kuttl: export KUTTL_TEST_DELETE_NAMESPACE ?= kuttl-test-delete-namespace generate-kuttl: ## Generate kuttl tests [ ! -d testing/kuttl/e2e-generated ] || rm -r testing/kuttl/e2e-generated diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 4e00818d89..ad21ff6ce9 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -23,27 +23,27 @@ spec: - name: CRUNCHY_DEBUG value: "true" - name: RELATED_IMAGE_POSTGRES_16 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.3 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516" - name: RELATED_IMAGE_POSTGRES_17 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516" - name: RELATED_IMAGE_POSTGRES_17_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516" - name: RELATED_IMAGE_PGBACKREST - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516" - name: RELATED_IMAGE_PGBOUNCER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516" - name: RELATED_IMAGE_PGEXPORTER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.16.0-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516" - name: RELATED_IMAGE_PGUPGRADE - value: 
"registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516" - name: RELATED_IMAGE_STANDALONE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-7.1-2513" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2516" - name: RELATED_IMAGE_COLLECTOR - value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.0-0" + value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0" securityContext: allowPrivilegeEscalation: false capabilities: { drop: [ALL] } diff --git a/go.mod b/go.mod index 8500880c23..b28ed642c1 100644 --- a/go.mod +++ b/go.mod @@ -104,7 +104,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.37.0 // indirect + golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect golang.org/x/sync v0.12.0 // indirect golang.org/x/sys v0.31.0 // indirect diff --git a/go.sum b/go.sum index 03fbcbf0f1..8aa5d6edac 100644 --- a/go.sum +++ b/go.sum @@ -222,8 +222,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= -golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From f2f00c19d85caf70f9a9ad95f5b4286ee7226fdb Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 11 Apr 2025 12:28:51 -0500 Subject: [PATCH 21/79] Add pipeline for GitLab merge requests These are the tests we want to run when submitting changes to this PGO release branch. --- .gitlab-ci.yml | 147 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000..532edaca1e --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,147 @@ +# Copyright Crunchy Data Solutions, Inc. All rights reserved. 
+# +# schema-documentation: https://docs.gitlab.com/ci/yaml +# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json + +spec: + inputs: + + # https://go.dev/doc/install/source#environment + architectures: + type: array + default: ['amd64','arm64'] + description: > + The CPU architectures on which to run tests + + # TODO(retention): We can increase the retention on scheduled pipelines after + # https://gitlab.com/groups/gitlab-org/-/epics/16321 + retention: + type: string + default: 2d # Enough time to find and address MR failures the following day + description: > + How long to keep reports; see https://docs.gitlab.com/ci/yaml#artifactsexpire_in +--- + +# https://docs.gitlab.com/ci/yaml/workflow +workflow: + rules: + - if: >- + ($CI_PIPELINE_SOURCE == "merge_request_event") || + ($CI_PIPELINE_SOURCE == "schedule") || + ($CI_PIPELINE_SOURCE == "web") + +variables: + # https://docs.gitlab.com/runner/configuration/feature-flags + # Show the duration of individual script items in the job log. + FF_SCRIPT_SECTIONS: 'true' + +# See: [.github/workflows/lint.yaml] +# This uses a specific minor version of golangci-lint to ensure new code conforms +# to the rules we set when this release branch was cut. We do not want new rules +# suggesting sweeping changes to our release branches. +# +# NOTE(2025-04): Some versions of golangci-lint eat memory until they are killed by Linux. +# > Ops Team: +# > this container was hanging around even after the ci job died +# > `golangci-lint run` was using ~240GB of RAM and caused the system to swap +# +# | | go1.21.13 | go1.22.12 | go1.23.8 | go1.24.2 | +# | golangci-lint@v1.54.2 | typecheck | typecheck | panic | typecheck | +# | golangci-lint@v1.55.2 | typecheck | typecheck | panic | typecheck | +# | golangci-lint@v1.56.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.57.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.58.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.59.1 | killed | killed | panic | typecheck | +# | golangci-lint@v1.60.3 | go1.22.1 | go1.23.0 | pass | typecheck | +# | golangci-lint@v1.61.0 | go1.22.1 | go1.23.0 | pass | typecheck | +# | golangci-lint@v1.62.2 | go1.22.1 | go1.23.0 | pass | recvcheck | +# | golangci-lint@v1.63.4 | go1.22.1 | go1.23.0 | pass | pass | +# | golangci-lint@v1.64.8 | go1.23.0 | go1.23.0 | pass | pass | +golang-lint: + stage: build + needs: [] + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Download golangci-lint and log its version. + - |- + TOOL='github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64' + go run "${TOOL}" version + + # Produce a report for the GitLab UI. This only fails when the tool crashes. + - >- + go run "${TOOL}" run + --concurrency 2 + --timeout 5m + --issues-exit-code 0 + --max-issues-per-linter 0 + --max-same-issues 0 + --out-format junit-xml-extended > golangci-lint.junit.xml + + # Fail the job if there are any issues found and print a handful to the log. + - >- + go run "${TOOL}" run + --concurrency 2 + --timeout 5m + --verbose + + # Send the report to GitLab. 
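+  # The junit report type is what GitLab renders in a merge request's
+  # test summary, so lint findings surface there as test results.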
+ artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: golangci-lint.junit.xml + +# See: [.github/workflows/test.yaml] +must-commit-generated: + stage: build + needs: [] + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + - make check-generate + +# See: [.github/workflows/test.yaml] +# This uses the latest version of Go we have internally. +go-test: + stage: test + needs: + - job: must-commit-generated + tags: ['image=container','cpu=${TARGET_ARCHITECTURE}'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + parallel: + matrix: + - TARGET_ARCHITECTURE: $[[ inputs.architectures ]] + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Tidy the file and fail if it changed. + - go mod tidy && git diff --exit-code -- go.mod + - go mod download + + # Run the fast/unit tests first. Failure here fails the job. + - >- + make check + GO_TEST='go run gotest.tools/gotestsum@latest --' + GOTESTSUM_JUNITFILE="make-check-${TARGET_ARCHITECTURE}.junit.xml" + + # Run the entire test suite using a local Kubernetes API. + - >- + make check-envtest + ENVTEST_K8S_VERSION='1.32' + GO_TEST='go run gotest.tools/gotestsum@latest --' + GOTESTSUM_JUNITFILE="make-check-envtest-${TARGET_ARCHITECTURE}.junit.xml" + + # Send the reports to GitLab. + artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: '*.junit.xml' From 36966d30316357a26acaf203e7fddfcff0d48025 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Thu, 17 Apr 2025 18:02:32 -0500 Subject: [PATCH 22/79] Scan dependency licenses and vulnerabilities Issue: PGO-1802 --- .gitlab-ci.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 532edaca1e..161ccfc70c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -145,3 +145,45 @@ go-test: expire_in: '$[[ inputs.retention ]]' reports: junit: '*.junit.xml' + +# See: [.github/workflows/trivy.yaml] +trivy: + stage: test + needs: [] + rules: + # Run this job during scheduled pipelines and merge requests that change dependencies. + - changes: ['go.mod'] + + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Download Trivy and log its version. + - |- + VERSION=$(go list -m -f '{{.Version}}' github.com/aquasecurity/trivy@latest) + TOOL="github.com/aquasecurity/trivy/cmd/trivy@${VERSION}" + go run -exec true "${TOOL}" + + # Download the JUnit template for this version. + - curl -sSL -o /tmp/trivy-junit.tpl "https://raw.githubusercontent.com/aquasecurity/trivy/refs/tags/${VERSION}/contrib/junit.tpl" + + # Generate a report and fail when there are issues that can be fixed. + # Trivy needs a populated Go module cache to detect Go module licenses. + - go mod download + - >- + go run "${TOOL}" filesystem . --exit-code 1 + --scanners license,secret,vuln + --ignore-unfixed + --no-progress + --format template + --template '@/tmp/trivy-junit.tpl' + --output 'trivy.junit.xml' + + # Send the report to GitLab. 
+ artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: 'trivy.junit.xml' From 8fa266cbcf5bcfbff1928fa7cd929c2fda79332b Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Wed, 23 Apr 2025 04:42:00 -0500 Subject: [PATCH 23/79] Run govulncheck during scheduled pipelines Issue: PGO-2441 --- .gitlab-ci.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 161ccfc70c..fb5abdea61 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -146,6 +146,29 @@ go-test: reports: junit: '*.junit.xml' +# See: [.github/workflows/govulncheck.yaml] +govulncheck: + stage: test + needs: [] + rules: + # Run this job during scheduled pipelines and merge requests that change dependencies. + - changes: ['go.mod'] + + tags: ['image=container','cpu=${TARGET_ARCHITECTURE}'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + parallel: + matrix: + - TARGET_ARCHITECTURE: $[[ inputs.architectures ]] + script: + # Download govulncheck and log its version. + - |- + TOOL='golang.org/x/vuln/cmd/govulncheck@latest' + go run "${TOOL}" --version + + # Print any detected vulnerabilities to the log. + # This fails the job when it detects a vulnerability in called code. + - go run "${TOOL}" --format text --show verbose ./... + # See: [.github/workflows/trivy.yaml] trivy: stage: test From 8fada51a83f5fcd1af34ba00daf709411bdbf6ac Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 23 Apr 2025 17:00:11 -0700 Subject: [PATCH 24/79] Fix for PGO-2380: Only add logrotate volume mounts to instance pod when backups are enabled. Add kuttl tests to ensure that collector will run on postgres instance when backups are disabled. --- .../controller/postgrescluster/instance.go | 7 +- .../12--cluster-no-backups.yaml | 6 ++ .../13-assert-instance.yaml | 55 ++++++++++++++ .../14--cluster-add-backups.yaml | 6 ++ .../15--remove-backups.yaml | 6 ++ .../16--annotate-cluster.yaml | 7 ++ .../e2e/otel-logging-and-metrics/README.md | 6 ++ .../files/01-instrumentation-added.yaml | 1 + .../files/08-custom-queries-added.yaml | 1 + .../files/10-logs-exporter-added.yaml | 1 + .../files/12--create-cluster.yaml | 16 +++++ .../files/12-cluster-created.yaml | 36 ++++++++++ .../files/14--add-backups.yaml | 31 ++++++++ .../files/14-backups-added.yaml | 71 +++++++++++++++++++ .../files/16-backups-removed.yaml | 36 ++++++++++ 15 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 85f23d960b..e24c0aca7b 100644 --- a/internal/controller/postgrescluster/instance.go +++ 
b/internal/controller/postgrescluster/instance.go @@ -1218,11 +1218,12 @@ func (r *Reconciler) reconcileInstance( } } - // For now, we are not using logrotate to rotate postgres or patroni logs - // but we are using it for pgbackrest logs in the postgres pod + // For now, we are not using logrotate to rotate postgres or patroni logs, + // but we are using it for pgbackrest logs in the postgres pod, so we will + // set includeLogrotate to true, but only if backups are enabled. collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template, []corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword, - []string{naming.PGBackRestPGDataLogPath}, true, true) + []string{naming.PGBackRestPGDataLogPath}, backupsSpecFound, true) } // Add postgres-exporter to the instance Pod spec diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml new file mode 100644 index 0000000000..9798566140 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/12--create-cluster.yaml +assert: +- files/12-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml new file mode 100644 index 0000000000..411c910486 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml @@ -0,0 +1,55 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that a postgres +# metric is present, as well as a patroni metric. +# Then, check the collector logs for patroni, and postgres logs. +# Finally, ensure the monitoring user exists and is configured. 
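+# Each failed check prints a hint, sleeps, and exits non-zero so that the
+# next kuttl retry of this assert step re-runs the whole script (kuttl
+# keeps retrying until the assert timeout).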
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster-no-backups,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { + retry "5 second metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { + retry "patroni metric not found" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + + kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \ + -- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL' + DO $$ + DECLARE + result record; + BEGIN + SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring'; + ASSERT FOUND, 'user not found'; + END $$ + SQL diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml new file mode 100644 index 0000000000..f063eeda7b --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/14--add-backups.yaml +assert: +- files/14-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml new file mode 100644 index 0000000000..abd64d40a9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- command: |- + kubectl patch postgrescluster otel-cluster-no-backups --type 'merge' -p '{"spec":{"backups": null}}' + namespaced: true diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml new file mode 100644 index 0000000000..f37696ecf2 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml @@ -0,0 +1,7 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true" + namespaced: true +assert: +- files/16-backups-removed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/README.md b/testing/kuttl/e2e/otel-logging-and-metrics/README.md index 069a17f089..e14bdd899c 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/README.md +++ b/testing/kuttl/e2e/otel-logging-and-metrics/README.md @@ -23,6 +23,12 @@ 
This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry 4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors. 1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready. 2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn). +5. Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups. + 1. Ensure that the cluster comes up and the database and collector containers are running and ready. + 2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods. + 3. Remove the backups spec from the new cluster. + 4. Annotate the cluster to allow backups to be removed. + 5. Ensure that the repo-host pod is destroyed, pgbackrest is removed from the instance pod, and the collector continues to run on the instance pod. ### NOTES diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml index b9bbe130bd..858b78ff83 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml @@ -46,6 +46,7 @@ metadata: labels: postgres-operator.crunchydata.com/data: pgbackrest postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" status: containerStatuses: - name: collector diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml index 344d52158e..1a756b7a73 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml @@ -46,6 +46,7 @@ metadata: labels: postgres-operator.crunchydata.com/data: pgbackrest postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" status: containerStatuses: - name: collector diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml index 47a28ee418..f730898692 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml @@ -46,6 +46,7 @@ metadata: labels: postgres-operator.crunchydata.com/data: pgbackrest postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" status: containerStatuses: - name: collector diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml new file mode 100644 index 0000000000..3983405b34 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: 
PostgresCluster +metadata: + name: otel-cluster-no-backups +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml new file mode 100644 index 0000000000..c9aad7ec25 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml @@ -0,0 +1,36 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml new file mode 100644 index 0000000000..bb7c70ea37 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml new file mode 100644 index 0000000000..52221d2349 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml @@ -0,0 +1,71 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + 
postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create +status: + succeeded: 1 +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml new file mode 100644 index 0000000000..c9aad7ec25 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml @@ -0,0 +1,36 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary From a6f17a3ad349b4e8d193d23607905f2f799ece00 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 25 Apr 2025 11:42:26 -0700 Subject: [PATCH 25/79] Reorder otel kuttl test so that we check pgadmin logs right after we add the instrumentation spec to PGAdmin. 
--- ...d-instrumentation-to-postgrescluster.yaml} | 0 ...=> 04-assert-repo-host-does-not-logs.yaml} | 0 .../otel-logging-and-metrics/05--backup.yaml | 6 + .../otel-logging-and-metrics/06--backup.yaml | 6 - ...=> 06-assert-repo-host-contains-logs.yaml} | 0 .../07--add-instrumentation-to-pgadmin.yaml | 6 + .../08--add-custom-queries.yaml | 6 - ...rt-pgadmin.yaml => 08-assert-pgadmin.yaml} | 0 .../09--add-custom-queries.yaml | 6 + .../10--add-logs-exporter.yaml | 6 - ...ies.yaml => 10-assert-custom-queries.yaml} | 0 .../11--add-logs-exporter.yaml | 6 + .../12--cluster-no-backups.yaml | 6 - ...rted.yaml => 12-assert-logs-exported.yaml} | 0 .../13--cluster-no-backups.yaml | 6 + ...-instance.yaml => 14-assert-instance.yaml} | 0 ...kups.yaml => 15--cluster-add-backups.yaml} | 4 +- ...e-backups.yaml => 16--remove-backups.yaml} | 0 ...cluster.yaml => 17--annotate-cluster.yaml} | 2 +- .../files/01--add-instrumentation.yaml | 26 ---- .../files/01-instrumentation-added.yaml | 4 - ...cluster.yaml => 05--annotate-cluster.yaml} | 0 ...ompleted.yaml => 05-backup-completed.yaml} | 0 .../files/07--add-instrumentation.yaml | 26 ++++ .../files/07-instrumentation-added.yaml | 120 ++++++++++++++++++ ...eries.yaml => 09--add-custom-queries.yaml} | 0 ...dded.yaml => 09-custom-queries-added.yaml} | 0 ...porter.yaml => 11--add-logs-exporter.yaml} | 0 ...added.yaml => 11-logs-exporter-added.yaml} | 0 ...e-cluster.yaml => 13--create-cluster.yaml} | 0 ...r-created.yaml => 13-cluster-created.yaml} | 0 ...-add-backups.yaml => 15--add-backups.yaml} | 0 ...ckups-added.yaml => 15-backups-added.yaml} | 0 ...s-removed.yaml => 17-backups-removed.yaml} | 0 34 files changed, 179 insertions(+), 57 deletions(-) rename testing/kuttl/e2e/otel-logging-and-metrics/{01--add-instrumentation.yaml => 01--add-instrumentation-to-postgrescluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{05-assert-repo-host-does-not-logs.yaml => 04-assert-repo-host-does-not-logs.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{07-assert-repo-host-contains-logs.yaml => 06-assert-repo-host-contains-logs.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{04-assert-pgadmin.yaml => 08-assert-pgadmin.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{09-assert-custom-queries.yaml => 10-assert-custom-queries.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{11-assert-logs-exported.yaml => 12-assert-logs-exported.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{13-assert-instance.yaml => 14-assert-instance.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{14--cluster-add-backups.yaml => 15--cluster-add-backups.yaml} (50%) rename 
testing/kuttl/e2e/otel-logging-and-metrics/{15--remove-backups.yaml => 16--remove-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{16--annotate-cluster.yaml => 17--annotate-cluster.yaml} (86%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{06--annotate-cluster.yaml => 05--annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{06-backup-completed.yaml => 05-backup-completed.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/files/{08--add-custom-queries.yaml => 09--add-custom-queries.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{08-custom-queries-added.yaml => 09-custom-queries-added.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{10--add-logs-exporter.yaml => 11--add-logs-exporter.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{10-logs-exporter-added.yaml => 11-logs-exporter-added.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{12--create-cluster.yaml => 13--create-cluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{12-cluster-created.yaml => 13-cluster-created.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{14--add-backups.yaml => 15--add-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{14-backups-added.yaml => 15-backups-added.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{16-backups-removed.yaml => 17-backups-removed.yaml} (100%) diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation-to-postgrescluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation-to-postgrescluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-does-not-logs.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/05-assert-repo-host-does-not-logs.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-does-not-logs.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml new file mode 100644 index 0000000000..166ef662a5 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/05--annotate-cluster.yaml +assert: +- files/05-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml deleted file mode 100644 index cd4e92f32c..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/06--annotate-cluster.yaml -assert: -- files/06-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-repo-host-contains-logs.yaml similarity index 100% rename from 
testing/kuttl/e2e/otel-logging-and-metrics/07-assert-repo-host-contains-logs.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/06-assert-repo-host-contains-logs.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml new file mode 100644 index 0000000000..55f2179939 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/07--add-instrumentation.yaml +assert: +- files/07-instrumentation-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml deleted file mode 100644 index 290090e129..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/08--add-custom-queries.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/08--add-custom-queries.yaml -assert: -- files/08-custom-queries-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/08-assert-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/04-assert-pgadmin.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/08-assert-pgadmin.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml new file mode 100644 index 0000000000..223b1d71a8 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/09--add-custom-queries.yaml +assert: +- files/09-custom-queries-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml deleted file mode 100644 index 55f43815dd..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/10--add-logs-exporter.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/10--add-logs-exporter.yaml -assert: -- files/10-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/10-assert-custom-queries.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/09-assert-custom-queries.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/10-assert-custom-queries.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml new file mode 100644 index 0000000000..298adb06b4 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/11--add-logs-exporter.yaml +assert: +- files/11-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml deleted file mode 100644 index 9798566140..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/12--cluster-no-backups.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/12--create-cluster.yaml 
-assert: -- files/12-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-logs-exported.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/11-assert-logs-exported.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/12-assert-logs-exported.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml new file mode 100644 index 0000000000..b4c6f272f6 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/13--create-cluster.yaml +assert: +- files/13-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-instance.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/13-assert-instance.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/14-assert-instance.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml similarity index 50% rename from testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml index f063eeda7b..3bdd0b37e8 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/14--cluster-add-backups.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep apply: -- files/14--add-backups.yaml +- files/15--add-backups.yaml assert: -- files/14-backups-added.yaml +- files/15-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/16--remove-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/15--remove-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/16--remove-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml similarity index 86% rename from testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml index f37696ecf2..2da3da58a3 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/16--annotate-cluster.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml @@ -4,4 +4,4 @@ commands: - command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true" namespaced: true assert: -- files/16-backups-removed.yaml +- files/17-backups-removed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml index f02c09d380..ebde9f7caa 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml @@ -34,29 +34,3 @@ spec: proxy: pgBouncer: {} instrumentation: {} ---- -apiVersion: postgres-operator.crunchydata.com/v1beta1 -kind: PGAdmin -metadata: - name: otel-pgadmin -spec: - 
users: - - username: otel@example.com - role: Administrator - passwordRef: - name: pgadmin-password-secret - key: otel-password - dataVolumeClaimSpec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: 1Gi - serverGroups: - - name: supply - # An empty selector selects all postgresclusters in the Namespace - postgresClusterSelector: {} - config: - settings: - AUTHENTICATION_SOURCES: ['internal'] - instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml index 858b78ff83..672bdd2d1d 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml @@ -99,12 +99,8 @@ metadata: postgres-operator.crunchydata.com/data: pgadmin postgres-operator.crunchydata.com/role: pgadmin postgres-operator.crunchydata.com/pgadmin: otel-pgadmin - postgres-operator.crunchydata.com/crunchy-otel-collector: "true" status: containerStatuses: - - name: collector - ready: true - started: true - name: pgadmin ready: true started: true diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/05--annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/06--annotate-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/05--annotate-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/05-backup-completed.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/06-backup-completed.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/05-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml new file mode 100644 index 0000000000..166f0d3347 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml new file mode 100644 index 0000000000..858b78ff83 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml @@ -0,0 +1,120 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + 
postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/09--add-custom-queries.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/08--add-custom-queries.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/09--add-custom-queries.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/09-custom-queries-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/09-custom-queries-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-logs-exporter.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/10--add-logs-exporter.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-logs-exporter.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/11-logs-exporter-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml rename to 
testing/kuttl/e2e/otel-logging-and-metrics/files/11-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/13--create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/12--create-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/13--create-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/13-cluster-created.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/12-cluster-created.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/13-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/15--add-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/14--add-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/15--add-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/15-backups-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/14-backups-added.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/15-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/17-backups-removed.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/16-backups-removed.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/17-backups-removed.yaml From 4bde4637a01a407b3c17c6f5a2e4817c2166d156 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Mon, 5 May 2025 12:25:42 -0700 Subject: [PATCH 26/79] Remove fieldPath from CRD validation. --- .../postgres-operator.crunchydata.com_postgresclusters.yaml | 3 +-- .../v1beta1/postgrescluster_types.go | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index bfa7d99c8b..108eb59e58 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -6675,8 +6675,7 @@ spec: - stanza type: object x-kubernetes-validations: - - fieldPath: .repo - message: Only S3, GCS or Azure repos can be used as a pgBackRest + - message: Only S3, GCS or Azure repos can be used as a pgBackRest data source. rule: '!has(self.repo.volume)' postgresCluster: diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 59029958f4..71be93a55e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -204,8 +204,9 @@ type DataSource struct { // PostgreSQL data directory for a new PostgreSQL cluster using a pgBackRest restore. 
// The PGBackRest field is incompatible with the PostgresCluster field: only one // data source can be used for pre-populating a new PostgreSQL cluster + // TODO(k8s-1.28): fieldPath=`.repo` // +optional - // +kubebuilder:validation:XValidation:rule="!has(self.repo.volume)", message="Only S3, GCS or Azure repos can be used as a pgBackRest data source.", fieldPath=".repo" + // +kubebuilder:validation:XValidation:rule="!has(self.repo.volume)", message="Only S3, GCS or Azure repos can be used as a pgBackRest data source." PGBackRest *PGBackRestDataSource `json:"pgbackrest,omitempty"` // Defines a pgBackRest data source that can be used to pre-populate the PostgreSQL data From 3503fc92aff550ad5504f69db2d3c787a5ce2b95 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Mon, 5 May 2025 12:01:56 -0700 Subject: [PATCH 27/79] Add ccp_replication_slots to OTel metrics. Use COALESCE to change NULLs to zero-values. --- internal/collector/eq_pg16_metrics.yaml | 48 +++++++++++++++++++ .../collector/generated/eq_pg16_metrics.json | 1 + .../collector/generated/gte_pg17_metrics.json | 2 +- .../collector/generated/lt_pg16_metrics.json | 2 +- internal/collector/gte_pg17_metrics.yaml | 43 +++++++++++++++++ internal/collector/lt_pg16_metrics.yaml | 43 +++++++++++++++++ internal/collector/postgres_metrics.go | 10 ++++ 7 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 internal/collector/eq_pg16_metrics.yaml create mode 100644 internal/collector/generated/eq_pg16_metrics.json diff --git a/internal/collector/eq_pg16_metrics.yaml b/internal/collector/eq_pg16_metrics.yaml new file mode 100644 index 0000000000..2abc0e2208 --- /dev/null +++ b/internal/collector/eq_pg16_metrics.yaml @@ -0,0 +1,48 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , COALESCE(s.conflicting::int, 0) + , 0 AS failover + , 0 AS synced + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. 
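+      # conflicting is NULL for physical slots; the COALESCE(s.conflicting::int, 0)
+      # in the query above maps that NULL to 0 so this metric always has a value.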
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/generated/eq_pg16_metrics.json b/internal/collector/generated/eq_pg16_metrics.json new file mode 100644 index 0000000000..a695d811d9 --- /dev/null +++ b/internal/collector/generated/eq_pg16_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/gte_pg17_metrics.json b/internal/collector/generated/gte_pg17_metrics.json index 563abf01b3..b0c312b3aa 100644 --- a/internal/collector/generated/gte_pg17_metrics.json +++ b/internal/collector/generated/gte_pg17_metrics.json @@ -1 +1 @@ -[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_written FROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"}] +[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_written FROM pg_catalog.pg_stat_checkpointer 
c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/lt_pg16_metrics.json b/internal/collector/generated/lt_pg16_metrics.json index 98bb0cc213..acc1a5f30e 100644 --- a/internal/collector/generated/lt_pg16_metrics.json +++ b/internal/collector/generated/lt_pg16_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows 
inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"}] +[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live 
rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. 
Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , 0 AS conflicting\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/gte_pg17_metrics.yaml b/internal/collector/gte_pg17_metrics.yaml index de8f6786f5..ea5d6c0fe3 100644 --- a/internal/collector/gte_pg17_metrics.yaml +++ b/internal/collector/gte_pg17_metrics.yaml @@ -70,3 +70,46 @@ value_column: buffers_written static_attributes: server: "localhost:5432" + + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , COALESCE(s.conflicting::int, 0) + , COALESCE(s.failover::int, 0) + , COALESCE(s.synced::int, 0) + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. 
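+        # Postgres 17 exposes conflicting, failover, and synced directly; each
+        # is wrapped in COALESCE in the query above so the receiver never reads
+        # a NULL (conflicting, for example, is always NULL for physical slots).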
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/lt_pg16_metrics.yaml b/internal/collector/lt_pg16_metrics.yaml index ca9fe8a0c8..afa4e48228 100644 --- a/internal/collector/lt_pg16_metrics.yaml +++ b/internal/collector/lt_pg16_metrics.yaml @@ -133,3 +133,46 @@ attribute_columns: ["dbname", "relname", "schemaname"] static_attributes: server: "localhost:5432" + + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , 0 AS conflicting + , 0 AS failover + , 0 AS synced + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. 
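+        # Versions before Postgres 16 have none of the conflicting, failover,
+        # or synced columns in pg_replication_slots, so the query above selects
+        # literal zeros to keep the metric shape consistent across versions.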
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index 4530c431a3..f3aadb0142 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -30,6 +30,9 @@ var gtePG17 json.RawMessage //go:embed "generated/lt_pg17_metrics.json" var ltPG17 json.RawMessage +//go:embed "generated/eq_pg16_metrics.json" +var eqPG16 json.RawMessage + //go:embed "generated/gte_pg16_metrics.json" var gtePG16 json.RawMessage @@ -75,6 +78,13 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust log.Error(err, "error compiling postgres metrics") } + if inCluster.Spec.PostgresVersion == 16 { + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, eqPG16) + } + if err != nil { + log.Error(err, "error compiling postgres metrics") + } + if inCluster.Spec.PostgresVersion >= 16 { fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG16) } else { From 13c766a1069524f2b09180c2497c5fe743cefe6b Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 21 Apr 2025 16:38:15 -0500 Subject: [PATCH 28/79] Create directories with group-write permissions The group-write permission is important for persistent file systems in environments where different containers are assigned different UIDs over time. Some network file systems, however, reject attempts to set POSIX directory permissions. CIFS and NFS are notable in this regard. Issue: PGO-2417 --- internal/collector/instance.go | 3 +-- internal/controller/standalone_pgadmin/pod.go | 6 ++--- .../controller/standalone_pgadmin/pod_test.go | 8 +++---- internal/pgbackrest/config.go | 2 +- internal/pgbackrest/config_test.go | 2 +- internal/postgres/config.go | 6 ++--- internal/postgres/reconcile_test.go | 6 ++--- internal/shell/paths.go | 22 +++++++++++++------ internal/shell/paths_test.go | 12 +++++----- 9 files changed, 37 insertions(+), 30 deletions(-) diff --git a/internal/collector/instance.go b/internal/collector/instance.go index f37eb7f4c3..8158d9dda3 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -180,8 +180,7 @@ func startCommand(logDirectories []string, includeLogrotate bool) []string { if len(logDirectories) != 0 { for _, logDir := range logDirectories { mkdirScript = mkdirScript + ` -` + shell.MakeDirectories(0o775, logDir, - path.Join(logDir, "receiver")) +` + shell.MakeDirectories(logDir, path.Join(logDir, "receiver")) } } diff --git a/internal/controller/standalone_pgadmin/pod.go b/internal/controller/standalone_pgadmin/pod.go index 88f483c570..734789bd3c 100644 --- a/internal/controller/standalone_pgadmin/pod.go +++ b/internal/controller/standalone_pgadmin/pod.go @@ -442,10 +442,10 @@ with open('` + configMountPath + `/` + gunicornConfigFilePath + `') as _f: script := strings.Join([]string{ // Create the config directory so Kubernetes can mount it later. // - https://issue.k8s.io/121294 - shell.MakeDirectories(0o775, scriptMountPath, configMountPath), + shell.MakeDirectories(scriptMountPath, configMountPath), - // Create the logs directory with g+rwx to ensure pgAdmin can write to it as well. - shell.MakeDirectories(0o775, dataMountPath, LogDirectoryAbsolutePath), + // Create the logs directory and ensure pgAdmin can write to it as well. + shell.MakeDirectories(dataMountPath, LogDirectoryAbsolutePath), // Write the system and server configurations. 
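		// e.g. (per pod_test.go below) the rendered script contains:
		//   echo "$1" > /etc/pgadmin/config_system.py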
`echo "$1" > ` + scriptMountPath + `/config_system.py`, diff --git a/internal/controller/standalone_pgadmin/pod_test.go b/internal/controller/standalone_pgadmin/pod_test.go index 84f6e56cdc..b30b35bc65 100644 --- a/internal/controller/standalone_pgadmin/pod_test.go +++ b/internal/controller/standalone_pgadmin/pod_test.go @@ -137,8 +137,8 @@ initContainers: - -ceu - -- - |- - mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' - mkdir -p '/var/lib/pgadmin/logs' && chmod 0775 '/var/lib/pgadmin/logs' + mkdir -p '/etc/pgadmin/conf.d' && { chmod 0775 '/etc/pgadmin/conf.d' || :; } + mkdir -p '/var/lib/pgadmin/logs' && { chmod 0775 '/var/lib/pgadmin/logs' || :; } echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup @@ -342,8 +342,8 @@ initContainers: - -ceu - -- - |- - mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' - mkdir -p '/var/lib/pgadmin/logs' && chmod 0775 '/var/lib/pgadmin/logs' + mkdir -p '/etc/pgadmin/conf.d' && { chmod 0775 '/etc/pgadmin/conf.d' || :; } + mkdir -p '/var/lib/pgadmin/logs' && { chmod 0775 '/var/lib/pgadmin/logs' || :; } echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 498be32d3b..c99e952afc 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -177,7 +177,7 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, container := corev1.Container{ // TODO(log-rotation): The second argument here should be the path // of the volume mount. Find a way to calculate that consistently. - Command: []string{"bash", "-c", shell.MakeDirectories(0o775, path.Dir(pgBackRestLogPath), pgBackRestLogPath)}, + Command: []string{"bash", "-c", shell.MakeDirectories(path.Dir(pgBackRestLogPath), pgBackRestLogPath)}, Image: config.PGBackRestContainerImage(cluster), ImagePullPolicy: cluster.Spec.ImagePullPolicy, Name: naming.ContainerPGBackRestLogDirInit, diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index 08aaaf8d94..a314ad3102 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -292,7 +292,7 @@ func TestMakePGBackrestLogDir(t *testing.T) { for _, c := range podTemplate.Spec.InitContainers { if c.Name == naming.ContainerPGBackRestLogDirInit { // ignore "bash -c", should skip repo with no volume - assert.Equal(t, `mkdir -p '/pgbackrest/repo2/log' && chmod 0775 '/pgbackrest/repo2/log'`, c.Command[2]) + assert.Equal(t, `mkdir -p '/pgbackrest/repo2/log' && { chmod 0775 '/pgbackrest/repo2/log' || :; }`, c.Command[2]) assert.Equal(t, c.Image, "test-image") assert.Equal(t, c.ImagePullPolicy, corev1.PullAlways) assert.Assert(t, !cmp.DeepEqual(c.SecurityContext, diff --git a/internal/postgres/config.go b/internal/postgres/config.go index a478c0e72b..9270472163 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -375,11 +375,11 @@ chmod +x /tmp/pg_rewind_tde.sh `halt "$(permissions "${postgres_data_directory}" ||:)"`, // Create log directories. 
- `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, `halt "$(permissions ` + naming.PGBackRestPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, `halt "$(permissions ` + naming.PatroniPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(0o775, dataMountPath, LogDirectory()) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, LogDirectory()) + `) ||`, `halt "$(permissions ` + LogDirectory() + ` ||:)"`, // Copy replication client certificate files diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index 9903afb97c..ba3a90b57b 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -268,11 +268,11 @@ initContainers: recreate "${postgres_data_directory}" '0700' else (halt Permissions!); fi || halt "$(permissions "${postgres_data_directory}" ||:)" - (mkdir -p '/pgdata/pgbackrest/log' && chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest') || + (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || halt "$(permissions /pgdata/pgbackrest/log ||:)" - (mkdir -p '/pgdata/patroni/log' && chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni') || + (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || halt "$(permissions /pgdata/patroni/log ||:)" - (mkdir -p '/pgdata/logs/postgres' && chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs') || + (mkdir -p '/pgdata/logs/postgres' && { chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs' || :; }) || halt "$(permissions /pgdata/logs/postgres ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} diff --git a/internal/shell/paths.go b/internal/shell/paths.go index d1df635e68..94c997f7b4 100644 --- a/internal/shell/paths.go +++ b/internal/shell/paths.go @@ -33,14 +33,14 @@ func CleanFileName(path string) string { // MakeDirectories returns a list of POSIX shell commands that ensure each path // exists. It creates every directory leading to path from (but not including) -// base and sets their permissions to exactly perms, regardless of umask. +// base and sets their permissions for Kubernetes, regardless of umask. // // See: // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/chmod.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/mkdir.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/test.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/umask.html -func MakeDirectories(perms fs.FileMode, base string, paths ...string) string { +func MakeDirectories(base string, paths ...string) string { // Without any paths, return a command that succeeds when the base path // exists. if len(paths) == 0 { @@ -61,14 +61,22 @@ func MakeDirectories(perms fs.FileMode, base string, paths ...string) string { } } + const perms fs.FileMode = 0 | + // S_IRWXU: enable owner read, write, and execute permissions. + 0o0700 | + // S_IRWXG: enable group read, write, and execute permissions. + 0o0070 | + // S_IXOTH, S_IROTH: enable other read and execute permissions. + 0o0001 | 0o0004 + return `` + // Create all the paths and any missing parents. 
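		// For example (see paths_test.go), MakeDirectories("/asdf", "/asdf/qwerty/boots") produces:
		//   mkdir -p '/asdf/qwerty/boots' && { chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty' || :; }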
`mkdir -p ` + strings.Join(QuoteWords(paths...), " ") + - // Set the permissions of every path and each parent. - // NOTE: FileMode bits other than file permissions are ignored. - fmt.Sprintf(` && chmod %#o %s`, - perms&fs.ModePerm, - strings.Join(QuoteWords(allPaths...), " "), + // Try to set the permissions of every path and each parent. + // This swallows the exit status of `chmod` because not all filesystems + // tolerate the operation; CIFS and NFS are notable examples. + fmt.Sprintf(` && { chmod %#o %s || :; }`, + perms, strings.Join(QuoteWords(allPaths...), " "), ) } diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index 8af16a73c0..33e68c2332 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -52,20 +52,20 @@ func TestMakeDirectories(t *testing.T) { t.Run("NoPaths", func(t *testing.T) { assert.Equal(t, - MakeDirectories(0o755, "/asdf/jklm"), + MakeDirectories("/asdf/jklm"), `test -d '/asdf/jklm'`) }) t.Run("Children", func(t *testing.T) { assert.DeepEqual(t, - MakeDirectories(0o775, "/asdf", "/asdf/jklm", "/asdf/qwerty"), - `mkdir -p '/asdf/jklm' '/asdf/qwerty' && chmod 0775 '/asdf/jklm' '/asdf/qwerty'`) + MakeDirectories("/asdf", "/asdf/jklm", "/asdf/qwerty"), + `mkdir -p '/asdf/jklm' '/asdf/qwerty' && { chmod 0775 '/asdf/jklm' '/asdf/qwerty' || :; }`) }) t.Run("Grandchild", func(t *testing.T) { - script := MakeDirectories(0o775, "/asdf", "/asdf/qwerty/boots") + script := MakeDirectories("/asdf", "/asdf/qwerty/boots") assert.DeepEqual(t, script, - `mkdir -p '/asdf/qwerty/boots' && chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty'`) + `mkdir -p '/asdf/qwerty/boots' && { chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty' || :; }`) t.Run("ShellCheckPOSIX", func(t *testing.T) { shellcheck := require.ShellCheck(t) @@ -83,7 +83,7 @@ func TestMakeDirectories(t *testing.T) { }) t.Run("Long", func(t *testing.T) { - script := MakeDirectories(0o700, "/", strings.Repeat("/asdf", 20)) + script := MakeDirectories("/", strings.Repeat("/asdf", 20)) t.Run("PrettyYAML", func(t *testing.T) { b, err := yaml.Marshal(script) From e96d467f9ea8abce1ef515967af598b8f238a473 Mon Sep 17 00:00:00 2001 From: Ben Blattberg Date: Thu, 8 May 2025 12:13:42 -0500 Subject: [PATCH 29/79] Add pg_hba checksum metric (#4169) --- .../generated/postgres_5m_metrics.json | 2 +- internal/collector/postgres_5m_metrics.yaml | 13 +- internal/collector/postgres_metrics_test.go | 4 +- .../postgrescluster/metrics_setup.sql | 161 ++++++++++++++++++ 4 files changed, 176 insertions(+), 4 deletions(-) diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json index a9a3500a02..371a7fa182 100644 --- a/internal/collector/generated/postgres_5m_metrics.json +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT 
CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s 
JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this 
database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml index 9f5c3212dc..95764fe3e1 100644 --- a/internal/collector/postgres_5m_metrics.yaml +++ b/internal/collector/postgres_5m_metrics.yaml @@ -140,4 +140,15 @@ attribute_columns: ["dbname"] static_attributes: server: "localhost:5432" - + + - sql: SELECT monitor.pg_hba_checksum() AS status; + metrics: + - metric_name: ccp_pg_hba_checksum + value_column: status + description: | + Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf). + 0 = valid config. 1 = settings changed. + Settings history is available for review in the table `monitor.pg_hba_checksum`. + To reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table. 
+      static_attributes:
+        server: "localhost:5432"
diff --git a/internal/collector/postgres_metrics_test.go b/internal/collector/postgres_metrics_test.go
index 8a22f42b52..63a6c654f3 100644
--- a/internal/collector/postgres_metrics_test.go
+++ b/internal/collector/postgres_metrics_test.go
@@ -17,9 +17,9 @@ func TestRemoveMetricsFromQueries(t *testing.T) {
 	err := json.Unmarshal(fiveMinuteMetrics, &fiveMinuteMetricsArr)
 	assert.NilError(t, err)
-	assert.Equal(t, len(fiveMinuteMetricsArr), 3)
+	assert.Equal(t, len(fiveMinuteMetricsArr), 4)
 	newArr := removeMetricsFromQueries([]string{"ccp_database_size_bytes"}, fiveMinuteMetricsArr)
-	assert.Equal(t, len(newArr), 2)
+	assert.Equal(t, len(newArr), 3)

 	t.Run("DeleteOneMetric", func(t *testing.T) {
 		sqlMetricsData := `[
diff --git a/internal/controller/postgrescluster/metrics_setup.sql b/internal/controller/postgrescluster/metrics_setup.sql
index 728de80c3e..858f95c023 100644
--- a/internal/controller/postgrescluster/metrics_setup.sql
+++ b/internal/controller/postgrescluster/metrics_setup.sql
@@ -220,3 +220,164 @@ BEGIN
 END;
 $$ LANGUAGE plpgsql;

+/*
+* The `pg_hba_checksum` table, functions, and view are taken from
+* https://github.com/CrunchyData/pgmonitor/blob/development/postgres_exporter/common
+*
+* The goal of this table, these functions, and this view is to monitor changes
+* to the pg_hba_file_rules system catalog.
+*
+* This material is used in the metric `ccp_pg_hba_checksum`.
+*/
+
+/*
+* The `monitor.pg_hba_checksum` table is used to store
+* - the pg_hba settings as a string (for reference)
+* - the pg_hba settings as a hash (for quick comparison)
+* - the `hba_hash_known_provided` (for an override hash manually given to the `monitor.pg_hba_checksum` function)
+* - the `valid` field to signal whether the pg_hba settings have not changed since they were accepted as valid
+*
+* We create an index on `created_at` in order to pull the most recent entry for
+* comparison in the `monitor.pg_hba_checksum` function below.
+*/
+DROP TABLE IF EXISTS monitor.pg_hba_checksum;
+CREATE TABLE monitor.pg_hba_checksum (
+    hba_hash_generated text NOT NULL
+    , hba_hash_known_provided text
+    , hba_string text NOT NULL
+    , created_at timestamptz DEFAULT now() NOT NULL
+    , valid smallint NOT NULL );
+COMMENT ON COLUMN monitor.pg_hba_checksum.valid IS 'Set this column to zero if this group of settings is a valid change';
+CREATE INDEX ON monitor.pg_hba_checksum (created_at);
+
+/*
+ * `monitor.pg_hba_checksum(text)` is used to compare the previous pg_hba hash
+ * with a hash of the current pg_hba settings, derived from the `monitor.pg_hba_hash` view below.
+ *
+ * This function returns
+ * - 0, indicating NO settings have changed
+ * - 1, indicating something has changed since the last known valid state
+ *
+ * `monitor.pg_hba_checksum` can take a hash to be used as an override.
+ * This may be useful when you have a standby with different pg_hba rules;
+ * since it will have different rules (and therefore a different hash), you
+ * could alter the metric function to pass the actual hash, which would be
+ * used in lieu of this table's value (derived from the primary cluster's rules).
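+ *
+ * An illustrative call with an override hash (the literal below is a placeholder):
+ *   SELECT monitor.pg_hba_checksum('<sha256-of-standby-hba-rules>');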
+ */
+DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text);
+CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL)
+    RETURNS smallint
+    LANGUAGE plpgsql SECURITY DEFINER
+    SET search_path TO pg_catalog, pg_temp
+AS $function$
+DECLARE
+
+v_hba_hash text;
+v_hba_hash_old text;
+v_hba_string text;
+v_is_in_recovery boolean;
+v_valid smallint;
+
+BEGIN
+
+-- Retrieve the current settings from the `monitor.pg_hba_hash` view below
+IF current_setting('server_version_num')::int >= 100000 THEN
+    SELECT sha256_hash, hba_string
+    INTO v_hba_hash, v_hba_string
+    FROM monitor.pg_hba_hash;
+ELSE
+    RAISE EXCEPTION 'pg_hba change monitoring unsupported in versions older than PostgreSQL 10';
+END IF;
+
+-- Retrieve the most recent hash from the table
+SELECT hba_hash_generated, valid
+INTO v_hba_hash_old, v_valid
+FROM monitor.pg_hba_checksum
+ORDER BY created_at DESC LIMIT 1;
+
+-- If a manual/override hash has been given, we will use that:
+-- Do not base validity on the stored value if a manual hash is given.
+IF p_known_hba_hash IS NOT NULL THEN
+    v_hba_hash_old := p_known_hba_hash;
+    v_valid := 0;
+END IF;
+
+/* If the table is not empty or a manual hash was given,
+ * then we want to compare the old hash (from the table)
+ * with the new hash: if those differ, then we set the validity to 1;
+ * if they are the same, then we keep whatever validity is
+ * stored in the table.
+ */
+IF (v_hba_hash_old IS NOT NULL) THEN
+    IF (v_hba_hash != v_hba_hash_old) THEN
+        v_valid := 1;
+    END IF;
+ELSE
+    v_valid := 0;
+END IF;
+
+/*
+ * We only want to insert into the table if we're on a primary and
+ * - the table/manually entered hash is empty, e.g., we've just started the cluster; or
+ * - the hashes don't match
+ *
+ * There's no value added by inserting into the table when no change was detected.
+ */
+IF (v_hba_hash_old IS NULL) OR (v_hba_hash != v_hba_hash_old) THEN
+    SELECT pg_is_in_recovery() INTO v_is_in_recovery;
+    IF v_is_in_recovery = false THEN
+        INSERT INTO monitor.pg_hba_checksum (
+            hba_hash_generated
+            , hba_hash_known_provided
+            , hba_string
+            , valid)
+        VALUES (
+            v_hba_hash
+            , p_known_hba_hash
+            , v_hba_string
+            , v_valid);
+    END IF;
+END IF;
+
+RETURN v_valid;
+
+END
+$function$;
+
+/*
+ * The `monitor.pg_hba_hash` view returns both a hash and a string aggregate of
+ * pg_catalog.pg_hba_file_rules.
+ * Note: We use `sha256` for hashing so that this can run in FIPS environments.
+ */
+DROP VIEW IF EXISTS monitor.pg_hba_hash;
+CREATE VIEW monitor.pg_hba_hash AS
+    -- Order by line number so that a change in entry order is caught even when no content is changed
+    WITH hba_ordered_list AS (
+        SELECT COALESCE(type, '<>') AS type
+        , array_to_string(COALESCE(database, ARRAY['<>']), ',') AS database
+        , array_to_string(COALESCE(user_name, ARRAY['<>']), ',') AS user_name
+        , COALESCE(address, '<>') AS address
+        , COALESCE(netmask, '<>') AS netmask
+        , COALESCE(auth_method, '<>') AS auth_method
+        , array_to_string(COALESCE(options, ARRAY['<>']), ',') AS options
+        FROM pg_catalog.pg_hba_file_rules
+        ORDER BY line_number)
+    SELECT sha256((string_agg(type||database||user_name||address||netmask||auth_method||options, ','))::bytea) AS sha256_hash
+    , string_agg(type||database||user_name||address||netmask||auth_method||options, ',') AS hba_string
+    FROM hba_ordered_list;
+
+/*
+ * The `monitor.pg_hba_checksum_set_valid` function provides an interface for
+ * resetting the checksum monitor.
+ * Note: configuration history will be cleared.
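+ *
+ * Illustrative usage after confirming a change is intentional:
+ *   SELECT monitor.pg_hba_checksum_set_valid();
+ * This truncates monitor.pg_hba_checksum and, on a primary, re-records the
+ * current rules as valid.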
+ */ +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); +CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint + LANGUAGE sql +AS $function$ + +TRUNCATE monitor.pg_hba_checksum; + +SELECT monitor.pg_hba_checksum(); + +$function$; From 600751e8084eea99733b25d588181448e508374c Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Tue, 6 May 2025 15:40:49 -0700 Subject: [PATCH 30/79] Move ccp_stat_database metrics/query to 5 second interval receiver. Move ccp_stat_user_tables metrics/query to 5 minute interval receiver. Add comment about null values to ccp_replication_slots metrics/query. --- ...metrics.yaml => eq_pg16_fast_metrics.yaml} | 3 + ...metrics.json => eq_pg16_fast_metrics.json} | 0 ...etrics.json => gte_pg16_slow_metrics.json} | 0 ...etrics.json => gte_pg17_fast_metrics.json} | 0 .../generated/lt_pg16_fast_metrics.json | 1 + ...metrics.json => lt_pg16_slow_metrics.json} | 2 +- ...metrics.json => lt_pg17_fast_metrics.json} | 0 .../generated/postgres_5m_metrics.json | 2 +- .../generated/postgres_5s_metrics.json | 2 +- ...etrics.yaml => gte_pg16_slow_metrics.yaml} | 0 ...etrics.yaml => gte_pg17_fast_metrics.yaml} | 3 + internal/collector/lt_pg16_fast_metrics.yaml | 51 +++++++++ ...metrics.yaml => lt_pg16_slow_metrics.yaml} | 43 ------- ...metrics.yaml => lt_pg17_fast_metrics.yaml} | 0 internal/collector/postgres_5m_metrics.yaml | 105 ------------------ internal/collector/postgres_5s_metrics.yaml | 105 ++++++++++++++++++ internal/collector/postgres_metrics.go | 57 ++++++---- internal/collector/postgres_metrics_test.go | 4 +- 18 files changed, 203 insertions(+), 175 deletions(-) rename internal/collector/{eq_pg16_metrics.yaml => eq_pg16_fast_metrics.yaml} (90%) rename internal/collector/generated/{eq_pg16_metrics.json => eq_pg16_fast_metrics.json} (100%) rename internal/collector/generated/{gte_pg16_metrics.json => gte_pg16_slow_metrics.json} (100%) rename internal/collector/generated/{gte_pg17_metrics.json => gte_pg17_fast_metrics.json} (100%) create mode 100644 internal/collector/generated/lt_pg16_fast_metrics.json rename internal/collector/generated/{lt_pg16_metrics.json => lt_pg16_slow_metrics.json} (64%) rename internal/collector/generated/{lt_pg17_metrics.json => lt_pg17_fast_metrics.json} (100%) rename internal/collector/{gte_pg16_metrics.yaml => gte_pg16_slow_metrics.yaml} (100%) rename internal/collector/{gte_pg17_metrics.yaml => gte_pg17_fast_metrics.yaml} (94%) create mode 100644 internal/collector/lt_pg16_fast_metrics.yaml rename internal/collector/{lt_pg16_metrics.yaml => lt_pg16_slow_metrics.yaml} (71%) rename internal/collector/{lt_pg17_metrics.yaml => lt_pg17_fast_metrics.yaml} (100%) diff --git a/internal/collector/eq_pg16_metrics.yaml b/internal/collector/eq_pg16_fast_metrics.yaml similarity index 90% rename from internal/collector/eq_pg16_metrics.yaml rename to internal/collector/eq_pg16_fast_metrics.yaml index 2abc0e2208..855dc8a3d3 100644 --- a/internal/collector/eq_pg16_metrics.yaml +++ b/internal/collector/eq_pg16_fast_metrics.yaml @@ -4,6 +4,9 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. 
+# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes, database, and conflicting and we avoid NULL by using COALESCE. - sql: > SELECT s.slot_name diff --git a/internal/collector/generated/eq_pg16_metrics.json b/internal/collector/generated/eq_pg16_fast_metrics.json similarity index 100% rename from internal/collector/generated/eq_pg16_metrics.json rename to internal/collector/generated/eq_pg16_fast_metrics.json diff --git a/internal/collector/generated/gte_pg16_metrics.json b/internal/collector/generated/gte_pg16_slow_metrics.json similarity index 100% rename from internal/collector/generated/gte_pg16_metrics.json rename to internal/collector/generated/gte_pg16_slow_metrics.json diff --git a/internal/collector/generated/gte_pg17_metrics.json b/internal/collector/generated/gte_pg17_fast_metrics.json similarity index 100% rename from internal/collector/generated/gte_pg17_metrics.json rename to internal/collector/generated/gte_pg17_fast_metrics.json diff --git a/internal/collector/generated/lt_pg16_fast_metrics.json b/internal/collector/generated/lt_pg16_fast_metrics.json new file mode 100644 index 0000000000..dcd1d5fe77 --- /dev/null +++ b/internal/collector/generated/lt_pg16_fast_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , 0 AS conflicting\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/lt_pg16_metrics.json b/internal/collector/generated/lt_pg16_slow_metrics.json similarity index 64% rename from internal/collector/generated/lt_pg16_metrics.json rename to internal/collector/generated/lt_pg16_slow_metrics.json index acc1a5f30e..98bb0cc213 100644 --- a/internal/collector/generated/lt_pg16_metrics.json +++ b/internal/collector/generated/lt_pg16_slow_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update 
required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , 0 AS conflicting\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] +[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows 
updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/lt_pg17_metrics.json b/internal/collector/generated/lt_pg17_fast_metrics.json similarity index 100% rename from internal/collector/generated/lt_pg17_metrics.json rename to internal/collector/generated/lt_pg17_fast_metrics.json diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json index 371a7fa182..6c438218a9 100644 --- a/internal/collector/generated/postgres_5m_metrics.json +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with 
recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). 
Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] diff --git a/internal/collector/generated/postgres_5s_metrics.json b/internal/collector/generated/postgres_5s_metrics.json index 484c99dfa0..978f89d305 100644 --- a/internal/collector/generated/postgres_5s_metrics.json +++ b/internal/collector/generated/postgres_5s_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully 
archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 
END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n 
GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on 
active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n 
CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total 
rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. 
Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT 
COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] +[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND 
pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - 
query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n  THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n  ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n  END AS usage,\n  extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n  CASE\n    WHEN monitor.cgroup_mode() = 'legacy' THEN\n      monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n    ELSE\n      (monitor.cgroup_array_bigint('cpu.max'))[2]\n  END AS period_us,\n  CASE\n    WHEN monitor.cgroup_mode() = 'legacy' THEN\n      GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n    ELSE\n      GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n  END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors 
written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN 
monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per 
user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. 
Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT 
COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n  FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Maximum query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n  pg_get_userbyid(s.userid) AS role\n  , d.datname AS dbname\n  , s.queryid AS queryid\n  , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n  , s.calls\n  , s.total_exec_time AS total_exec_time\n  , s.max_exec_time AS max_exec_time_ms\n  , s.rows\n  , s.wal_records AS records\n  , s.wal_fpi AS fpi\n  , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n  pg_get_userbyid(s.userid) AS role\n  , d.datname AS dbname\n  , s.queryid AS queryid\n  , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n  , s.calls\n  , s.total_exec_time AS total_exec_time_ms\n  , s.rows\n  , s.wal_records AS records\n  , s.wal_fpi AS fpi\n  , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n  pg_get_userbyid(s.userid) AS role\n  , d.datname AS dbname\n  , s.queryid AS queryid\n  , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n  , s.calls\n  , s.total_exec_time AS total_exec_time\n  , s.max_exec_time AS max_exec_time\n  , s.mean_exec_time AS mean_exec_time\n  , s.rows\n  , s.wal_records AS records\n  , s.wal_fpi AS fpi\n  , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this 
database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] diff --git a/internal/collector/gte_pg16_metrics.yaml b/internal/collector/gte_pg16_slow_metrics.yaml similarity index 100% rename from internal/collector/gte_pg16_metrics.yaml rename to internal/collector/gte_pg16_slow_metrics.yaml diff --git a/internal/collector/gte_pg17_metrics.yaml b/internal/collector/gte_pg17_fast_metrics.yaml similarity index 94% rename from internal/collector/gte_pg17_metrics.yaml rename to internal/collector/gte_pg17_fast_metrics.yaml index ea5d6c0fe3..688a919f5c 100644 --- a/internal/collector/gte_pg17_metrics.yaml +++ b/internal/collector/gte_pg17_fast_metrics.yaml @@ -71,6 +71,9 @@ static_attributes: server: "localhost:5432" +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes, database, conflicting, failover, and synced and we avoid NULL by using COALESCE. 
- sql: > SELECT s.slot_name diff --git a/internal/collector/lt_pg16_fast_metrics.yaml b/internal/collector/lt_pg16_fast_metrics.yaml new file mode 100644 index 0000000000..8144abc144 --- /dev/null +++ b/internal/collector/lt_pg16_fast_metrics.yaml @@ -0,0 +1,51 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes and database and we avoid NULL by using COALESCE. + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , 0 AS conflicting + , 0 AS failover + , 0 AS synced + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. 
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/lt_pg16_metrics.yaml b/internal/collector/lt_pg16_slow_metrics.yaml similarity index 71% rename from internal/collector/lt_pg16_metrics.yaml rename to internal/collector/lt_pg16_slow_metrics.yaml index afa4e48228..ca9fe8a0c8 100644 --- a/internal/collector/lt_pg16_metrics.yaml +++ b/internal/collector/lt_pg16_slow_metrics.yaml @@ -133,46 +133,3 @@ attribute_columns: ["dbname", "relname", "schemaname"] static_attributes: server: "localhost:5432" - - - sql: > - SELECT - s.slot_name - , s.active::int - , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes - , COALESCE(s.database, '') - , s.slot_type - , 0 AS conflicting - , 0 AS failover - , 0 AS synced - FROM pg_catalog.pg_replication_slots s; - metrics: - - metric_name: ccp_replication_slots_active - value_column: active - description: Active state of slot. 1 = true. 0 = false. - attribute_columns: ["database", "slot_name", "slot_type"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_replication_slots_retained_bytes - value_column: retained_bytes - description: The amount of WAL (in bytes) being retained for this slot - attribute_columns: ["database", "slot_name", "slot_type"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_replication_slots_conflicting - value_column: conflicting - description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. - attribute_columns: ["database", "slot_name", "slot_type"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_replication_slots_failover - value_column: failover - description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. - attribute_columns: ["database", "slot_name", "slot_type"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_replication_slots_synced - value_column: synced - description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. 
- attribute_columns: ["database", "slot_name", "slot_type"] - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/lt_pg17_metrics.yaml b/internal/collector/lt_pg17_fast_metrics.yaml similarity index 100% rename from internal/collector/lt_pg17_metrics.yaml rename to internal/collector/lt_pg17_fast_metrics.yaml diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml index 95764fe3e1..ce04b443d3 100644 --- a/internal/collector/postgres_5m_metrics.yaml +++ b/internal/collector/postgres_5m_metrics.yaml @@ -36,111 +36,6 @@ static_attributes: server: "localhost:5432" - - sql: > - SELECT s.datname AS dbname - , s.xact_commit - , s.xact_rollback - , s.blks_read - , s.blks_hit - , s.tup_returned - , s.tup_fetched - , s.tup_inserted - , s.tup_updated - , s.tup_deleted - , s.conflicts - , s.temp_files - , s.temp_bytes - , s.deadlocks - FROM pg_catalog.pg_stat_database s - JOIN pg_catalog.pg_database d ON d.datname = s.datname - WHERE d.datistemplate = false; - metrics: - - metric_name: ccp_stat_database_blks_hit - value_column: blks_hit - description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_blks_read - value_column: blks_read - description: Number of disk blocks read in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_conflicts - value_column: conflicts - description: Number of queries canceled due to conflicts with recovery in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_deadlocks - value_column: deadlocks - description: Number of deadlocks detected in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_temp_bytes - value_column: temp_bytes - description: Total amount of data written to temporary files by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_temp_files - value_column: temp_files - description: Number of rows deleted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_deleted - value_column: tup_deleted - description: Number of rows deleted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_fetched - value_column: tup_fetched - description: Number of rows fetched by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_inserted - value_column: tup_inserted - description: Number of rows inserted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_returned - value_column: tup_returned - description: Number of rows returned by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_updated - value_column: tup_updated - description: Number of rows updated by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: 
"localhost:5432" - - - metric_name: ccp_stat_database_xact_commit - value_column: xact_commit - description: Number of transactions in this database that have been committed - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_xact_rollback - value_column: xact_rollback - description: Number of transactions in this database that have been rolled back - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - sql: SELECT monitor.pg_hba_checksum() AS status; metrics: - metric_name: ccp_pg_hba_checksum diff --git a/internal/collector/postgres_5s_metrics.yaml b/internal/collector/postgres_5s_metrics.yaml index 82ab10ef3c..6d92dfa75a 100644 --- a/internal/collector/postgres_5s_metrics.yaml +++ b/internal/collector/postgres_5s_metrics.yaml @@ -957,3 +957,108 @@ attribute_columns: ["repo"] static_attributes: server: "localhost:5432" + + - sql: > + SELECT s.datname AS dbname + , s.xact_commit + , s.xact_rollback + , s.blks_read + , s.blks_hit + , s.tup_returned + , s.tup_fetched + , s.tup_inserted + , s.tup_updated + , s.tup_deleted + , s.conflicts + , s.temp_files + , s.temp_bytes + , s.deadlocks + FROM pg_catalog.pg_stat_database s + JOIN pg_catalog.pg_database d ON d.datname = s.datname + WHERE d.datistemplate = false; + metrics: + - metric_name: ccp_stat_database_blks_hit + value_column: blks_hit + description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_blks_read + value_column: blks_read + description: Number of disk blocks read in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_conflicts + value_column: conflicts + description: Number of queries canceled due to conflicts with recovery in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_deadlocks + value_column: deadlocks + description: Number of deadlocks detected in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_bytes + value_column: temp_bytes + description: Total amount of data written to temporary files by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_files + value_column: temp_files + description: Number of rows deleted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_deleted + value_column: tup_deleted + description: Number of rows deleted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_fetched + value_column: tup_fetched + description: Number of rows fetched by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_inserted + value_column: tup_inserted + description: Number of rows inserted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_returned + value_column: tup_returned + description: Number of rows returned by queries in this database + 
attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_updated + value_column: tup_updated + description: Number of rows updated by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_commit + value_column: xact_commit + description: Number of transactions in this database that have been committed + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_rollback + value_column: xact_rollback + description: Number of transactions in this database that have been rolled back + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index f3aadb0142..a03f657397 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -24,20 +24,23 @@ var fiveSecondMetrics json.RawMessage //go:embed "generated/postgres_5m_metrics.json" var fiveMinuteMetrics json.RawMessage -//go:embed "generated/gte_pg17_metrics.json" -var gtePG17 json.RawMessage +//go:embed "generated/gte_pg17_fast_metrics.json" +var gtePG17Fast json.RawMessage -//go:embed "generated/lt_pg17_metrics.json" -var ltPG17 json.RawMessage +//go:embed "generated/lt_pg17_fast_metrics.json" +var ltPG17Fast json.RawMessage -//go:embed "generated/eq_pg16_metrics.json" -var eqPG16 json.RawMessage +//go:embed "generated/eq_pg16_fast_metrics.json" +var eqPG16Fast json.RawMessage -//go:embed "generated/gte_pg16_metrics.json" -var gtePG16 json.RawMessage +//go:embed "generated/gte_pg16_slow_metrics.json" +var gtePG16Slow json.RawMessage -//go:embed "generated/lt_pg16_metrics.json" -var ltPG16 json.RawMessage +//go:embed "generated/lt_pg16_fast_metrics.json" +var ltPG16Fast json.RawMessage + +//go:embed "generated/lt_pg16_slow_metrics.json" +var ltPG16Slow json.RawMessage type queryMetrics struct { Metrics []*metric `json:"metrics"` @@ -70,28 +73,38 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust fiveMinuteMetricsClone := slices.Clone(fiveMinuteMetrics) if inCluster.Spec.PostgresVersion >= 17 { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG17) + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG17Fast) + if err != nil { + log.Error(err, "error compiling metrics for postgres 17 and greater") + } } else { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG17) - } - if err != nil { - log.Error(err, "error compiling postgres metrics") + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG17Fast) + if err != nil { + log.Error(err, "error compiling metrics for postgres versions less than 17") + } } if inCluster.Spec.PostgresVersion == 16 { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, eqPG16) + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, eqPG16Fast) } if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling metrics for postgres 16") } if inCluster.Spec.PostgresVersion >= 16 { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG16) + fiveMinuteMetricsClone, err = appendToJSONArray(fiveMinuteMetricsClone, gtePG16Slow) + if err != nil { + log.Error(err, "error compiling metrics for postgres 16 and greater") + } } else { - 
fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG16) - } - if err != nil { - log.Error(err, "error compiling postgres metrics") + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG16Fast) + if err != nil { + log.Error(err, "error compiling fast metrics for postgres versions less than 16") + } + fiveMinuteMetricsClone, err = appendToJSONArray(fiveMinuteMetricsClone, ltPG16Slow) + if err != nil { + log.Error(err, "error compiling slow metrics for postgres versions less than 16") + } } // Remove any queries that user has specified in the spec diff --git a/internal/collector/postgres_metrics_test.go b/internal/collector/postgres_metrics_test.go index 63a6c654f3..8a22f42b52 100644 --- a/internal/collector/postgres_metrics_test.go +++ b/internal/collector/postgres_metrics_test.go @@ -17,9 +17,9 @@ func TestRemoveMetricsFromQueries(t *testing.T) { err := json.Unmarshal(fiveMinuteMetrics, &fiveMinuteMetricsArr) assert.NilError(t, err) - assert.Equal(t, len(fiveMinuteMetricsArr), 4) + assert.Equal(t, len(fiveMinuteMetricsArr), 3) newArr := removeMetricsFromQueries([]string{"ccp_database_size_bytes"}, fiveMinuteMetricsArr) - assert.Equal(t, len(newArr), 3) + assert.Equal(t, len(newArr), 2) t.Run("DeleteOneMetric", func(t *testing.T) { sqlMetricsData := `[ From 87016c8df55f607af789ef882f1119fa1424b6ed Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Tue, 6 May 2025 16:05:32 -0700 Subject: [PATCH 31/79] OTel metrics: bump initial_delay time on sqlquery receivers to avoid ccp_monitoring authentication errors. --- internal/collector/postgres_metrics.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index a03f657397..098d1ff2be 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -155,7 +155,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust MonitoringUser), "collection_interval": "5s", // Give Postgres time to finish setup. - "initial_delay": "10s", + "initial_delay": "15s", "queries": slices.Clone(fiveSecondMetricsClone), } @@ -166,7 +166,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust MonitoringUser), "collection_interval": "300s", // Give Postgres time to finish setup. - "initial_delay": "10s", + "initial_delay": "15s", "queries": slices.Clone(fiveMinuteMetricsClone), } @@ -196,7 +196,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust MonitoringUser), "collection_interval": querySet.CollectionInterval, // Give Postgres time to finish setup. - "initial_delay": "10s", + "initial_delay": "15s", "queries": "${file:/etc/otel-collector/" + querySet.Name + "/" + querySet.Queries.Key + "}", } From 67812de937ce7d8c3baa4c501b4a85fb6772ca34 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 7 May 2025 11:05:36 -0700 Subject: [PATCH 32/79] OTel kuttl test: move check for 5 minute metric to last to avoid failures. 
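The 5-minute sqlquery receiver only collects every 300s (after its 15s initial delay), so ccp_database_size_bytes is the last metric to appear on the collector's Prometheus endpoint. Asserting on it before the faster signals made the kuttl step spend its retries waiting on the slowest metric. The assert now checks logs and fast metrics first and the 5-minute metric last, with the step timeout raised to 400s. A condensed sketch of the reordered checks, reusing the retry and contains helpers already defined at the top of the script:

  { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { retry "5 second metric not found"; exit 1; }
  { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { retry "patroni metric not found"; exit 1; }
  { contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || { retry "5 minute metric not found"; exit 1; }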
--- ...=> 02-assert-repo-host-does-not-logs.yaml} | 0 .../otel-logging-and-metrics/03--backup.yaml | 6 ++++ ...=> 04-assert-repo-host-contains-logs.yaml} | 0 .../otel-logging-and-metrics/05--backup.yaml | 6 ---- ...gbouncer.yaml => 05-assert-pgbouncer.yaml} | 0 ...-instance.yaml => 06-assert-instance.yaml} | 33 ++++++++++--------- ...cluster.yaml => 03--annotate-cluster.yaml} | 0 ...ompleted.yaml => 03-backup-completed.yaml} | 0 8 files changed, 23 insertions(+), 22 deletions(-) rename testing/kuttl/e2e/otel-logging-and-metrics/{04-assert-repo-host-does-not-logs.yaml => 02-assert-repo-host-does-not-logs.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{06-assert-repo-host-contains-logs.yaml => 04-assert-repo-host-contains-logs.yaml} (100%) delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{03-assert-pgbouncer.yaml => 05-assert-pgbouncer.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{02-assert-instance.yaml => 06-assert-instance.yaml} (98%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{05--annotate-cluster.yaml => 03--annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{05-backup-completed.yaml => 03-backup-completed.yaml} (100%) diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-does-not-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-repo-host-does-not-logs.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-does-not-logs.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/02-assert-repo-host-does-not-logs.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml new file mode 100644 index 0000000000..95daf31a6a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/03--annotate-cluster.yaml +assert: +- files/03-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-repo-host-contains-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-contains-logs.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/06-assert-repo-host-contains-logs.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-contains-logs.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml deleted file mode 100644 index 166ef662a5..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/05--annotate-cluster.yaml -assert: -- files/05-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/03-assert-pgbouncer.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml similarity index 98% rename from 
testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml index 235d07e47e..096c024d89 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-instance.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml @@ -6,7 +6,8 @@ commands: # and 5s queries are present, as well as patroni metrics. # Then, check the collector logs for patroni, pgbackrest, and postgres logs. # Finally, ensure the monitoring user exists and is configured. -- script: | +- timeout: 400 + script: | retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } @@ -22,21 +23,6 @@ commands: exit 1 } - scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) - { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { - retry "5 second metric not found" - exit 1 - } - { contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || { - retry "5 minute metric not found" - exit 1 - } - { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { - retry "patroni metric not found" - exit 1 - } - logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) { contains "${logs}" 'InstrumentationScope patroni'; } || { retry "patroni logs not found" @@ -51,6 +37,21 @@ commands: exit 1 } + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { + retry "5 second metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { + retry "patroni metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || { + retry "5 minute metric not found" + exit 1 + } + kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \ -- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL' DO $$ diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/05--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/03--annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/05--annotate-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/03--annotate-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/05-backup-completed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/03-backup-completed.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/05-backup-completed.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/03-backup-completed.yaml From 4ff33211d4c5c9bf25e3eb887a51c260684fe761 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Thu, 8 May 2025 18:05:17 -0700 Subject: [PATCH 33/79] OTel: Add log context to body in transform processor config to satisfy collector 0.125.0. 
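Collector 0.125.0 requires OTTL statements and conditions in the transform processor to name their path context explicitly, so the remaining bare body[...] references gain the log. prefix. In the postgres_logs_transforms.yaml source that generates this JSON, the change looks roughly like this (a minimal sketch):

  # before: bare path in the condition, rejected by collector 0.125.0
  - conditions:
      - body["format"] == "csv"
    statements:
      - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict"))

  # after: the condition carries the explicit log context
  - conditions:
      - log.body["format"] == "csv"
    statements:
      - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict"))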
--- .../generated/postgres_logs_transforms.json | 2 +- internal/collector/postgres_logs_transforms.yaml | 4 ++-- internal/collector/postgres_test.go | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/collector/generated/postgres_logs_transforms.json b/internal/collector/generated/postgres_logs_transforms.json index f7409174eb..066c067399 100644 --- a/internal/collector/generated/postgres_logs_transforms.json +++ b/internal/collector/generated/postgres_logs_transforms.json @@ -1 +1 @@ -[{"conditions":["body[\"format\"] == \"csv\""],"statements":["set(log.cache, ParseCSV(log.body[\"original\"], log.body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(log.cache, ExtractPatterns(log.cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(log.cache[\"connection_from\"]) \u003e 0","set(log.cache[\"remote_host\"], Substring(log.cache[\"connection_from\"], 0, Len(log.cache[\"connection_from\"]) - Len(log.cache[\"remote_port\"]) - 1)) where Len(log.cache[\"connection_from\"]) \u003e 0 and IsString(log.cache[\"remote_port\"])","set(log.cache[\"remote_host\"], log.cache[\"connection_from\"]) where Len(log.cache[\"connection_from\"]) \u003e 0 and not IsString(log.cache[\"remote_host\"])","merge_maps(log.cache, ExtractPatterns(log.cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(log.cache[\"location\"]) \u003e 0","set(log.cache[\"cursor_position\"], Double(log.cache[\"cursor_position\"])) where IsMatch(log.cache[\"cursor_position\"], \"^[0-9.]+$\")","set(log.cache[\"file_line_num\"], Double(log.cache[\"file_line_num\"])) where IsMatch(log.cache[\"file_line_num\"], \"^[0-9.]+$\")","set(log.cache[\"internal_position\"], Double(log.cache[\"internal_position\"])) where IsMatch(log.cache[\"internal_position\"], \"^[0-9.]+$\")","set(log.cache[\"leader_pid\"], Double(log.cache[\"leader_pid\"])) where IsMatch(log.cache[\"leader_pid\"], \"^[0-9.]+$\")","set(log.cache[\"line_num\"], Double(log.cache[\"line_num\"])) where IsMatch(log.cache[\"line_num\"], \"^[0-9.]+$\")","set(log.cache[\"pid\"], Double(log.cache[\"pid\"])) where IsMatch(log.cache[\"pid\"], \"^[0-9.]+$\")","set(log.cache[\"query_id\"], Double(log.cache[\"query_id\"])) where IsMatch(log.cache[\"query_id\"], \"^[0-9.]+$\")","set(log.cache[\"remote_port\"], Double(log.cache[\"remote_port\"])) where IsMatch(log.cache[\"remote_port\"], \"^[0-9.]+$\")","set(log.body[\"parsed\"], log.cache)"]},{"statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(log.cache, log.body[\"parsed\"]) where log.body[\"format\"] == \"csv\"","set(log.cache, ParseJSON(log.body[\"original\"])) where log.body[\"format\"] == \"json\"","set(log.severity_text, log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"DEBUG5\"","set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == \"DEBUG4\"","set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == \"DEBUG3\"","set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == \"DEBUG2\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG1\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\" or log.severity_text == \"LOG\"","set(log.severity_number, 
SEVERITY_NUMBER_INFO2) where log.severity_text == \"NOTICE\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == \"WARNING\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == \"FATAL\"","set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == \"PANIC\"","set(log.time, Time(log.cache[\"timestamp\"], \"%F %T.%L %Z\")) where IsString(log.cache[\"timestamp\"])","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(log.attributes[\"log.record.original\"], log.body[\"original\"])","set(log.body, log.cache)","set(log.attributes[\"client.address\"], log.body[\"remote_host\"]) where IsString(log.body[\"remote_host\"])","set(log.attributes[\"client.port\"], Int(log.body[\"remote_port\"])) where IsDouble(log.body[\"remote_port\"])","set(log.attributes[\"code.filepath\"], log.body[\"file_name\"]) where IsString(log.body[\"file_name\"])","set(log.attributes[\"code.function\"], log.body[\"func_name\"]) where IsString(log.body[\"func_name\"])","set(log.attributes[\"code.lineno\"], Int(log.body[\"file_line_num\"])) where IsDouble(log.body[\"file_line_num\"])","set(log.attributes[\"db.namespace\"], log.body[\"dbname\"]) where IsString(log.body[\"dbname\"])","set(log.attributes[\"db.response.status_code\"], log.body[\"state_code\"]) where IsString(log.body[\"state_code\"])","set(log.attributes[\"process.creation.time\"], Concat([ Substring(log.body[\"session_start\"], 0, 10), \"T\", Substring(log.body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(log.body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(log.attributes[\"process.pid\"], Int(log.body[\"pid\"])) where IsDouble(log.body[\"pid\"])","set(log.attributes[\"process.title\"], log.body[\"ps\"]) where IsString(log.body[\"ps\"])","set(log.attributes[\"user.name\"], log.body[\"user\"]) where IsString(log.body[\"user\"])"]},{"conditions":["Len(body[\"message\"]) \u003e 7 and Substring(body[\"message\"], 0, 7) == \"AUDIT: \""],"statements":["set(log.body[\"pgaudit\"], ParseCSV(Substring(log.body[\"message\"], 7, Len(log.body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(log.body[\"pgaudit\"]) \u003e 0"]}] +[{"conditions":["log.body[\"format\"] == \"csv\""],"statements":["set(log.cache, ParseCSV(log.body[\"original\"], log.body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(log.cache, ExtractPatterns(log.cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(log.cache[\"connection_from\"]) \u003e 0","set(log.cache[\"remote_host\"], Substring(log.cache[\"connection_from\"], 0, Len(log.cache[\"connection_from\"]) - Len(log.cache[\"remote_port\"]) - 1)) where Len(log.cache[\"connection_from\"]) \u003e 0 and IsString(log.cache[\"remote_port\"])","set(log.cache[\"remote_host\"], log.cache[\"connection_from\"]) where Len(log.cache[\"connection_from\"]) \u003e 0 and not IsString(log.cache[\"remote_host\"])","merge_maps(log.cache, ExtractPatterns(log.cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(log.cache[\"location\"]) \u003e 
0","set(log.cache[\"cursor_position\"], Double(log.cache[\"cursor_position\"])) where IsMatch(log.cache[\"cursor_position\"], \"^[0-9.]+$\")","set(log.cache[\"file_line_num\"], Double(log.cache[\"file_line_num\"])) where IsMatch(log.cache[\"file_line_num\"], \"^[0-9.]+$\")","set(log.cache[\"internal_position\"], Double(log.cache[\"internal_position\"])) where IsMatch(log.cache[\"internal_position\"], \"^[0-9.]+$\")","set(log.cache[\"leader_pid\"], Double(log.cache[\"leader_pid\"])) where IsMatch(log.cache[\"leader_pid\"], \"^[0-9.]+$\")","set(log.cache[\"line_num\"], Double(log.cache[\"line_num\"])) where IsMatch(log.cache[\"line_num\"], \"^[0-9.]+$\")","set(log.cache[\"pid\"], Double(log.cache[\"pid\"])) where IsMatch(log.cache[\"pid\"], \"^[0-9.]+$\")","set(log.cache[\"query_id\"], Double(log.cache[\"query_id\"])) where IsMatch(log.cache[\"query_id\"], \"^[0-9.]+$\")","set(log.cache[\"remote_port\"], Double(log.cache[\"remote_port\"])) where IsMatch(log.cache[\"remote_port\"], \"^[0-9.]+$\")","set(log.body[\"parsed\"], log.cache)"]},{"statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(log.cache, log.body[\"parsed\"]) where log.body[\"format\"] == \"csv\"","set(log.cache, ParseJSON(log.body[\"original\"])) where log.body[\"format\"] == \"json\"","set(log.severity_text, log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"DEBUG5\"","set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == \"DEBUG4\"","set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == \"DEBUG3\"","set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == \"DEBUG2\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG1\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\" or log.severity_text == \"LOG\"","set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == \"NOTICE\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == \"WARNING\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == \"FATAL\"","set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == \"PANIC\"","set(log.time, Time(log.cache[\"timestamp\"], \"%F %T.%L %Z\")) where IsString(log.cache[\"timestamp\"])","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(log.attributes[\"log.record.original\"], log.body[\"original\"])","set(log.body, log.cache)","set(log.attributes[\"client.address\"], log.body[\"remote_host\"]) where IsString(log.body[\"remote_host\"])","set(log.attributes[\"client.port\"], Int(log.body[\"remote_port\"])) where IsDouble(log.body[\"remote_port\"])","set(log.attributes[\"code.filepath\"], log.body[\"file_name\"]) where IsString(log.body[\"file_name\"])","set(log.attributes[\"code.function\"], log.body[\"func_name\"]) where IsString(log.body[\"func_name\"])","set(log.attributes[\"code.lineno\"], Int(log.body[\"file_line_num\"])) where IsDouble(log.body[\"file_line_num\"])","set(log.attributes[\"db.namespace\"], log.body[\"dbname\"]) where IsString(log.body[\"dbname\"])","set(log.attributes[\"db.response.status_code\"], log.body[\"state_code\"]) where 
IsString(log.body[\"state_code\"])","set(log.attributes[\"process.creation.time\"], Concat([ Substring(log.body[\"session_start\"], 0, 10), \"T\", Substring(log.body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(log.body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(log.attributes[\"process.pid\"], Int(log.body[\"pid\"])) where IsDouble(log.body[\"pid\"])","set(log.attributes[\"process.title\"], log.body[\"ps\"]) where IsString(log.body[\"ps\"])","set(log.attributes[\"user.name\"], log.body[\"user\"]) where IsString(log.body[\"user\"])"]},{"conditions":["Len(log.body[\"message\"]) \u003e 7 and Substring(log.body[\"message\"], 0, 7) == \"AUDIT: \""],"statements":["set(log.body[\"pgaudit\"], ParseCSV(Substring(log.body[\"message\"], 7, Len(log.body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(log.body[\"pgaudit\"]) \u003e 0"]}] diff --git a/internal/collector/postgres_logs_transforms.yaml b/internal/collector/postgres_logs_transforms.yaml index c8178f2d6e..c58f1a1a7b 100644 --- a/internal/collector/postgres_logs_transforms.yaml +++ b/internal/collector/postgres_logs_transforms.yaml @@ -8,7 +8,7 @@ # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme - conditions: - - body["format"] == "csv" + - log.body["format"] == "csv" statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict")) @@ -196,7 +196,7 @@ # https://github.com/pgaudit/pgaudit/blame/17.0/pgaudit.c#L876 # TODO(postgres-18): Check this prefix and update the URL above. 
- >- - Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: " + Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) == "AUDIT: " statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - >- diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 83deb349ad..222b263e25 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -123,7 +123,7 @@ processors: transform/postgres_logs: log_statements: - conditions: - - body["format"] == "csv" + - log.body["format"] == "csv" statements: - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict")) @@ -203,8 +203,8 @@ processors: - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"]) - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"]) - conditions: - - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: - "' + - 'Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) == + "AUDIT: "' statements: - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", @@ -383,7 +383,7 @@ processors: transform/postgres_logs: log_statements: - conditions: - - body["format"] == "csv" + - log.body["format"] == "csv" statements: - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict")) @@ -463,8 +463,8 @@ processors: - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"]) - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"]) - conditions: - - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: - "' + - 'Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) == + "AUDIT: "' statements: - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", From 105a3d320f6e1bf16ed85afe5fba0bd7a63a3851 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Wed, 14 May 2025 15:06:58 -0500 Subject: [PATCH 34/79] Consolidate .gitattributes at the top level The top-level file can define macros that combine multiple attributes. 
See: d3ea3a90b613d66d3f408886f6b8a2ceefb86753 --- .gitattributes | 6 ++++++ internal/collector/generated/.gitattributes | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 .gitattributes delete mode 100644 internal/collector/generated/.gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..c698441f73 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# https://docs.gitlab.com/user/project/merge_requests/changes#collapse-generated-files +# https://github.com/github-linguist/linguist/blob/-/docs/overrides.md#generated-code +# https://git-scm.com/docs/gitattributes#_defining_macro_attributes +[attr]generated gitlab-generated linguist-generated + +/internal/collector/generated/*.json generated diff --git a/internal/collector/generated/.gitattributes b/internal/collector/generated/.gitattributes deleted file mode 100644 index 49e9f142dd..0000000000 --- a/internal/collector/generated/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# https://docs.github.com/en/repositories/working-with-files/managing-files/customizing-how-changed-files-appear-on-github -/*.json linguist-generated=true From 0d5271534cd90a702a124c32d023f3b10187a5e6 Mon Sep 17 00:00:00 2001 From: tony-landreth Date: Mon, 19 May 2025 13:02:55 -0400 Subject: [PATCH 35/79] Remove GitHub files Now that postgres-operator back branches live in GitLab, there's no need to maintain GitHub files in these branches. Issue: PGO-1805 --- .github/ISSUE_TEMPLATE/bug_report.md | 60 ----- .github/ISSUE_TEMPLATE/feature_request.md | 42 ---- .../support---question-and-answer.md | 35 --- .github/actions/awk-matcher.json | 13 -- .github/actions/k3d/action.yaml | 94 -------- .github/actions/trivy/action.yaml | 107 --------- .github/dependabot.yml | 16 -- .github/pull_request_template.md | 30 --- .github/workflows/codeql-analysis.yaml | 40 ---- .github/workflows/govulncheck.yaml | 46 ---- .github/workflows/lint.yaml | 39 ---- .github/workflows/test.yaml | 211 ------------------ .github/workflows/trivy.yaml | 102 --------- 13 files changed, 835 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.md delete mode 100644 .github/ISSUE_TEMPLATE/support---question-and-answer.md delete mode 100644 .github/actions/awk-matcher.json delete mode 100644 .github/actions/k3d/action.yaml delete mode 100644 .github/actions/trivy/action.yaml delete mode 100644 .github/dependabot.yml delete mode 100644 .github/pull_request_template.md delete mode 100644 .github/workflows/codeql-analysis.yaml delete mode 100644 .github/workflows/govulncheck.yaml delete mode 100644 .github/workflows/lint.yaml delete mode 100644 .github/workflows/test.yaml delete mode 100644 .github/workflows/trivy.yaml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 30e551a122..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: Report a Bug -about: Found an issue? Let us fix it. ---- - -Please ensure you do the following when reporting a bug: - -- [ ] Provide a concise description of what the bug is. -- [ ] Provide information about your environment. -- [ ] Provide clear steps to reproduce the bug. -- [ ] Attach applicable logs. Please do not attach screenshots showing logs unless you are unable to copy and paste the log data. 
-- [ ] Ensure any code / output examples are [properly formatted](https://docs.github.com/en/github/writing-on-github/basic-writing-and-formatting-syntax#quoting-code) for legibility. - -Note that some logs needed to troubleshoot may be found in the `/pgdata//pg_log` directory on your Postgres instance. - -An incomplete bug report can lead to delays in resolving the issue or the closing of a ticket, so please be as detailed as possible. - -If you are looking for [general support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/), please view the [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) page for where you can ask questions. - -Thanks for reporting the issue, we're looking forward to helping you! - -## Overview - -Add a concise description of what the bug is. - -## Environment - -Please provide the following details: - -- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) -- Platform Version: (e.g. `1.20.3`, `4.7.0`) -- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) -- Postgres Version (e.g. `15`) -- Storage: (e.g. `hostpath`, `nfs`, or the name of your storage class) - -## Steps to Reproduce - -### REPRO - -Provide steps to get to the error condition: - -1. Run `...` -1. Do `...` -1. Try `...` - -### EXPECTED - -1. Provide the behavior that you expected. - -### ACTUAL - -1. Describe what actually happens - -## Logs - -Please provided appropriate log output or any configuration files that may help troubleshoot the issue. **DO NOT** include sensitive information, such as passwords. - -## Additional Information - -Please provide any additional information that may be helpful. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 4de2077c77..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: Feature Request -about: Help us improve PGO! ---- - -Have an idea to improve PGO? We'd love to hear it! We're going to need some information from you to learn more about your feature requests. - -Please be sure you've done the following: - -- [ ] Provide a concise description of your feature request. -- [ ] Describe your use case. Detail the problem you are trying to solve. -- [ ] Describe how you envision that the feature would work. -- [ ] Provide general information about your current PGO environment. - -## Overview - -Provide a concise description of your feature request. - -## Use Case - -Describe your use case. Why do you want this feature? What problem will it solve? Why will it help you? Why will it make it easier to use PGO? - -## Desired Behavior - -Describe how the feature would work. How do you envision interfacing with it? - -## Environment - -Tell us about your environment: - -Please provide the following details: - -- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) -- Platform Version: (e.g. `1.20.3`, `4.7.0`) -- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) -- Postgres Version (e.g. `15`) -- Storage: (e.g. `hostpath`, `nfs`, or the name of your storage class) -- Number of Postgres clusters: (`XYZ`) - -## Additional Information - -Please provide any additional information that may be helpful. 
diff --git a/.github/ISSUE_TEMPLATE/support---question-and-answer.md b/.github/ISSUE_TEMPLATE/support---question-and-answer.md deleted file mode 100644 index 271caa9029..0000000000 --- a/.github/ISSUE_TEMPLATE/support---question-and-answer.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Support -about: "Learn how to interact with the PGO community" ---- - -If you believe you have found a bug, please open up a [Bug Report](https://github.com/CrunchyData/postgres-operator/issues/new?template=bug_report.md) - -If you have a feature request, please open up a [Feature Request](https://github.com/CrunchyData/postgres-operator/issues/new?template=feature_request.md) - -You can find information about general PGO [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) at: - -[https://access.crunchydata.com/documentation/postgres-operator/latest/support/](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) - -## Questions - -For questions that are neither bugs nor feature requests, please be sure to - -- [ ] Provide information about your environment (see below for more information). -- [ ] Provide any steps or other relevant details related to your question. -- [ ] Attach logs, where applicable. Please do not attach screenshots showing logs unless you are unable to copy and paste the log data. -- [ ] Ensure any code / output examples are [properly formatted](https://docs.github.com/en/github/writing-on-github/basic-writing-and-formatting-syntax#quoting-code) for legibility. - -Besides Pod logs, logs may also be found in the `/pgdata/pg/log` directory on your Postgres instance. - -If you are looking for [general support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/), please view the [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) page for where you can ask questions. - -### Environment - -Please provide the following details: - -- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) -- Platform Version: (e.g. `1.20.3`, `4.7.0`) -- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) -- Postgres Version (e.g. `15`) -- Storage: (e.g. 
`hostpath`, `nfs`, or the name of your storage class) diff --git a/.github/actions/awk-matcher.json b/.github/actions/awk-matcher.json deleted file mode 100644 index 852a723577..0000000000 --- a/.github/actions/awk-matcher.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "problemMatcher": [ - { - "owner": "awk", - "pattern": [ - { - "regexp": "^([^:]+):([^ ]+) (([^:]+):.*)$", - "file": 1, "line": 2, "message": 3, "severity": 4 - } - ] - } - ] -} diff --git a/.github/actions/k3d/action.yaml b/.github/actions/k3d/action.yaml deleted file mode 100644 index 395d5f1116..0000000000 --- a/.github/actions/k3d/action.yaml +++ /dev/null @@ -1,94 +0,0 @@ -name: k3d -description: Start k3s using k3d -inputs: - k3d-tag: - default: latest - required: true - description: > - Git tag from https://github.com/k3d-io/k3d/releases or "latest" - k3s-channel: - default: latest - required: true - description: > - https://docs.k3s.io/upgrades/manual#release-channels - prefetch-images: - required: true - description: > - Each line is the name of an image to fetch onto all Kubernetes nodes - prefetch-timeout: - default: 90s - required: true - description: > - Amount of time to wait for images to be fetched - -outputs: - k3d-version: - value: ${{ steps.k3d.outputs.k3d }} - description: > - K3d version - kubernetes-version: - value: ${{ steps.k3s.outputs.server }} - description: > - Kubernetes server version, as reported by the Kubernetes API - pause-image: - value: ${{ steps.k3s.outputs.pause-image }} - description: > - Pause image for prefetch images DaemonSet - -runs: - using: composite - steps: - - id: k3d - name: Install k3d - shell: bash - env: - K3D_TAG: ${{ inputs.k3d-tag }} - run: | - curl --fail --silent https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | - TAG="${K3D_TAG#latest}" bash - k3d version | awk '{ print "${tolower($1)}=${$3}" >> $GITHUB_OUTPUT }' - - - id: k3s - name: Start k3s - shell: bash - run: | - k3d cluster create --image '+${{ inputs.k3s-channel }}' --no-lb --timeout=2m --wait - kubectl version | awk '{ print "${tolower($1)}=${$3}" >> $GITHUB_OUTPUT }' - - PAUSE_IMAGE=$(docker exec $(k3d node list --output json | jq --raw-output 'first.name') \ - k3s agent --help | awk '$1 == "--pause-image" { - match($0, /default: "[^"]*"/); - print substr($0, RSTART+10, RLENGTH-11) - }') - echo "pause-image=${PAUSE_IMAGE}" >> $GITHUB_OUTPUT - - - name: Prefetch container images - shell: bash - env: - INPUT_IMAGES: ${{ inputs.prefetch-images }} - INPUT_TIMEOUT: ${{ inputs.prefetch-timeout }} - run: | - jq <<< "$INPUT_IMAGES" --raw-input 'select(. 
!= "")' | - jq --slurp \ - --arg pause '${{ steps.k3s.outputs.pause-image }}' \ - --argjson labels '{"name":"image-prefetch"}' \ - --argjson name '"image-prefetch"' \ - '{ - apiVersion: "apps/v1", kind: "DaemonSet", - metadata: { name: $name, labels: $labels }, - spec: { - selector: { matchLabels: $labels }, - template: { - metadata: { labels: $labels }, - spec: { - initContainers: to_entries | map({ - name: "c\(.key)", image: .value, command: ["true"], - }), - containers: [{ name: "pause", image: $pause }] - } - } - } - }' | - kubectl create --filename=- - kubectl rollout status daemonset.apps/image-prefetch --timeout "$INPUT_TIMEOUT" || - kubectl describe daemonset.apps/image-prefetch diff --git a/.github/actions/trivy/action.yaml b/.github/actions/trivy/action.yaml deleted file mode 100644 index d5d51e0441..0000000000 --- a/.github/actions/trivy/action.yaml +++ /dev/null @@ -1,107 +0,0 @@ -name: Trivy -description: Scan this project using Trivy - -# The Trivy team maintains an action, but it has trouble caching its vulnerability data: -# https://github.com/aquasecurity/trivy-action/issues/389 -# -# The action below uses any recent cache matching `cache-prefix` and calculates a cache key -# derived from the data Trivy downloads. - -inputs: - cache: - default: restore,success,use - description: >- - What Trivy data to cache; one or more of restore, save, success, or use. - - database: - default: update - description: >- - How Trivy should handle its data; one of update or skip. - - setup: - default: v0.57.1,cache - description: >- - How to install Trivy; one or more of version, none, or cache. - - cache-directory: - default: ${{ github.workspace }}/.cache/trivy - - cache-prefix: - default: cache-trivy - - scan-target: - default: . - - scan-type: - default: filesystem - -runs: - using: composite - steps: - # Parse list inputs as separated by commas and spaces. - # Select the maximum version-looking string from `inputs.setup`. - - id: parsed - shell: bash - run: | - # Validate inputs - ( - <<< '${{ inputs.cache }}' jq -rRsS '"cache=\(split("[,\\s]+"; "") - [""])"' - <<< '${{ inputs.setup }}' jq -rRsS ' - "setup=\(split("[,\\s]+"; "") - [""])", - "version=\(split("[,\\s]+"; "") | max_by(split("[v.]"; "") | map(tonumber?)))" - ' - ) | tee --append $GITHUB_OUTPUT - - # Install Trivy as requested. - - if: ${{ ! contains(fromJSON(steps.parsed.outputs.setup), 'none') }} - uses: aquasecurity/setup-trivy@v0.2.2 - with: - cache: ${{ contains(fromJSON(steps.parsed.outputs.setup), 'cache') }} - version: ${{ steps.parsed.outputs.version }} - - # Restore a recent cache beginning with the prefix. - - id: restore - if: ${{ contains(fromJSON(steps.parsed.outputs.cache), 'restore') }} - uses: actions/cache/restore@v4 - with: - path: ${{ inputs.cache-directory }} - key: ${{ inputs.cache-prefix }}- - - - id: trivy - shell: bash - env: - TRIVY_CACHE_DIR: >- - ${{ contains(fromJSON(steps.parsed.outputs.cache), 'use') && inputs.cache-directory || '' }} - TRIVY_SKIP_CHECK_UPDATE: ${{ inputs.database == 'skip' }} - TRIVY_SKIP_DB_UPDATE: ${{ inputs.database == 'skip' }} - TRIVY_SKIP_JAVA_DB_UPDATE: ${{ inputs.database == 'skip' }} - run: | - # Run Trivy - trivy '${{ inputs.scan-type }}' '${{ inputs.scan-target }}' || result=$? - - checksum=$([[ -z "${TRIVY_CACHE_DIR}" ]] || cat "${TRIVY_CACHE_DIR}/"*/metadata.json | sha256sum) - echo 'cache-key=${{ inputs.cache-prefix }}-'"${checksum%% *}" >> $GITHUB_OUTPUT - - exit "${result-0}" - - # Save updated data to the cache when requested. 
- - if: >- - ${{ - steps.restore.outcome == 'success' && - steps.restore.outputs.cache-matched-key == steps.trivy.outputs.cache-key - }} - shell: bash - run: | - # Cache hit on ${{ steps.restore.outputs.cache-matched-key }} - - if: >- - ${{ - steps.restore.outputs.cache-matched-key != steps.trivy.outputs.cache-key && - ( - (contains(fromJSON(steps.parsed.outputs.cache), 'save') && !cancelled()) || - (contains(fromJSON(steps.parsed.outputs.cache), 'success') && success()) - ) - }} - uses: actions/cache/save@v4 - with: - key: ${{ steps.trivy.outputs.cache-key }} - path: ${{ inputs.cache-directory }} diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 639a059edc..0000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,16 +0,0 @@ -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/customizing-dependency-updates -# -# See: https://www.github.com/dependabot/dependabot-core/issues/4605 ---- -# yaml-language-server: $schema=https://json.schemastore.org/dependabot-2.0.json -version: 2 -updates: - - package-ecosystem: github-actions - directory: / - schedule: - interval: weekly - day: tuesday - groups: - all-github-actions: - patterns: ['*'] diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index b03369bf09..0000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,30 +0,0 @@ -**Checklist:** - - - - [ ] Have you added an explanation of what your changes do and why you'd like them to be included? - - [ ] Have you updated or added documentation for the change, as applicable? - - [ ] Have you tested your changes on all related environments with successful results, as applicable? - - [ ] Have you added automated tests? - - - -**Type of Changes:** - - - - [ ] New feature - - [ ] Bug fix - - [ ] Documentation - - [ ] Testing enhancement - - [ ] Other - - -**What is the current behavior (link to any open issues here)?** - - - -**What is the new behavior (if this is a feature change)?** -- [ ] Breaking change (fix or feature that would cause existing functionality to change) - - - -**Other Information**: diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml deleted file mode 100644 index 78079bd4bc..0000000000 --- a/.github/workflows/codeql-analysis.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# https://codeql.github.com -name: CodeQL - -on: - pull_request: - push: - branches: - - main - schedule: - - cron: '10 18 * * 2' - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - -jobs: - analyze: - if: ${{ github.repository == 'CrunchyData/postgres-operator' }} - permissions: - actions: read - contents: read - security-events: write - - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: { languages: go } - - - name: Autobuild - # This action calls `make` which runs our "help" target. 
- uses: github/codeql-action/autobuild@v3 - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/govulncheck.yaml b/.github/workflows/govulncheck.yaml deleted file mode 100644 index df81b90e53..0000000000 --- a/.github/workflows/govulncheck.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# https://go.dev/security/vuln -name: govulncheck - -on: - pull_request: - push: - branches: - - main - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - -jobs: - vulnerabilities: - if: ${{ github.repository == 'CrunchyData/postgres-operator' }} - permissions: - security-events: write - - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - # Install Go and produce a SARIF report. This fails only when the tool is - # unable to scan. - - name: Prepare report - uses: golang/govulncheck-action@v1 - with: - output-file: 'govulncheck-results.sarif' - output-format: 'sarif' - repo-checkout: false - - # Submit the SARIF report to GitHub code scanning. Pull request checks - # succeed or fail according to branch protection rules. - # - https://docs.github.com/en/code-security/code-scanning - - name: Upload results to GitHub - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: 'govulncheck-results.sarif' - - # Print any detected vulnerabilities to the workflow log. This step fails - # when the tool detects a vulnerability in code that is called. - # - https://go.dev/blog/govulncheck - - name: Log results - run: govulncheck --format text --show verbose ./... diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index fa84193d09..0000000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: Linters - -on: - pull_request: - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - -jobs: - golangci-lint: - runs-on: ubuntu-24.04 - permissions: - contents: read - checks: write - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - - uses: golangci/golangci-lint-action@v6 - with: - version: latest - args: --timeout=5m - - # Count issues reported by disabled linters. The command always - # exits zero to ensure it does not fail the pull request check. 
- - name: Count non-blocking issues - run: | - golangci-lint run --config .golangci.next.yaml \ - --issues-exit-code 0 \ - --max-issues-per-linter 0 \ - --max-same-issues 0 \ - --out-format json | - jq --sort-keys 'reduce .Issues[] as $i ({}; .[$i.FromLinter] += 1)' | - awk >> "${GITHUB_STEP_SUMMARY}" ' - NR == 1 { print "```json" } { print } END { if (NR > 0) print "```" } - ' || true diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 2ef4f1c6e6..0000000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,211 +0,0 @@ -name: Tests - -on: - pull_request: - branches: - - REL_5_8 - push: - branches: - - REL_5_8 - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - -jobs: - go-test: - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - run: make check - - run: make check-generate - - - name: Ensure go.mod is tidy - run: go mod tidy && git diff --exit-code -- go.mod - - kubernetes-api: - runs-on: ubuntu-24.04 - needs: [go-test] - strategy: - fail-fast: false - matrix: - kubernetes: ['default'] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - run: go mod download - - run: ENVTEST_K8S_VERSION="${KUBERNETES#default}" make check-envtest - env: - KUBERNETES: "${{ matrix.kubernetes }}" - GO_TEST: go test --coverprofile 'envtest.coverage' --coverpkg ./internal/... - - # Upload coverage to GitHub - - run: gzip envtest.coverage - - uses: actions/upload-artifact@v4 - with: - name: "~coverage~kubernetes-api=${{ matrix.kubernetes }}" - path: envtest.coverage.gz - retention-days: 1 - - kubernetes-k3d: - if: "${{ github.repository == 'CrunchyData/postgres-operator' }}" - runs-on: ubuntu-24.04 - needs: [go-test] - strategy: - fail-fast: false - matrix: - kubernetes: [v1.31, v1.28] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - - name: Start k3s - uses: ./.github/actions/k3d - with: - k3s-channel: "${{ matrix.kubernetes }}" - prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516 - - - run: make createnamespaces check-envtest-existing - env: - PGO_TEST_TIMEOUT_SCALE: 1.2 - GO_TEST: go test --coverprofile 'envtest-existing.coverage' --coverpkg ./internal/... 
- - # Upload coverage to GitHub - - run: gzip envtest-existing.coverage - - uses: actions/upload-artifact@v4 - with: - name: "~coverage~kubernetes-k3d=${{ matrix.kubernetes }}" - path: envtest-existing.coverage.gz - retention-days: 1 - - kuttl-k3d: - runs-on: ubuntu-24.04 - needs: [go-test] - strategy: - fail-fast: false - matrix: - kubernetes: [v1.32, v1.31, v1.30, v1.29, v1.28] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - - name: Start k3s - uses: ./.github/actions/k3d - with: - k3s-channel: "${{ matrix.kubernetes }}" - prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516 - registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0 - - run: go mod download - - name: Build executable - run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator - - - name: Get pgMonitor files. - run: make get-pgmonitor - env: - PGMONITOR_DIR: "${{ github.workspace }}/hack/tools/pgmonitor" - QUERIES_CONFIG_DIR: "${{ github.workspace }}/hack/tools/queries" - - # Start a Docker container with the working directory mounted. 
- - name: Start PGO - run: | - kubectl apply --server-side -k ./config/namespace - kubectl apply --server-side -k ./config/dev - hack/create-kubeconfig.sh postgres-operator pgo - docker run --detach --network host --read-only \ - --volume "$(pwd):/mnt" --workdir '/mnt' --env 'PATH=/mnt/bin' \ - --env 'CHECK_FOR_UPGRADES=false' \ - --env 'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \ - --env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \ - --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516' \ - --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516' \ - --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516' \ - --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516' \ - --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516' \ - --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516' \ - --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516' \ - --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2516' \ - --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0' \ - --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ - --name 'postgres-operator' ubuntu \ - postgres-operator - - name: Install kuttl - run: | - curl -Lo /usr/local/bin/kubectl-kuttl https://github.com/kudobuilder/kuttl/releases/download/v0.13.0/kubectl-kuttl_0.13.0_linux_x86_64 - chmod +x /usr/local/bin/kubectl-kuttl - - - run: make generate-kuttl - env: - KUTTL_PG_UPGRADE_FROM_VERSION: '16' - KUTTL_PG_UPGRADE_TO_VERSION: '17' - KUTTL_PG_VERSION: '16' - KUTTL_POSTGIS_VERSION: '3.4' - KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516' - - run: | - make check-kuttl && exit - failed=$? - echo '::group::PGO logs'; docker logs 'postgres-operator'; echo '::endgroup::' - exit $failed - env: - KUTTL_TEST: kubectl-kuttl test - - name: Stop PGO - run: docker stop 'postgres-operator' || true - - coverage-report: - if: ${{ success() || contains(needs.*.result, 'success') }} - runs-on: ubuntu-24.04 - needs: - - kubernetes-api - - kubernetes-k3d - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: { go-version: stable } - - uses: actions/download-artifact@v4 - with: { path: download } - - # Combine the coverage profiles by taking the mode line from any one file - # and the data from all files. Write a list of functions with less than - # 100% coverage to the job summary, and upload a complete HTML report. 
- - name: Generate report - run: | - gunzip --keep download/*/*.gz - ( sed -e '1q' download/*/*.coverage - tail -qn +2 download/*/*.coverage ) > total.coverage - go tool cover --func total.coverage -o total-coverage.txt - go tool cover --html total.coverage -o total-coverage.html - - awk < total-coverage.txt ' - END { print "<details><summary>
Total Coverage: <code>" $3 " " $2 "</code></summary>" } - ' >> "${GITHUB_STEP_SUMMARY}" - - sed < total-coverage.txt -e '/100.0%/d' -e "s,$(go list -m)/,," | column -t | awk ' - NR == 1 { print "\n\n```" } { print } END { if (NR > 0) print "```\n\n"; print "</details>
" } - ' >> "${GITHUB_STEP_SUMMARY}" - - # Upload coverage to GitHub - - run: gzip total-coverage.html - - uses: actions/upload-artifact@v4 - with: - name: coverage-report=html - path: total-coverage.html.gz - retention-days: 15 diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml deleted file mode 100644 index de07b96c08..0000000000 --- a/.github/workflows/trivy.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# https://aquasecurity.github.io/trivy -name: Trivy - -on: - pull_request: - push: - branches: - - main - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - -jobs: - cache: - # Run only one of these jobs at a time across the entire project. - concurrency: { group: trivy-cache } - # Do not fail this workflow when this job fails. - continue-on-error: true - - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - name: Download Trivy - uses: ./.github/actions/trivy - env: - TRIVY_DEBUG: true - TRIVY_DOWNLOAD_DB_ONLY: true - TRIVY_NO_PROGRESS: true - TRIVY_SCANNERS: license,secret,vuln - - licenses: - # Run this job after the cache job regardless of its success or failure. - needs: [cache] - if: >- - ${{ !cancelled() }} - - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - # Trivy needs a populated Go module cache to detect Go module licenses. - - uses: actions/setup-go@v5 - with: { go-version: stable } - - run: go mod download - - # Report success only when detected licenses are listed in [/trivy.yaml]. - - name: Scan licenses - uses: ./.github/actions/trivy - env: - TRIVY_DEBUG: true - TRIVY_EXIT_CODE: 1 - TRIVY_SCANNERS: license - with: - cache: restore,use - database: skip - - vulnerabilities: - # Run this job after the cache job regardless of its success or failure. - needs: [cache] - if: >- - ${{ github.repository == 'CrunchyData/postgres-operator' && !cancelled() }} - permissions: - security-events: write - - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - # Print any detected secrets or vulnerabilities to the workflow log for - # human consumption. This step fails only when Trivy is unable to scan. - # A later step uploads results to GitHub as a pull request check. - - name: Log detected vulnerabilities - uses: ./.github/actions/trivy - env: - TRIVY_SCANNERS: secret,vuln - with: - cache: restore,use - database: skip - - # Produce a SARIF report of actionable results. This step fails only when - # Trivy is unable to scan. - - name: Report actionable vulnerabilities - uses: ./.github/actions/trivy - env: - TRIVY_IGNORE_UNFIXED: true - TRIVY_FORMAT: 'sarif' - TRIVY_OUTPUT: 'trivy-results.sarif' - TRIVY_SCANNERS: secret,vuln - with: - cache: use - database: skip - setup: none - - # Submit the SARIF report to GitHub code scanning. Pull request checks - succeed or fail according to branch protection rules. 
- # - https://docs.github.com/en/code-security/code-scanning - - name: Upload results to GitHub - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: 'trivy-results.sarif' From 398bd33370bd1a8d8212aea7e6a29e5847083e68 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 19 May 2025 16:33:03 -0500 Subject: [PATCH 36/79] Download the Trivy binary rather than compile it This is significantly faster and aligns with the upstream action for GitHub: github.com/aquasecurity/setup-trivy@v0.2.3 --- .gitlab-ci.yml | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fb5abdea61..0e611a6309 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -13,13 +13,12 @@ spec: description: > The CPU architectures on which to run tests - # TODO(retention): We can increase the retention on scheduled pipelines after - # https://gitlab.com/groups/gitlab-org/-/epics/16321 + # https://docs.gitlab.com/ci/yaml#artifactsexpire_in retention: type: string default: 2d # Enough time to find and address MR failures the following day description: > - How long to keep reports; see https://docs.gitlab.com/ci/yaml#artifactsexpire_in + How long to keep reports --- # https://docs.gitlab.com/ci/yaml/workflow @@ -35,7 +34,6 @@ variables: # Show the duration of individual script items in the job log. FF_SCRIPT_SECTIONS: 'true' -# See: [.github/workflows/lint.yaml] # This uses a specific minor version of golangci-lint to ensure new code conforms # to the rules we set when this release branch was cut. We do not want new rules # suggesting sweeping changes to our release branches. @@ -95,7 +93,6 @@ golang-lint: reports: junit: golangci-lint.junit.xml -# See: [.github/workflows/test.yaml] must-commit-generated: stage: build needs: [] @@ -107,7 +104,6 @@ must-commit-generated: - git config --global --add safe.directory "$(pwd)" - make check-generate -# See: [.github/workflows/test.yaml] # This uses the latest version of Go we have internally. go-test: stage: test @@ -146,7 +142,7 @@ go-test: reports: junit: '*.junit.xml' -# See: [.github/workflows/govulncheck.yaml] +# https://go.dev/blog/govulncheck govulncheck: stage: test needs: [] @@ -169,7 +165,7 @@ govulncheck: # This fails the job when it detects a vulnerability in called code. - go run "${TOOL}" --format text --show verbose ./... -# See: [.github/workflows/trivy.yaml] +# https://trivy.dev/latest/ecosystem/cicd trivy: stage: test needs: [] @@ -187,22 +183,25 @@ trivy: # Download Trivy and log its version. - |- VERSION=$(go list -m -f '{{.Version}}' github.com/aquasecurity/trivy@latest) - TOOL="github.com/aquasecurity/trivy/cmd/trivy@${VERSION}" - go run -exec true "${TOOL}" - - # Download the JUnit template for this version. - - curl -sSL -o /tmp/trivy-junit.tpl "https://raw.githubusercontent.com/aquasecurity/trivy/refs/tags/${VERSION}/contrib/junit.tpl" + git clone --config 'advice.detachedHead=no' --depth 1 --branch "${VERSION}" --sparse \ + 'https://github.com/aquasecurity/trivy.git' \ + '.gitlab-remotes/aquasecurity-trivy' + ( + cd '.gitlab-remotes/aquasecurity-trivy' + git sparse-checkout set 'contrib' + bash 'contrib/install.sh' -b "${HOME}/bin" "${VERSION}" + ) # Generate a report and fail when there are issues that can be fixed. # Trivy needs a populated Go module cache to detect Go module licenses. - go mod download - >- - go run "${TOOL}" filesystem . --exit-code 1 + trivy filesystem . 
--exit-code 1 --scanners license,secret,vuln --ignore-unfixed --no-progress --format template - --template '@/tmp/trivy-junit.tpl' + --template '@.gitlab-remotes/aquasecurity-trivy/contrib/junit.tpl' --output 'trivy.junit.xml' # Send the report to GitLab. From 68cc3349192f70196b71e471045963d80b6b1c53 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 21 May 2025 12:35:14 -0700 Subject: [PATCH 37/79] pgMonitor v5.2.1 bump: Combine ccp_archive_command_status queries into one query. Add semicolons to the end of all queries. Make ccp_replication_lag_size return the replica name for grafana dashboard legend. DROP functions rather than CREATE OR REPLACE to avoid errors due to changes in functions. --- Makefile | 2 +- .../generated/gte_pg17_fast_metrics.json | 2 +- .../generated/lt_pg17_fast_metrics.json | 2 +- .../generated/pgbouncer_metrics_queries.json | 2 +- .../generated/postgres_5m_metrics.json | 2 +- .../generated/postgres_5s_metrics.json | 2 +- internal/collector/gte_pg17_fast_metrics.yaml | 43 ++++++---------- internal/collector/lt_pg17_fast_metrics.yaml | 27 +++------- .../collector/pgbouncer_metrics_queries.yaml | 12 ++--- internal/collector/postgres_5m_metrics.yaml | 4 +- internal/collector/postgres_5s_metrics.yaml | 49 ++++++------------- .../postgrescluster/metrics_setup.sql | 16 +++--- .../postgrescluster/pgmonitor_test.go | 2 +- 13 files changed, 60 insertions(+), 105 deletions(-) diff --git a/Makefile b/Makefile index d50834deb8..9a5ef90b0c 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGO_IMAGE_URL ?= https://www.crunchydata.com/products/crunchy-postgresql-for-kub PGO_IMAGE_PREFIX ?= localhost PGMONITOR_DIR ?= hack/tools/pgmonitor -PGMONITOR_VERSION ?= v5.1.1 +PGMONITOR_VERSION ?= v5.2.1 QUERIES_CONFIG_DIR ?= hack/tools/queries # Buildah's "build" used to be "bud". Use the alias to be compatible for a while. diff --git a/internal/collector/generated/gte_pg17_fast_metrics.json b/internal/collector/generated/gte_pg17_fast_metrics.json index b0c312b3aa..9553e8c756 100644 --- a/internal/collector/generated/gte_pg17_fast_metrics.json +++ b/internal/collector/generated/gte_pg17_fast_metrics.json @@ -1 +1 @@ -[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_written FROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. 
These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. 
Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/lt_pg17_fast_metrics.json b/internal/collector/generated/lt_pg17_fast_metrics.json index d6266ffacb..55b6ca78fc 100644 --- a/internal/collector/generated/lt_pg17_fast_metrics.json +++ b/internal/collector/generated/lt_pg17_fast_metrics.json @@ -1 +1 @@ -[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_checkpoint AS buffers_written FROM pg_catalog.pg_stat_bgwriter c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. 
These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS 
num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] diff --git a/internal/collector/generated/pgbouncer_metrics_queries.json b/internal/collector/generated/pgbouncer_metrics_queries.json index 78260bcf44..21ebb140bc 100644 --- a/internal/collector/generated/pgbouncer_metrics_queries.json +++ b/internal/collector/generated/pgbouncer_metrics_queries.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS"},{"metrics":[{"attribute_columns":["name","port","database"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database"],"description":"Minimum number of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool","value_column":"reserve_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used 
again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS"}] +[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS;"},{"metrics":[{"attribute_columns":["name","port","database"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database"],"description":"Minimum number of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool_size","value_column":"reserve_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES;"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS;"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW 
POOLS;"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS;"}] diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json index 6c438218a9..f8f73cdde5 100644 --- a/internal/collector/generated/postgres_5m_metrics.json +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_catalog.pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). 
Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum_status","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] diff --git a/internal/collector/generated/postgres_5s_metrics.json b/internal/collector/generated/postgres_5s_metrics.json index 978f89d305..dda612ae59 100644 --- a/internal/collector/generated/postgres_5s_metrics.json +++ b/internal/collector/generated/postgres_5s_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction 
connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n 
('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as 
snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes 
tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database 
processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS 
seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process 
starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in 
bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. 
Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this 
database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] +[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"},{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"},{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"},{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT\n COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive,\n archived_count,\n failed_count,\n CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM 
pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT 
NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit;\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were 
throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU 
list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes 
transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats();\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer 
(######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"attribute_columns":["replica"],"description":"Replication lag in bytes.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , 
maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Maximum time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , 
query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. 
Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of temporary files created by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this 
database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] diff --git a/internal/collector/gte_pg17_fast_metrics.yaml b/internal/collector/gte_pg17_fast_metrics.yaml index 688a919f5c..a590b48272 100644 --- a/internal/collector/gte_pg17_fast_metrics.yaml +++ b/internal/collector/gte_pg17_fast_metrics.yaml @@ -4,17 +4,6 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - - sql: > - SELECT c.buffers_written - FROM pg_catalog.pg_stat_checkpointer c; - metrics: - - metric_name: ccp_stat_bgwriter_buffers_checkpoint - value_column: buffers_written - data_type: sum - description: Number of buffers written during checkpoints and restartpoints - static_attributes: - server: "localhost:5432" - - sql: > SELECT s.writes @@ -22,16 +11,16 @@ FROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer'; metrics: - - metric_name: ccp_stat_bgwriter_buffers_backend + - metric_name: ccp_stat_io_bgwriter_writes value_column: writes data_type: sum - description: Number of write operations, each of the size specified in op_bytes. + description: Number of write operations by background writers static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_buffers_backend_fsync + - metric_name: ccp_stat_io_bgwriter_fsyncs value_column: fsyncs data_type: sum - description: Number of fsync calls. These are only tracked in context normal. 
+ description: Number of fsync calls by background writers static_attributes: server: "localhost:5432" @@ -44,25 +33,25 @@ , c.buffers_written FROM pg_catalog.pg_stat_checkpointer c; metrics: - - metric_name: ccp_stat_bgwriter_checkpoint_sync_time - value_column: sync_time - description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds + - metric_name: ccp_stat_checkpointer_num_timed + value_column: num_timed + description: Number of scheduled checkpoints that have been performed static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_write_time + - metric_name: ccp_stat_checkpointer_num_requested + value_column: num_requested + description: Number of requested checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_write_time value_column: write_time value_type: double description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_req - value_column: num_requested - description: Number of requested checkpoints that have been performed - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_timed - value_column: num_timed - description: Number of scheduled checkpoints that have been performed + - metric_name: ccp_stat_checkpointer_sync_time + value_column: sync_time + description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds static_attributes: server: "localhost:5432" - metric_name: ccp_stat_checkpointer_buffers_written diff --git a/internal/collector/lt_pg17_fast_metrics.yaml b/internal/collector/lt_pg17_fast_metrics.yaml index 330ff7d798..576ea8e4a6 100644 --- a/internal/collector/lt_pg17_fast_metrics.yaml +++ b/internal/collector/lt_pg17_fast_metrics.yaml @@ -4,33 +4,22 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - - sql: > - SELECT c.buffers_checkpoint AS buffers_written - FROM pg_catalog.pg_stat_bgwriter c; - metrics: - - metric_name: ccp_stat_bgwriter_buffers_checkpoint - value_column: buffers_written - data_type: sum - description: Number of buffers written during checkpoints and restartpoints - static_attributes: - server: "localhost:5432" - - sql: > SELECT s.buffers_backend AS writes , s.buffers_backend_fsync AS fsyncs FROM pg_catalog.pg_stat_bgwriter s; metrics: - - metric_name: ccp_stat_bgwriter_buffers_backend + - metric_name: ccp_stat_io_bgwriter_writes value_column: writes data_type: sum - description: Number of write operations, each of the size specified in op_bytes. + description: Number of write operations by background writers static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_buffers_backend_fsync + - metric_name: ccp_stat_io_bgwriter_fsyncs value_column: fsyncs data_type: sum - description: Number of fsync calls. These are only tracked in context normal. 
+ description: Number of fsync calls by background writers static_attributes: server: "localhost:5432" @@ -43,23 +32,23 @@ , c.buffers_checkpoint AS buffers_written FROM pg_catalog.pg_stat_bgwriter c; metrics: - - metric_name: ccp_stat_bgwriter_checkpoints_timed + - metric_name: ccp_stat_checkpointer_num_timed value_column: num_timed description: Number of scheduled checkpoints that have been performed static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_req + - metric_name: ccp_stat_checkpointer_num_requested value_column: num_requested description: Number of requested checkpoints that have been performed static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_write_time + - metric_name: ccp_stat_checkpointer_write_time value_column: write_time value_type: double description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_sync_time + - metric_name: ccp_stat_checkpointer_sync_time value_column: sync_time description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds static_attributes: diff --git a/internal/collector/pgbouncer_metrics_queries.yaml b/internal/collector/pgbouncer_metrics_queries.yaml index a4e3a918fb..21cd0ae6ee 100644 --- a/internal/collector/pgbouncer_metrics_queries.yaml +++ b/internal/collector/pgbouncer_metrics_queries.yaml @@ -4,7 +4,7 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml - - sql: "SHOW CLIENTS" + - sql: "SHOW CLIENTS;" metrics: - metric_name: ccp_pgbouncer_clients_wait_seconds value_column: wait @@ -15,7 +15,7 @@ # can be NULL; the collector will warn against NULL even when not used. But it will emit # an error log if those columns are used. # The host column should always point either to pgBouncer's virtual database (the null case) or to the primary. 
- - sql: "SHOW DATABASES" + - sql: "SHOW DATABASES;" metrics: - metric_name: ccp_pgbouncer_databases_pool_size value_column: pool_size @@ -27,7 +27,7 @@ attribute_columns: ["name", "port", "database"] description: "Minimum number of server connections" - - metric_name: ccp_pgbouncer_databases_reserve_pool + - metric_name: ccp_pgbouncer_databases_reserve_pool_size value_column: reserve_pool_size attribute_columns: ["name", "port", "database"] description: "Maximum number of additional connections for this database" @@ -54,14 +54,14 @@ attribute_columns: ["name", "port", "database"] description: "1 if this database is currently disabled, else 0" - - sql: "SHOW LISTS" + - sql: "SHOW LISTS;" metrics: - metric_name: ccp_pgbouncer_lists_item_count value_column: items attribute_columns: ["list"] description: "Count of items registered with pgBouncer" - - sql: "SHOW POOLS" + - sql: "SHOW POOLS;" metrics: - metric_name: ccp_pgbouncer_pools_client_active value_column: cl_active @@ -92,7 +92,7 @@ Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again - - sql: "SHOW SERVERS" + - sql: "SHOW SERVERS;" metrics: - metric_name: ccp_pgbouncer_servers_close_needed value_column: close_needed diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml index ce04b443d3..b554ed8dae 100644 --- a/internal/collector/postgres_5m_metrics.yaml +++ b/internal/collector/postgres_5m_metrics.yaml @@ -5,7 +5,7 @@ # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - sql: > SELECT datname as dbname - , pg_database_size(datname) as bytes + , pg_catalog.pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false; metrics: @@ -38,7 +38,7 @@ - sql: SELECT monitor.pg_hba_checksum() AS status; metrics: - - metric_name: ccp_pg_hba_checksum + - metric_name: ccp_pg_hba_checksum_status value_column: status description: | Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf). 
diff --git a/internal/collector/postgres_5s_metrics.yaml b/internal/collector/postgres_5s_metrics.yaml index 6d92dfa75a..d424dcb014 100644 --- a/internal/collector/postgres_5s_metrics.yaml +++ b/internal/collector/postgres_5s_metrics.yaml @@ -4,7 +4,7 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml # - # TODO ccp_pg_stat_activity can be removed after metrics are fully aligned with the latest pgMonitor + # TODO ccp_pg_stat_activity can be removed/replaced once an equivalent metric is added to pgMonitor - sql: > SELECT pg_database.datname, @@ -43,9 +43,15 @@ - sql: > SELECT - COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive + COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive, + archived_count, + failed_count, + CASE + WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 + WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 + ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) + END AS seconds_since_last_fail FROM pg_catalog.pg_stat_archiver; - metrics: - metric_name: ccp_archive_command_status_seconds_since_last_archive value_column: seconds_since_last_archive @@ -53,36 +59,16 @@ description: Seconds since the last successful archive operation static_attributes: server: "localhost:5432" - - - sql: > - SELECT archived_count - FROM pg_catalog.pg_stat_archiver - metrics: - metric_name: ccp_archive_command_status_archived_count value_column: archived_count description: Number of WAL files that have been successfully archived static_attributes: server: "localhost:5432" - - - sql: > - SELECT failed_count - FROM pg_catalog.pg_stat_archiver - metrics: - metric_name: ccp_archive_command_status_failed_count value_column: failed_count description: Number of failed attempts for archiving WAL files static_attributes: server: "localhost:5432" - - - sql: > - SELECT CASE - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 - ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) - END AS seconds_since_last_fail - FROM pg_catalog.pg_stat_archiver - - metrics: - metric_name: ccp_archive_command_status_seconds_since_last_fail value_column: seconds_since_last_fail description: Seconds since the last recorded failure of the archive_command @@ -103,7 +89,6 @@ , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true); - metrics: - metric_name: ccp_connection_stats_active value_column: active @@ -201,7 +186,7 @@ - sql: > SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request - , monitor.kdapi_scalar_bigint('cpu_limit') AS limit + , monitor.kdapi_scalar_bigint('cpu_limit') AS limit; metrics: - metric_name: ccp_nodemx_cpu_limit value_column: limit @@ -300,7 +285,7 @@ FROM monitor.proc_mountinfo() m JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number) WHERE m.mount_point IN ('/pgdata', '/pgwal') OR - m.mount_point like '/tablespaces/%' + m.mount_point like '/tablespaces/%'; metrics: - 
metric_name: ccp_nodemx_data_disk_available_bytes value_column: available_bytes @@ -472,7 +457,7 @@ ,tx_bytes ,tx_packets ,rx_bytes - ,rx_packets from monitor.proc_network_stats() + ,rx_packets from monitor.proc_network_stats(); metrics: - metric_name: ccp_nodemx_network_rx_bytes value_column: rx_bytes @@ -634,7 +619,8 @@ - metric_name: ccp_replication_lag_size_bytes value_column: bytes value_type: double - description: Time interval in seconds since PostgreSQL database was last restarted. + description: Replication lag in bytes. + attribute_columns: ['replica'] static_attributes: server: "localhost:5432" @@ -944,13 +930,6 @@ static_attributes: server: "localhost:5432" stanza: "db" - - metric_name: ccp_backrest_last_info_repo_total_size_bytes - description: Total size of this backup in the pgbackrest repository, including all required previous backups and WAL - value_column: repo_total_size_bytes - attribute_columns: ["backup_type", "repo"] - static_attributes: - server: "localhost:5432" - stanza: "db" - metric_name: ccp_backrest_oldest_full_backup_time_seconds description: Seconds since the oldest completed full backup value_column: oldest_full_backup diff --git a/internal/controller/postgrescluster/metrics_setup.sql b/internal/controller/postgrescluster/metrics_setup.sql index 858f95c023..dbaee4f030 100644 --- a/internal/controller/postgrescluster/metrics_setup.sql +++ b/internal/controller/postgrescluster/metrics_setup.sql @@ -71,23 +71,25 @@ $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring; ---- get_pgbackrest_info is used by the OTel collector. +DROP FUNCTION IF EXISTS get_replication_lag(); +--- get_replication_lag is used by the OTel collector. --- get_replication_lag is created as function, so that we can query without warning on a replica. -CREATE OR REPLACE FUNCTION get_replication_lag() RETURNS TABLE(bytes NUMERIC) AS $$ +CREATE FUNCTION get_replication_lag() RETURNS TABLE(replica text, bytes NUMERIC) AS $$ BEGIN IF pg_is_in_recovery() THEN - RETURN QUERY SELECT 0::NUMERIC AS bytes; + RETURN QUERY SELECT ''::text as replica, 0::NUMERIC AS bytes; ELSE - RETURN QUERY SELECT pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes + RETURN QUERY SELECT application_name AS replica, pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes FROM pg_catalog.pg_stat_replication; END IF; END; $$ LANGUAGE plpgsql; +DROP FUNCTION IF EXISTS get_pgbackrest_info(); --- get_pgbackrest_info is used by the OTel collector. --- get_pgbackrest_info is created as a function so that no ddl runs on a replica. --- In the query, the --stanza argument matches DefaultStanzaName, defined in internal/pgbackrest/config.go. 
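A detail worth spelling out in metrics_setup.sql: both functions are dropped and re-created rather than replaced because this patch changes their result types (get_replication_lag gains a replica column; get_pgbackrest_info loses repo_total_size_bytes), and PostgreSQL cannot change the return type of an existing function via CREATE OR REPLACE. A minimal sketch of the pattern, using a hypothetical function f:

  -- CREATE OR REPLACE with a different result type fails with
  -- "cannot change return type of existing function", so drop first.
  DROP FUNCTION IF EXISTS f();
  CREATE FUNCTION f() RETURNS TABLE(replica text, bytes numeric) AS $$
  BEGIN
      RETURN QUERY SELECT ''::text AS replica, 0::numeric AS bytes;
  END;
  $$ LANGUAGE plpgsql;
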
-CREATE OR REPLACE FUNCTION get_pgbackrest_info() +CREATE FUNCTION get_pgbackrest_info() RETURNS TABLE ( last_diff_backup BIGINT, last_full_backup BIGINT, @@ -97,7 +99,6 @@ RETURNS TABLE ( backup_type TEXT, backup_runtime_seconds BIGINT, repo_backup_size_bytes TEXT, - repo_total_size_bytes TEXT, oldest_full_backup BIGINT, repo TEXT ) AS $$ @@ -113,7 +114,6 @@ BEGIN 'n/a'::text AS backup_type, 0::bigint AS backup_runtime_seconds, '0'::text AS repo_backup_size_bytes, - '0'::text AS repo_total_size_bytes, 0::bigint AS oldest_full_backup, 'n/a' AS repo; ELSE @@ -151,7 +151,6 @@ BEGIN backup->'database'->>'repo-key' AS repo, backup->>'type' AS backup_type, backup->'info'->'repository'->>'delta' AS repo_backup_size_bytes, - backup->'info'->'repository'->>'size' AS repo_total_size_bytes, (backup->'timestamp'->>'stop')::bigint - (backup->'timestamp'->>'start')::bigint AS backup_runtime_seconds, CASE WHEN backup->>'error' = 'true' THEN 1 ELSE 0 END AS backup_error FROM ordered_backups @@ -207,7 +206,6 @@ BEGIN ccp_backrest_last_info.backup_type, ccp_backrest_last_info.backup_runtime_seconds, ccp_backrest_last_info.repo_backup_size_bytes, - ccp_backrest_last_info.repo_total_size_bytes, ccp_backrest_oldest_full_backup.time_seconds, ccp_backrest_last_incr_backup.repo FROM diff --git a/internal/controller/postgrescluster/pgmonitor_test.go b/internal/controller/postgrescluster/pgmonitor_test.go index bf46dd204b..e4ccaf0d9f 100644 --- a/internal/controller/postgrescluster/pgmonitor_test.go +++ b/internal/controller/postgrescluster/pgmonitor_test.go @@ -598,7 +598,7 @@ func TestReconcilePGMonitorExporterStatus(t *testing.T) { podExecCalled: false, // Status was generated manually for this test case // TODO (jmckulk): add code to generate status - status: v1beta1.MonitoringStatus{ExporterConfiguration: "5c5f955485"}, + status: v1beta1.MonitoringStatus{ExporterConfiguration: "86cdc4f778"}, statusChangedAfterReconcile: false, }} { t.Run(test.name, func(t *testing.T) { From 6fbd94bc0a4064346ef60aa78c3a2bb7d9487999 Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Thu, 29 May 2025 13:40:47 -0500 Subject: [PATCH 38/79] Add per db metrics (#4183) * Add per-db metrics to OTel This PR adds changes to allow per-db metrics in OTel: - change API for per-db metrics - add default metrics for per-db metrics based on pgmonitor 5.2.1 - remove unused metrics - add kuttl test --- ...res-operator.crunchydata.com_pgadmins.yaml | 13 ++ ...ator.crunchydata.com_postgresclusters.yaml | 13 ++ .../generated/gte_pg16_slow_metrics.json | 1 - .../generated/lt_pg16_slow_metrics.json | 1 - .../generated/pgbackrest_metrics.json | 1 - .../generated/postgres_5m_per_db_metrics.json | 1 + internal/collector/gte_pg16_slow_metrics.yaml | 127 -------------- internal/collector/lt_pg16_slow_metrics.yaml | 135 --------------- .../collector/postgres_5m_per_db_metrics.yaml | 161 ++++++++++++++++++ internal/collector/postgres_metrics.go | 115 ++++++++----- .../v1beta1/instrumentation_types.go | 11 ++ .../v1beta1/zz_generated.deepcopy.go | 10 ++ .../11--add-logs-exporter.yaml | 6 - ...add-per-db-metrics-to-postgrescluster.yaml | 4 + .../12-assert-per-db-queries.yaml | 32 ++++ ...ond-per-db-metrics-to-postgrescluster.yaml | 4 + .../13--cluster-no-backups.yaml | 6 - ...t-per-db-queries-for-multiple-targets.yaml | 32 ++++ ...e-per-db-metrics-from-postgrescluster.yaml | 4 + .../16-assert-per-db-query-removed.yaml | 32 ++++ .../17--add-custom-queries-per-db.yaml | 6 + .../18-assert-custom-queries-per-db.yaml | 42 +++++ .../19--add-logs-exporter.yaml 
| 6 + ...rted.yaml => 20-assert-logs-exported.yaml} | 0 .../21--cluster-no-backups.yaml | 6 + ...-instance.yaml => 22-assert-instance.yaml} | 0 ...kups.yaml => 23--cluster-add-backups.yaml} | 4 +- ...e-backups.yaml => 24--remove-backups.yaml} | 0 ...cluster.yaml => 25--annotate-cluster.yaml} | 2 +- .../e2e/otel-logging-and-metrics/README.md | 15 +- .../files/11--add-per-db-metrics.yaml | 17 ++ .../files/13--add-per-db-metrics.yaml | 11 ++ .../files/15--remove-per-db-metrics.yaml | 13 ++ .../files/17--add-custom-queries-per-db.yaml | 62 +++++++ .../files/17-custom-queries-per-db-added.yaml | 124 ++++++++++++++ ...porter.yaml => 19--add-logs-exporter.yaml} | 0 ...added.yaml => 19-logs-exporter-added.yaml} | 0 ...e-cluster.yaml => 21--create-cluster.yaml} | 0 ...r-created.yaml => 21-cluster-created.yaml} | 0 ...-add-backups.yaml => 23--add-backups.yaml} | 0 ...ckups-added.yaml => 23-backups-added.yaml} | 0 ...s-removed.yaml => 25-backups-removed.yaml} | 0 42 files changed, 692 insertions(+), 325 deletions(-) delete mode 100644 internal/collector/generated/gte_pg16_slow_metrics.json delete mode 100644 internal/collector/generated/lt_pg16_slow_metrics.json delete mode 100644 internal/collector/generated/pgbackrest_metrics.json create mode 100644 internal/collector/generated/postgres_5m_per_db_metrics.json delete mode 100644 internal/collector/gte_pg16_slow_metrics.yaml delete mode 100644 internal/collector/lt_pg16_slow_metrics.yaml create mode 100644 internal/collector/postgres_5m_per_db_metrics.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{12-assert-logs-exported.yaml => 20-assert-logs-exported.yaml} (100%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{14-assert-instance.yaml => 22-assert-instance.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{15--cluster-add-backups.yaml => 23--cluster-add-backups.yaml} (50%) rename testing/kuttl/e2e/otel-logging-and-metrics/{16--remove-backups.yaml => 24--remove-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{17--annotate-cluster.yaml => 25--annotate-cluster.yaml} (86%) create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml create mode 100644 
testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/files/{11--add-logs-exporter.yaml => 19--add-logs-exporter.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{11-logs-exporter-added.yaml => 19-logs-exporter-added.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{13--create-cluster.yaml => 21--create-cluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{13-cluster-created.yaml => 21-cluster-created.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{15--add-backups.yaml => 23--add-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{15-backups-added.yaml => 23-backups-added.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{17-backups-removed.yaml => 25-backups-removed.yaml} (100%) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index d26b968d41..c61f9a000a 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -2120,6 +2120,13 @@ spec: type: string x-kubernetes-validations: - rule: duration("0") <= self && self <= duration("60m") + databases: + description: |- + The databases to target with added custom queries. + Default behavior is to target `postgres`. + items: + type: string + type: array name: description: |- The name of this batch of queries, which will be used in naming the OTel @@ -2165,6 +2172,12 @@ spec: type: string type: array type: object + perDBMetricTargets: + description: User defined databases to target for default + per-db metrics + items: + type: string + type: array type: object resources: description: Resources holds the resource requirements for the diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 108eb59e58..c56769b5c0 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -11962,6 +11962,13 @@ spec: type: string x-kubernetes-validations: - rule: duration("0") <= self && self <= duration("60m") + databases: + description: |- + The databases to target with added custom queries. + Default behavior is to target `postgres`. 
+ items: + type: string + type: array name: description: |- The name of this batch of queries, which will be used in naming the OTel @@ -12007,6 +12014,12 @@ spec: type: string type: array type: object + perDBMetricTargets: + description: User defined databases to target for default + per-db metrics + items: + type: string + type: array type: object resources: description: Resources holds the resource requirements for the diff --git a/internal/collector/generated/gte_pg16_slow_metrics.json b/internal/collector/generated/gte_pg16_slow_metrics.json deleted file mode 100644 index 3b27be7bc0..0000000000 --- a/internal/collector/generated/gte_pg16_slow_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows 
updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , p.n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\n FROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/lt_pg16_slow_metrics.json b/internal/collector/generated/lt_pg16_slow_metrics.json deleted file mode 100644 index 98bb0cc213..0000000000 --- a/internal/collector/generated/lt_pg16_slow_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live 
rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/pgbackrest_metrics.json b/internal/collector/generated/pgbackrest_metrics.json deleted file mode 100644 index 63114afc03..0000000000 --- a/internal/collector/generated/pgbackrest_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] diff --git a/internal/collector/generated/postgres_5m_per_db_metrics.json b/internal/collector/generated/postgres_5m_per_db_metrics.json new file mode 100644 index 0000000000..0478569504 --- /dev/null +++ b/internal/collector/generated/postgres_5m_per_db_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["dbname","schemaname","relname"],"description":"Table size in bytes including indexes","metric_name":"ccp_table_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT current_database() as dbname , n.nspname as schemaname , c.relname , pg_catalog.pg_total_relation_size(c.oid) as bytes FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid WHERE NOT pg_is_other_temp_schema(n.oid) AND relkind IN ('r', 'm', 'f');\n"},{"metrics":[{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of sequential 
scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. 
These are always non-HOT updates.","metric_name":"ccp_stat_user_tables_n_tup_newpage_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_newpage_upd"},{"attribute_columns":["dbname","schemaname","relname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","schemaname","relname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"}],"sql":"SELECT current_database() as dbname , p.schemaname , p.relname , p.seq_scan , p.seq_tup_read , COALESCE(p.idx_scan, 0) AS idx_scan , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch , p.n_tup_ins , p.n_tup_upd , p.n_tup_del , p.n_tup_hot_upd , CASE\n WHEN current_setting('server_version_num')::int \u003e= 160000 \n THEN p.n_tup_newpage_upd\n ELSE 0::bigint\n END AS n_tup_newpage_upd\n, p.n_live_tup , p.n_dead_tup , p.vacuum_count , p.autovacuum_count , p.analyze_count , p.autoanalyze_count FROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/gte_pg16_slow_metrics.yaml b/internal/collector/gte_pg16_slow_metrics.yaml deleted file mode 100644 index 319aad62dc..0000000000 --- a/internal/collector/gte_pg16_slow_metrics.yaml +++ /dev/null @@ -1,127 +0,0 @@ -# This list of queries configures an OTel SQL Query Receiver to read pgMonitor -# metrics from Postgres. -# -# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries -# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - -# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. -# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values -# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. 
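Both version-split slow-metric files deleted here are superseded by the new `postgres_5m_per_db_metrics.yaml` introduced later in this patch, which moves the pg16 split into the SQL itself. A trimmed excerpt of that pattern (only Postgres 16+ has `n_tup_newpage_upd`, so earlier versions report a constant 0):

```yaml
- sql: >
    SELECT current_database() as dbname
    , p.relname
    , CASE
        WHEN current_setting('server_version_num')::int >= 160000
        THEN p.n_tup_newpage_upd
        ELSE 0::bigint
      END AS n_tup_newpage_upd
    FROM pg_catalog.pg_stat_user_tables p;
```

One query therefore serves both version ranges, which is what lets this file and its `lt_pg16` counterpart be removed.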
- - sql: > - SELECT - current_database() as dbname - , p.schemaname - , p.relname - , p.seq_scan - , p.seq_tup_read - , COALESCE(p.idx_scan, 0) AS idx_scan - , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch - , p.n_tup_ins - , p.n_tup_upd - , p.n_tup_del - , p.n_tup_hot_upd - , p.n_tup_newpage_upd - , p.n_live_tup - , p.n_dead_tup - , p.vacuum_count - , p.autovacuum_count - , p.analyze_count - , p.autoanalyze_count - FROM pg_catalog.pg_stat_user_tables p; - metrics: - - metric_name: ccp_stat_user_tables_analyze_count - data_type: sum - value_column: analyze_count - description: Number of times this table has been manually analyzed - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autoanalyze_count - data_type: sum - value_column: autoanalyze_count - description: Number of times this table has been analyzed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autovacuum_count - data_type: sum - value_column: autovacuum_count - description: Number of times this table has been vacuumed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_scan - data_type: sum - value_column: idx_scan - description: Number of index scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_tup_fetch - data_type: sum - value_column: idx_tup_fetch - description: Number of live rows fetched by index scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_dead_tup - value_column: n_dead_tup - description: Estimated number of dead rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_live_tup - value_column: n_live_tup - description: Estimated number of live rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_del - data_type: sum - value_column: n_tup_del - description: Number of rows deleted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_hot_upd - data_type: sum - value_column: n_tup_hot_upd - description: Number of rows HOT updated (i.e., with no separate index update required) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_ins - data_type: sum - value_column: n_tup_ins - description: Number of rows inserted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_upd - data_type: sum - value_column: n_tup_upd - description: Number of rows updated - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_seq_scan - data_type: sum - value_column: seq_scan - description: Number of sequential scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: 
"localhost:5432" - - metric_name: ccp_stat_user_tables_seq_tup_read - data_type: sum - value_column: seq_tup_read - description: Number of live rows fetched by sequential scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_vacuum_count - data_type: sum - value_column: vacuum_count - description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/lt_pg16_slow_metrics.yaml b/internal/collector/lt_pg16_slow_metrics.yaml deleted file mode 100644 index ca9fe8a0c8..0000000000 --- a/internal/collector/lt_pg16_slow_metrics.yaml +++ /dev/null @@ -1,135 +0,0 @@ -# This list of queries configures an OTel SQL Query Receiver to read pgMonitor -# metrics from Postgres. -# -# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries -# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - -# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. -# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values -# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. - - sql: > - SELECT - current_database() as dbname - , p.schemaname - , p.relname - , p.seq_scan - , p.seq_tup_read - , COALESCE(p.idx_scan, 0) AS idx_scan - , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch - , p.n_tup_ins - , p.n_tup_upd - , p.n_tup_del - , p.n_tup_hot_upd - , 0::bigint AS n_tup_newpage_upd - , p.n_live_tup - , p.n_dead_tup - , p.vacuum_count - , p.autovacuum_count - , p.analyze_count - , p.autoanalyze_count - FROM pg_catalog.pg_stat_user_tables p; - metrics: - - metric_name: ccp_stat_user_tables_analyze_count - data_type: sum - value_column: analyze_count - description: Number of times this table has been manually analyzed - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autoanalyze_count - data_type: sum - value_column: autoanalyze_count - description: Number of times this table has been analyzed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autovacuum_count - data_type: sum - value_column: autovacuum_count - description: Number of times this table has been vacuumed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_scan - data_type: sum - value_column: idx_scan - description: Number of index scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_tup_fetch - data_type: sum - value_column: idx_tup_fetch - description: Number of live rows fetched by index scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_dead_tup - value_column: n_dead_tup - description: Estimated number of dead rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # 
FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_n_live_tup - value_column: n_live_tup - description: Estimated number of live rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_del - data_type: sum - value_column: n_tup_del - description: Number of rows deleted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_hot_upd - data_type: sum - value_column: n_tup_hot_upd - description: Number of rows HOT updated (i.e., with no separate index update required) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_n_tup_ins - data_type: sum - value_column: n_tup_ins - description: Number of rows inserted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_upd - data_type: sum - value_column: n_tup_upd - description: Number of rows updated - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_seq_scan - data_type: sum - value_column: seq_scan - description: Number of sequential scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_seq_tup_read - data_type: sum - value_column: seq_tup_read - description: Number of live rows fetched by sequential scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_vacuum_count - data_type: sum - value_column: vacuum_count - description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/postgres_5m_per_db_metrics.yaml b/internal/collector/postgres_5m_per_db_metrics.yaml new file mode 100644 index 0000000000..6fcefcf9d0 --- /dev/null +++ b/internal/collector/postgres_5m_per_db_metrics.yaml @@ -0,0 +1,161 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/v5.2.1/sql_exporter/common/crunchy_per_db_collector.yml +# +# Note: Several metrics in the `crunchy_per_db_collector` track the materialized views and +# pgMonitor-extension version -- metrics that aren't meaningful in the CPK environment. 
+# The list of metrics that fall into this category include +# * ccp_metric_matview_refresh_last_run_fail_count +# * ccp_metric_matview_refresh_longest_runtime_seconds +# * ccp_metric_matview_refresh_longest_runtime +# * ccp_metric_table_refresh_longest_runtime +# * ccp_pgmonitor_extension_per_db + + - sql: > + SELECT current_database() as dbname + , n.nspname as schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes + value_type: double + value_column: bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT current_database() as dbname + , p.schemaname + , p.relname + , p.seq_scan + , p.seq_tup_read + , COALESCE(p.idx_scan, 0) AS idx_scan + , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch + , p.n_tup_ins + , p.n_tup_upd + , p.n_tup_del + , p.n_tup_hot_upd + , CASE + WHEN current_setting('server_version_num')::int >= 160000 + THEN p.n_tup_newpage_upd + ELSE 0::bigint + END AS n_tup_newpage_upd + , p.n_live_tup + , p.n_dead_tup + , p.vacuum_count + , p.autovacuum_count + , p.analyze_count + , p.autoanalyze_count + FROM pg_catalog.pg_stat_user_tables p; + metrics: + - metric_name: ccp_stat_user_tables_seq_scan + data_type: sum + value_column: seq_scan + description: "Number of sequential scans initiated on this table" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_seq_tup_read + data_type: sum + value_column: seq_tup_read + description: "Number of live rows fetched by sequential scans" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_scan + data_type: sum + description: "Number of index scans initiated on this table" + value_column: idx_scan + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_idx_tup_fetch + data_type: sum + description: "Number of live rows fetched by index scans" + value_column: idx_tup_fetch + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_ins + data_type: sum + description: "Number of rows inserted" + value_column: n_tup_ins + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_upd + data_type: sum + description: "Number of rows updated" + value_column: n_tup_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_del + data_type: sum + description: "Number of rows deleted" + value_column: n_tup_del + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_hot_upd + data_type: sum + description: "Number of rows HOT updated (i.e., with no separate index update required)" + value_column: n_tup_hot_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: 
ccp_stat_user_tables_n_tup_newpage_upd + data_type: sum + description: "Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. These are always non-HOT updates." + value_column: n_tup_newpage_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_live_tup + description: "Estimated number of live rows" + value_column: n_live_tup + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_dead_tup + description: "Estimated number of dead rows" + value_column: n_dead_tup + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_vacuum_count + data_type: sum + description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" + value_column: vacuum_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_autovacuum_count + data_type: sum + description: "Number of times this table has been vacuumed by the autovacuum daemon" + value_column: autovacuum_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_analyze_count + data_type: sum + description: "Number of times this table has been manually analyzed" + value_column: analyze_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_autoanalyze_count + data_type: sum + description: "Number of times this table has been analyzed by the autovacuum daemon" + value_column: autoanalyze_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index 098d1ff2be..072ec6987a 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -21,6 +21,9 @@ import ( //go:embed "generated/postgres_5s_metrics.json" var fiveSecondMetrics json.RawMessage +//go:embed "generated/postgres_5m_per_db_metrics.json" +var fiveMinutePerDBMetrics json.RawMessage + //go:embed "generated/postgres_5m_metrics.json" var fiveMinuteMetrics json.RawMessage @@ -33,15 +36,9 @@ var ltPG17Fast json.RawMessage //go:embed "generated/eq_pg16_fast_metrics.json" var eqPG16Fast json.RawMessage -//go:embed "generated/gte_pg16_slow_metrics.json" -var gtePG16Slow json.RawMessage - //go:embed "generated/lt_pg16_fast_metrics.json" var ltPG16Fast json.RawMessage -//go:embed "generated/lt_pg16_slow_metrics.json" -var ltPG16Slow json.RawMessage - type queryMetrics struct { Metrics []*metric `json:"metrics"` Query string `json:"sql"` @@ -71,6 +68,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust // will continually append to it and blow up our ConfigMap fiveSecondMetricsClone := slices.Clone(fiveSecondMetrics) fiveMinuteMetricsClone := slices.Clone(fiveMinuteMetrics) + fiveMinutePerDBMetricsClone := slices.Clone(fiveMinutePerDBMetrics) if inCluster.Spec.PostgresVersion >= 17 { fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG17Fast) @@ -91,20 +89,11 @@ func EnablePostgresMetrics(ctx 
context.Context, inCluster *v1beta1.PostgresClust log.Error(err, "error compiling metrics for postgres 16") } - if inCluster.Spec.PostgresVersion >= 16 { - fiveMinuteMetricsClone, err = appendToJSONArray(fiveMinuteMetricsClone, gtePG16Slow) - if err != nil { - log.Error(err, "error compiling metrics for postgres 16 and greater") - } - } else { + if inCluster.Spec.PostgresVersion < 16 { fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG16Fast) if err != nil { log.Error(err, "error compiling fast metrics for postgres versions less than 16") } - fiveMinuteMetricsClone, err = appendToJSONArray(fiveMinuteMetricsClone, ltPG16Slow) - if err != nil { - log.Error(err, "error compiling slow metrics for postgres versions less than 16") - } } // Remove any queries that user has specified in the spec @@ -117,7 +106,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust var fiveSecondMetricsArr []queryMetrics err := json.Unmarshal(fiveSecondMetricsClone, &fiveSecondMetricsArr) if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling five second postgres metrics") } // Remove any specified metrics from the five second metrics @@ -128,19 +117,31 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust var fiveMinuteMetricsArr []queryMetrics err = json.Unmarshal(fiveMinuteMetricsClone, &fiveMinuteMetricsArr) if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling five minute postgres metrics") } // Remove any specified metrics from the five minute metrics fiveMinuteMetricsArr = removeMetricsFromQueries( inCluster.Spec.Instrumentation.Metrics.CustomQueries.Remove, fiveMinuteMetricsArr) + // Convert json to array of queryMetrics objects + var fiveMinutePerDBMetricsArr []queryMetrics + err = json.Unmarshal(fiveMinutePerDBMetricsClone, &fiveMinutePerDBMetricsArr) + if err != nil { + log.Error(err, "error compiling per-db postgres metrics") + } + + // Remove any specified metrics from the five minute per-db metrics + fiveMinutePerDBMetricsArr = removeMetricsFromQueries( + inCluster.Spec.Instrumentation.Metrics.CustomQueries.Remove, fiveMinutePerDBMetricsArr) + // Convert back to json data // The error return value can be ignored as the errchkjson linter // deems the []queryMetrics to be a safe argument: // https://github.com/breml/errchkjson fiveSecondMetricsClone, _ = json.Marshal(fiveSecondMetricsArr) fiveMinuteMetricsClone, _ = json.Marshal(fiveMinuteMetricsArr) + fiveMinutePerDBMetricsClone, _ = json.Marshal(fiveMinutePerDBMetricsArr) } // Add Prometheus exporter @@ -180,31 +181,65 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust Exporters: []ComponentID{Prometheus}, } - // Add custom queries if they are defined in the spec + // Add custom queries and per-db metrics if they are defined in the spec if inCluster.Spec.Instrumentation != nil && - inCluster.Spec.Instrumentation.Metrics != nil && - inCluster.Spec.Instrumentation.Metrics.CustomQueries != nil && - inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add != nil { - - for _, querySet := range inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add { - // Create a receiver for the query set - receiverName := "sqlquery/" + querySet.Name - config.Receivers[receiverName] = map[string]any{ - "driver": "postgres", - "datasource": fmt.Sprintf( - `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - MonitoringUser), - 
"collection_interval": querySet.CollectionInterval, - // Give Postgres time to finish setup. - "initial_delay": "15s", - "queries": "${file:/etc/otel-collector/" + - querySet.Name + "/" + querySet.Queries.Key + "}", + inCluster.Spec.Instrumentation.Metrics != nil { + + if inCluster.Spec.Instrumentation.Metrics.CustomQueries != nil && + inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add != nil { + + for _, querySet := range inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add { + // Create a receiver for the query set + + dbs := []string{"postgres"} + if len(querySet.Databases) != 0 { + dbs = querySet.Databases + } + for _, db := range dbs { + receiverName := fmt.Sprintf( + "sqlquery/%s-%s", querySet.Name, db) + config.Receivers[receiverName] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf( + `host=localhost dbname=%s port=5432 user=%s password=${env:PGPASSWORD}`, + db, + MonitoringUser), + "collection_interval": querySet.CollectionInterval, + // Give Postgres time to finish setup. + "initial_delay": "15s", + "queries": "${file:/etc/otel-collector/" + + querySet.Name + "/" + querySet.Queries.Key + "}", + } + + // Add the receiver to the pipeline + pipeline := config.Pipelines[PostgresMetrics] + pipeline.Receivers = append(pipeline.Receivers, receiverName) + config.Pipelines[PostgresMetrics] = pipeline + } } + } + if inCluster.Spec.Instrumentation.Metrics.PerDBMetricTargets != nil { + + for _, db := range inCluster.Spec.Instrumentation.Metrics.PerDBMetricTargets { + // Create a receiver for the query set for the db + receiverName := "sqlquery/" + db + config.Receivers[receiverName] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf( + `host=localhost dbname=%s port=5432 user=%s password=${env:PGPASSWORD}`, + db, + MonitoringUser), + "collection_interval": "5m", + // Give Postgres time to finish setup. + "initial_delay": "15s", + "queries": slices.Clone(fiveMinutePerDBMetricsClone), + } - // Add the receiver to the pipeline - pipeline := config.Pipelines[PostgresMetrics] - pipeline.Receivers = append(pipeline.Receivers, receiverName) - config.Pipelines[PostgresMetrics] = pipeline + // Add the receiver to the pipeline + pipeline := config.Pipelines[PostgresMetrics] + pipeline.Receivers = append(pipeline.Receivers, receiverName) + config.Pipelines[PostgresMetrics] = pipeline + } } } } diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go index d3f6882271..9481d748cc 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -107,6 +107,11 @@ type InstrumentationMetricsSpec struct { // --- // +optional CustomQueries *InstrumentationCustomQueriesSpec `json:"customQueries,omitempty"` + + // User defined databases to target for default per-db metrics + // --- + // +optional + PerDBMetricTargets []string `json:"perDBMetricTargets,omitempty"` } type InstrumentationCustomQueriesSpec struct { @@ -159,6 +164,12 @@ type InstrumentationCustomQueries struct { // +default="5s" // +optional CollectionInterval *Duration `json:"collectionInterval,omitempty"` + + // The databases to target with added custom queries. + // Default behavior is to target `postgres`. 
+ // --- + // +optional + Databases []string `json:"databases,omitempty"` } // --- diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 8ee494d5f8..7d12eccefb 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -481,6 +481,11 @@ func (in *InstrumentationCustomQueries) DeepCopyInto(out *InstrumentationCustomQ *out = new(Duration) **out = **in } + if in.Databases != nil { + in, out := &in.Databases, &out.Databases + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationCustomQueries. @@ -558,6 +563,11 @@ func (in *InstrumentationMetricsSpec) DeepCopyInto(out *InstrumentationMetricsSp *out = new(InstrumentationCustomQueriesSpec) (*in).DeepCopyInto(*out) } + if in.PerDBMetricTargets != nil { + in, out := &in.PerDBMetricTargets, &out.PerDBMetricTargets + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationMetricsSpec. diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml deleted file mode 100644 index 298adb06b4..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/11--add-logs-exporter.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/11--add-logs-exporter.yaml -assert: -- files/11-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml new file mode 100644 index 0000000000..8e73e1874e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/11--add-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml new file mode 100644 index 0000000000..10a8645b32 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are present for the single added target. 
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes not found for pikachu" + exit 1 + } + { ! contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml new file mode 100644 index 0000000000..12791e5066 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/13--add-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml deleted file mode 100644 index b4c6f272f6..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/13--cluster-no-backups.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/13--create-cluster.yaml -assert: -- files/13-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml new file mode 100644 index 0000000000..bfbe2b1578 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are present for both added targets. 
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes not found for pikachu" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes not found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml new file mode 100644 index 0000000000..549f21d55e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/15--remove-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml new file mode 100644 index 0000000000..d75c06827d --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are absent from the targets since they've been removed. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { ! contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes found for pikachu" + exit 1 + } + { ! 
contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml new file mode 100644 index 0000000000..e1b2ebfeb3 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/17--add-custom-queries-per-db.yaml +assert: +- files/17-custom-queries-per-db-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml new file mode 100644 index 0000000000..a4631bf9e8 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml @@ -0,0 +1,42 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the two metrics that we +# checked for earlier are no longer there. +# Then, check that the two custom metrics that we added are present +# only for the targets that were specified. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { contains "${scrape_metrics}" 'ccp_table_size_bytes_1{dbname="pikachu"'; } || { + retry "custom metric not found for pikachu db" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes_1{dbname="onix"'; } || { + retry "custom metric found for onix db" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes_2{dbname="onix"'; } || { + retry "custom metric not found for onix db" + exit 1 + } + { ! 
contains "${scrape_metrics}" 'ccp_table_size_bytes_2{dbname="pikachu"'; } || { + retry "custom metric found for pikachu db" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml new file mode 100644 index 0000000000..dc85f9707c --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/19--add-logs-exporter.yaml +assert: +- files/19-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-logs-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/12-assert-logs-exported.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml new file mode 100644 index 0000000000..a24e1c8f2d --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/21--create-cluster.yaml +assert: +- files/21-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/14-assert-instance.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/23--cluster-add-backups.yaml similarity index 50% rename from testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/23--cluster-add-backups.yaml index 3bdd0b37e8..52990e4372 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/15--cluster-add-backups.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/23--cluster-add-backups.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep apply: -- files/15--add-backups.yaml +- files/23--add-backups.yaml assert: -- files/15-backups-added.yaml +- files/23-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/16--remove-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/24--remove-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/16--remove-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/24--remove-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/25--annotate-cluster.yaml similarity index 86% rename from testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/25--annotate-cluster.yaml index 2da3da58a3..d017479ca3 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/17--annotate-cluster.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/25--annotate-cluster.yaml @@ -4,4 +4,4 @@ commands: - command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true" namespaced: true assert: -- files/17-backups-removed.yaml +- files/25-backups-removed.yaml diff --git 
a/testing/kuttl/e2e/otel-logging-and-metrics/README.md b/testing/kuttl/e2e/otel-logging-and-metrics/README.md
index e14bdd899c..46e3169e71 100644
--- a/testing/kuttl/e2e/otel-logging-and-metrics/README.md
+++ b/testing/kuttl/e2e/otel-logging-and-metrics/README.md
@@ -6,9 +6,9 @@ This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry
 ## Process
 
-1. Create a basic cluster with pgbouncer and pgadmin in place.
+1. Create a basic cluster with pgbouncer and pgadmin in place. (00)
    1. Ensure cluster comes up, that all containers are running and ready, and that the initial backup is complete.
-2. Add the `instrumentation` spec to both PostgresCluster and PGAdmin manifests.
+2. Add the `instrumentation` spec to both PostgresCluster and PGAdmin manifests. (01-08)
    1. Ensure that OTel collector containers and `crunchy-otel-collector` labels are added to the four pods (postgres instance, repo-host, pgbouncer, & pgadmin) and that the collector containers are running and ready.
    2. Assert that the instance pod collector is getting postgres and patroni metrics and postgres, patroni, and pgbackrest logs.
    3. Assert that the pgbouncer pod collector is getting pgbouncer metrics and logs.
@@ -16,14 +16,19 @@ This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry
    5. Assert that the repo-host pod collector is NOT getting pgbackrest logs. We do not expect logs yet as the initial backup completed and created a log file; however, we configure the collector to only ingest new logs after it has started up.
    6. Create a manual backup and ensure that it completes successfully.
    7. Ensure that the repo-host pod collector is now getting pgbackrest logs.
-3. Add both "add" and "remove" custom queries to the PostgresCluster `instrumentation` spec and create a ConfigMap that holds the custom queries to add.
+3. Add both "add" and "remove" custom queries to the PostgresCluster `instrumentation` spec and create a ConfigMap that holds the custom queries to add. (09-10)
    1. Ensure that the ConfigMap is created.
    2. Assert that the metrics that were removed (which we checked for earlier) are in fact no longer present in the collector metrics.
    3. Assert that the custom metrics that were added are present in the collector metrics.
-4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors.
+4. Exercise per-db metric functionality by adding users, adding per-db targets, removing metrics from the per-db defaults, and adding custom queries with db targets. (11-18)
+   1. Add users and a per-db target, then assert that the per-db default metric is available for the named target.
+   2. Add a second per-db target, then assert that the per-db default metric is available for all named targets.
+   3. Remove the per-db metric, then assert that the per-db default metric is absent for all targets.
+   4. Add custom metrics with a specified db, then assert that each metric appears only for its specified target.
+5. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors. (19-20)
    1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready.
    2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn).
-5. 
Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups. +6. Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups. (21-25) 1. Ensure that the cluster comes up and the database and collector containers are running and ready. 2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods. 3. Remove the backups spec from the new cluster. diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml new file mode 100644 index 0000000000..1cf4c28a83 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + users: + - name: ash + databases: + - pikachu + - name: brock + databases: + - onix + instrumentation: + metrics: + perDBMetricTargets: + - pikachu diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml new file mode 100644 index 0000000000..c383238be9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + perDBMetricTargets: + - pikachu + - onix diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml new file mode 100644 index 0000000000..4421de8482 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + customQueries: + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes + - ccp_table_size_bytes diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml new file mode 100644 index 0000000000..92360a4a9a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + customQueries: + add: + - name: custom1 + databases: [pikachu, onix] + queries: + name: my-custom-queries2 + key: custom1.yaml + - name: custom2 + databases: [onix] + queries: + name: my-custom-queries2 + key: custom2.yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries2 +data: + custom1.yaml: | + - sql: > + SELECT current_database() as dbname + , n.nspname as schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes_1 + value_type: double + value_column: 
bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + custom2.yaml: | + - sql: > + SELECT current_database() as dbname + , n.nspname as schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes_2 + value_type: double + value_column: bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" \ No newline at end of file diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml new file mode 100644 index 0000000000..5bd9cec286 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml @@ -0,0 +1,124 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: 
Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries2 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-logs-exporter.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/11-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/11-logs-exporter-added.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/13--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/21--create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/13--create-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/21--create-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/13-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/21-cluster-created.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/13-cluster-created.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/21-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/15--add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/23--add-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/15--add-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/23--add-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/15-backups-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/23-backups-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/15-backups-added.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/23-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/17-backups-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/25-backups-removed.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/17-backups-removed.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/25-backups-removed.yaml From c1c68025735b3d7b2578ba2b36dd12ad9f6840ae Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 28 May 2025 12:56:09 -0700 Subject: [PATCH 39/79] Add ability to add environment variables to the collector container. Allows users to authenticate exporters with major cloud providers. 
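As a sketch of how the new field might be used — assuming it is reached at `spec.instrumentation.config.environmentVariables`, as the Go type below suggests, and noting that the CEL rule rejects the operator-reserved names `K8S_POD_NAMESPACE`, `K8S_POD_NAME`, and `PGPASSWORD`. The Secret name, key file, and mount path here are purely illustrative:

```yaml
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
  name: hippo
spec:
  instrumentation:
    config:
      files:
      # Hypothetical Secret holding a service-account key, projected
      # into the collector container alongside its other config files.
      - secret:
          name: gcp-monitoring-key
      environmentVariables:
      # Points a Google Cloud exporter at the projected key. The path
      # is a guess at wherever the operator mounts `config.files`.
      - name: GOOGLE_APPLICATION_CREDENTIALS
        value: /etc/otel-collector/gcp-monitoring-key/key.json
```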
--- ...res-operator.crunchydata.com_pgadmins.yaml | 129 ++++++++++++++++++ ...ator.crunchydata.com_postgresclusters.yaml | 129 ++++++++++++++++++ internal/collector/instance.go | 5 + .../v1beta1/instrumentation_types.go | 9 ++ .../v1beta1/zz_generated.deepcopy.go | 7 + 5 files changed, 279 insertions(+) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index c61f9a000a..f38e46bfbf 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -1692,6 +1692,135 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + environmentVariables: + description: |- + EnvironmentVariables allows the user to add environment variables to the + collector container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must + be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
+ properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: Cannot overwrite environment variables set by + operator + rule: self.name != 'K8S_POD_NAMESPACE' && self.name != + 'K8S_POD_NAME' && self.name != 'PGPASSWORD' + minItems: 1 + type: array + x-kubernetes-list-type: atomic exporters: description: |- Exporters allows users to configure OpenTelemetry exporters that exist diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index c56769b5c0..60cfcf0f08 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -11534,6 +11534,135 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + environmentVariables: + description: |- + EnvironmentVariables allows the user to add environment variables to the + collector container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must + be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: Cannot overwrite environment variables set by + operator + rule: self.name != 'K8S_POD_NAMESPACE' && self.name != + 'K8S_POD_NAME' && self.name != 'PGPASSWORD' + minItems: 1 + type: array + x-kubernetes-list-type: atomic exporters: description: |- Exporters allows users to configure OpenTelemetry exporters that exist diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 8158d9dda3..9cb1708042 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -116,6 +116,11 @@ func AddToPod( VolumeMounts: append(volumeMounts, configVolumeMount), } + // Add any user specified environment variables to the collector container + if spec.Config != nil && spec.Config.EnvironmentVariables != nil { + container.Env = append(container.Env, spec.Config.EnvironmentVariables...) 
+ } + // If metrics feature is enabled and this Pod serves metrics, add the // Prometheus port to this container if feature.Enabled(ctx, feature.OpenTelemetryMetrics) && thisPodServesMetrics { diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go index 9481d748cc..dfefccd6de 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -65,6 +65,15 @@ type InstrumentationConfigSpec struct { // +listType=atomic // +optional Files []corev1.VolumeProjection `json:"files,omitempty"` + + // EnvironmentVariables allows the user to add environment variables to the + // collector container. + // --- + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:items:XValidation:rule=`self.name != 'K8S_POD_NAMESPACE' && self.name != 'K8S_POD_NAME' && self.name != 'PGPASSWORD'`,message="Cannot overwrite environment variables set by operator" + // +listType=atomic + // +optional + EnvironmentVariables []corev1.EnvVar `json:"environmentVariables,omitempty"` } // InstrumentationLogsSpec defines the configuration for collecting logs via diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 7d12eccefb..d25ac44d1e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -460,6 +460,13 @@ func (in *InstrumentationConfigSpec) DeepCopyInto(out *InstrumentationConfigSpec (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.EnvironmentVariables != nil { + in, out := &in.EnvironmentVariables, &out.EnvironmentVariables + *out = make([]corev1.EnvVar, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationConfigSpec. From 0719236515ceda53c36c630e87df17be8787b1cb Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 30 May 2025 14:54:50 -0700 Subject: [PATCH 40/79] OTel: Allow users to add metrics exporters. --- .../postgres-operator.crunchydata.com_pgadmins.yaml | 7 +++++++ ...gres-operator.crunchydata.com_postgresclusters.yaml | 7 +++++++ internal/collector/naming.go | 2 +- internal/collector/patroni.go | 10 +++++++++- internal/collector/pgbouncer.go | 10 +++++++++- internal/collector/postgres_metrics.go | 10 +++++++++- .../v1beta1/instrumentation_types.go | 7 +++++++ .../v1beta1/zz_generated.deepcopy.go | 5 +++++ 8 files changed, 54 insertions(+), 4 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index f38e46bfbf..6b3dcca78b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -2301,6 +2301,13 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. 
+ items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set perDBMetricTargets: description: User defined databases to target for default per-db metrics diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 60cfcf0f08..483bce91ad 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -12143,6 +12143,13 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. + items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set perDBMetricTargets: description: User defined databases to target for default per-db metrics diff --git a/internal/collector/naming.go b/internal/collector/naming.go index 801d61e8ce..c12ed89ebc 100644 --- a/internal/collector/naming.go +++ b/internal/collector/naming.go @@ -9,7 +9,7 @@ const DebugExporter = "debug" const LogsBatchProcessor = "batch/logs" const OneSecondBatchProcessor = "batch/1s" const SubSecondBatchProcessor = "batch/200ms" -const Prometheus = "prometheus" +const Prometheus = "prometheus/cpk-monitoring" const PrometheusPort = 9187 const PGBouncerMetrics = "metrics/pgbouncer" const PostgresMetrics = "metrics/postgres" diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index 2e0edb0d15..ea11c7a2f9 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -168,6 +168,14 @@ func EnablePatroniMetrics(ctx context.Context, }, } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) + } + // Add Metrics Pipeline outConfig.Pipelines[PatroniMetrics] = Pipeline{ Receivers: []ComponentID{Prometheus}, @@ -175,7 +183,7 @@ func EnablePatroniMetrics(ctx context.Context, SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 700b9a3725..2e2bb99c56 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -187,6 +187,14 @@ func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresClus "queries": slices.Clone(pgBouncerMetricsQueries), } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) 
+ } + // Add Metrics Pipeline config.Pipelines[PGBouncerMetrics] = Pipeline{ Receivers: []ComponentID{SqlQuery}, @@ -194,7 +202,7 @@ func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresClus SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index 072ec6987a..f3d5371cc6 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -171,6 +171,14 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "queries": slices.Clone(fiveMinuteMetricsClone), } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) + } + // Add Metrics Pipeline config.Pipelines[PostgresMetrics] = Pipeline{ Receivers: []ComponentID{FiveSecondSqlQuery, FiveMinuteSqlQuery}, @@ -178,7 +186,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } // Add custom queries and per-db metrics if they are defined in the spec diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go index dfefccd6de..7c90b6f65e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -117,6 +117,13 @@ type InstrumentationMetricsSpec struct { // +optional CustomQueries *InstrumentationCustomQueriesSpec `json:"customQueries,omitempty"` + // The names of exporters that should send metrics. + // --- + // +kubebuilder:validation:MinItems=1 + // +listType=set + // +optional + Exporters []string `json:"exporters,omitempty"` + // User defined databases to target for default per-db metrics // --- // +optional diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index d25ac44d1e..747e363854 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -570,6 +570,11 @@ func (in *InstrumentationMetricsSpec) DeepCopyInto(out *InstrumentationMetricsSp *out = new(InstrumentationCustomQueriesSpec) (*in).DeepCopyInto(*out) } + if in.Exporters != nil { + in, out := &in.Exporters, &out.Exporters + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.PerDBMetricTargets != nil { in, out := &in.PerDBMetricTargets, &out.PerDBMetricTargets *out = make([]string, len(*in)) From b85859f577d671cf436668b6c6b0995ac8628145 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 4 Jun 2025 00:03:11 -0700 Subject: [PATCH 41/79] OTel: Add tests for metrics exporters. 
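Taken together, the previous patch and these tests mean each metrics pipeline always exports to the built-in `prometheus/cpk-monitoring` exporter, with any user-listed exporter names appended after it. A minimal sketch of the user-facing shape, modeled on the step 19 kuttl files — assuming `config.exporters` accepts raw OpenTelemetry exporter configuration, and with a hypothetical in-cluster `otel-collector` Service as the destination:

```yaml
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
  name: hippo
spec:
  instrumentation:
    config:
      exporters:
        # Raw OpenTelemetry exporter config, forwarded to a standalone
        # collector reachable inside the cluster (endpoint is illustrative).
        otlp:
          endpoint: otel-collector:4317
          tls:
            insecure: true
    metrics:
      # Appended after the built-in prometheus/cpk-monitoring exporter
      # in the patroni, pgbouncer, and postgres metrics pipelines.
      exporters: [otlp]
    logs:
      exporters: [otlp]
```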
--- internal/collector/helpers_test.go | 3 + internal/collector/patroni_test.go | 140 ++++++- internal/collector/pgadmin_test.go | 6 +- internal/collector/pgbackrest_test.go | 6 +- internal/collector/pgbouncer_test.go | 364 +++++++++++++++++- internal/collector/postgres_test.go | 140 ++++++- .../19--add-logs-exporter.yaml | 6 - .../19--add-logs-metrics-exporter.yaml | 6 + ...l => 20-assert-logs-metrics-exported.yaml} | 6 + ...aml => 19--add-logs-metrics-exporter.yaml} | 4 + ...ml => 19-logs-metrics-exporter-added.yaml} | 0 11 files changed, 655 insertions(+), 26 deletions(-) delete mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml create mode 100644 testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml rename testing/kuttl/e2e/otel-logging-and-metrics/{20-assert-logs-exported.yaml => 20-assert-logs-metrics-exported.yaml} (90%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{19--add-logs-exporter.yaml => 19--add-logs-metrics-exporter.yaml} (98%) rename testing/kuttl/e2e/otel-logging-and-metrics/files/{19-logs-exporter-added.yaml => 19-logs-metrics-exporter-added.yaml} (100%) diff --git a/internal/collector/helpers_test.go b/internal/collector/helpers_test.go index 7f1e277e9b..1f174ebcda 100644 --- a/internal/collector/helpers_test.go +++ b/internal/collector/helpers_test.go @@ -23,6 +23,9 @@ func testInstrumentationSpec() *v1beta1.InstrumentationSpec { Logs: &v1beta1.InstrumentationLogsSpec{ Exporters: []string{"googlecloud"}, }, + Metrics: &v1beta1.InstrumentationMetricsSpec{ + Exporters: []string{"googlecloud"}, + }, } return spec.DeepCopy() diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index 20dd8096eb..1626f92256 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -16,7 +16,7 @@ import ( ) func TestEnablePatroniLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -26,9 +26,7 @@ func TestEnablePatroniLogging(t *testing.T) { config := NewConfig(nil) cluster := new(v1beta1.PostgresCluster) require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) EnablePatroniLogging(ctx, cluster, config) @@ -216,3 +214,137 @@ service: `) }) } + +func TestEnablePatroniMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + + }) +} diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index b856baab0c..2c1a4eb05a 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -20,7 +20,7 @@ import ( ) func TestEnablePgAdminLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -31,9 +31,7 @@ func TestEnablePgAdminLogging(t *testing.T) { configmap := new(corev1.ConfigMap) initialize.Map(&configmap.Data) var instrumentation *v1beta1.InstrumentationSpec - require.UnmarshalInto(t, &instrumentation, `{ - logs: { retentionPeriod: 12h }, - }`) + require.UnmarshalInto(t, &instrumentation, `{}`) err := collector.EnablePgAdminLogging(ctx, instrumentation, configmap) assert.NilError(t, err) diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index 66e180ef1f..911f0f0909 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -16,7 +16,7 @@ import ( ) func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t 
*testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -29,9 +29,7 @@ func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { }, } var instrumentation *v1beta1.InstrumentationSpec - require.UnmarshalInto(t, &instrumentation, `{ - logs: { retentionPeriod: 12h }, - }`) + require.UnmarshalInto(t, &instrumentation, `{}`) config := NewConfigForPgBackrestRepoHostPod(ctx, instrumentation, repos) diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index cbd69cbd03..1589c27079 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -16,7 +16,7 @@ import ( ) func TestEnablePgBouncerLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -26,9 +26,7 @@ func TestEnablePgBouncerLogging(t *testing.T) { config := NewConfig(nil) cluster := new(v1beta1.PostgresCluster) require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) EnablePgBouncerLogging(ctx, cluster, config) @@ -214,3 +212,361 @@ service: `) }) } + +func TestEnablePgBouncerMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled + sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + 
value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] + pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(testInstrumentationSpec()) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled 
+ sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] + pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + + }) +} diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 222b263e25..a36a827b3b 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -17,7 +17,7 @@ import ( ) func TestEnablePostgresLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -27,9 +27,7 @@ func TestEnablePostgresLogging(t *testing.T) { cluster := new(v1beta1.PostgresCluster) cluster.Spec.PostgresVersion = 99 require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) config := NewConfig(nil) @@ -537,3 +535,137 @@ service: `) }) } + +func TestEnablePostgresMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + + config := NewConfig(nil) + + EnablePostgresMetrics(ctx, cluster, config) + + // The queries aren't really needed for this test and sheer number of queries + // would make this file excessively long (and string formatting presented it's + // own formatting headaches), so I am 
removing them + config.Receivers["sqlquery/5s"] = nil + config.Receivers["sqlquery/300s"] = nil + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery/5s: null + sqlquery/300s: null +service: + extensions: [] + pipelines: + metrics/postgres: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery/5s + - sqlquery/300s +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + cluster.Spec.Instrumentation = testInstrumentationSpec() + + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePostgresMetrics(ctx, cluster, config) + + // The queries aren't really needed for this test and sheer number of queries + // would make this file excessively long (and string formatting presented it's + // own formatting headaches), so I am removing them + config.Receivers["sqlquery/5s"] = nil + config.Receivers["sqlquery/300s"] = nil + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery/5s: null + sqlquery/300s: null +service: + extensions: [] + pipelines: + metrics/postgres: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery/5s + - sqlquery/300s +`) + + }) +} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml deleted file mode 100644 index dc85f9707c..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/19--add-logs-exporter.yaml -assert: -- files/19-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml new file mode 100644 index 0000000000..7b21e0ef50 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/19--add-logs-metrics-exporter.yaml +assert: +- files/19-logs-metrics-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml similarity index 90% rename from testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml index 8b86743cc0..2022397ce9 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml @@ -44,3 +44,9 @@ commands: retry "gunicorn logs not found" exit 1 } + + metrics=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep ccp) + { contains "${metrics}" 'ccp_stat'; } || { + retry "metrics not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml similarity index 98% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml index 9943f61341..67926505c0 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml @@ -35,6 +35,7 @@ spec: pgBouncer: {} instrumentation: metrics: + exporters: ['otlp'] customQueries: add: - name: slow-custom-queries @@ -121,6 +122,9 @@ data: logs/1: receivers: [otlp] exporters: [debug] + metrics/1: + receivers: [otlp] + exporters: [debug] --- apiVersion: v1 kind: Service diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml similarity 
index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml From a90041f2dd3a1943ec3a81d9d95ade6d37838569 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 23 May 2025 13:38:04 -0500 Subject: [PATCH 42/79] Scan for secrets during every merge request Issue: PGO-2490 --- .gitlab-ci.yml | 11 +++++++++-- .trivyignore.yaml | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 .trivyignore.yaml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0e611a6309..371a58d2f9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,6 +29,13 @@ workflow: ($CI_PIPELINE_SOURCE == "schedule") || ($CI_PIPELINE_SOURCE == "web") +include: + - component: ${CI_SERVER_FQDN}/containers/gitlab/check-directory-secrets@main + inputs: + job-name: must-not-commit-secrets + job-stage: build + trivy-ignore: .trivyignore.yaml + variables: # https://docs.gitlab.com/runner/configuration/feature-flags # Show the duration of individual script items in the job log. @@ -192,12 +199,12 @@ trivy: bash 'contrib/install.sh' -b "${HOME}/bin" "${VERSION}" ) - # Generate a report and fail when there are issues that can be fixed. + # Generate a report and fail when there are issues with dependencies. # Trivy needs a populated Go module cache to detect Go module licenses. - go mod download - >- trivy filesystem . --exit-code 1 - --scanners license,secret,vuln + --scanners license,vuln --ignore-unfixed --no-progress --format template diff --git a/.trivyignore.yaml b/.trivyignore.yaml new file mode 100644 index 0000000000..b275e406fa --- /dev/null +++ b/.trivyignore.yaml @@ -0,0 +1,12 @@ +# Copyright Crunchy Data Solutions, Inc. All rights reserved. 
+# +# https://trivy.dev/latest/docs/configuration/filtering/#trivyignoreyaml + +secrets: + - id: jwt-token + paths: + - internal/testing/token_* + + - id: private-key + paths: + - internal/pki/*_test.go From c06abf9c2da4ecb591e6312bf99d08ef3976b67c Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Mon, 9 Jun 2025 13:50:08 -0500 Subject: [PATCH 43/79] Add process name to OTel (#4192) * Add process name to OTel Issue: [PGO-2426] --- internal/collector/patroni.go | 1 + internal/collector/patroni_test.go | 6 ++++++ internal/collector/pgadmin.go | 1 + internal/collector/pgadmin_test.go | 6 ++++++ internal/collector/pgbackrest.go | 1 + internal/collector/pgbackrest_test.go | 6 ++++++ internal/collector/pgbouncer.go | 1 + internal/collector/pgbouncer_test.go | 6 ++++++ internal/collector/postgres.go | 2 ++ internal/collector/postgres_test.go | 12 ++++++++++++ 10 files changed, 42 insertions(+) diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index ea11c7a2f9..e3b56718d8 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -65,6 +65,7 @@ func EnablePatroniLogging(ctx context.Context, {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "patroni"}, }, } diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index 1626f92256..3a37b14697 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -63,6 +63,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: patroni resourcedetection: detectors: [] override: false @@ -163,6 +166,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: patroni resourcedetection: detectors: [] override: false diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go index c5cd147df8..244fc57546 100644 --- a/internal/collector/pgadmin.go +++ b/internal/collector/pgadmin.go @@ -54,6 +54,7 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGAdmin}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgadmin"}, }, } diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index 2c1a4eb05a..c6c86b4b37 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -67,6 +67,9 @@ collector.yaml: | - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgadmin resourcedetection: detectors: [] override: false @@ -192,6 +195,9 @@ collector.yaml: | - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgadmin resourcedetection: detectors: [] override: false diff --git a/internal/collector/pgbackrest.go b/internal/collector/pgbackrest.go index 4fa6f5c1fc..75cc9a55c1 100644 --- a/internal/collector/pgbackrest.go +++ b/internal/collector/pgbackrest.go @@ 
-87,6 +87,7 @@ func NewConfigForPgBackrestRepoHostPod( {"action": "insert", "key": "k8s.container.name", "value": naming.PGBackRestRepoContainerName}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbackrest"}, }, } diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index 911f0f0909..2b26d40531 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -65,6 +65,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest resourcedetection: detectors: [] override: false @@ -172,6 +175,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest resourcedetection: detectors: [] override: false diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 2e2bb99c56..785b2b187e 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -90,6 +90,7 @@ func EnablePgBouncerLogging(ctx context.Context, {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGBouncer}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbouncer"}, }, } diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index 1589c27079..34f2ccf328 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -62,6 +62,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbouncer resourcedetection: detectors: [] override: false @@ -162,6 +165,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbouncer resourcedetection: detectors: [] override: false diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index c98ba4e98b..a926639097 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -197,6 +197,7 @@ func EnablePostgresLogging( {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "postgres"}, // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database#readme {"action": "insert", "key": "db.system", "value": "postgresql"}, @@ -276,6 +277,7 @@ func EnablePostgresLogging( {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbackrest"}, }, } diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index a36a827b3b..89f5f52255 100644 --- a/internal/collector/postgres_test.go +++ 
b/internal/collector/postgres_test.go
@@ -71,6 +71,9 @@ processors:
       - action: insert
         key: k8s.pod.name
         value: ${env:K8S_POD_NAME}
+      - action: insert
+        key: process.executable.name
+        value: pgbackrest
   resource/postgres:
     attributes:
       - action: insert
@@ -82,6 +85,9 @@ processors:
       - action: insert
         key: k8s.pod.name
         value: ${env:K8S_POD_NAME}
+      - action: insert
+        key: process.executable.name
+        value: postgres
       - action: insert
         key: db.system
         value: postgresql
@@ -331,6 +337,9 @@ processors:
       - action: insert
         key: k8s.pod.name
         value: ${env:K8S_POD_NAME}
+      - action: insert
+        key: process.executable.name
+        value: pgbackrest
   resource/postgres:
     attributes:
       - action: insert
@@ -342,6 +351,9 @@ processors:
       - action: insert
         key: k8s.pod.name
         value: ${env:K8S_POD_NAME}
+      - action: insert
+        key: process.executable.name
+        value: postgres
       - action: insert
         key: db.system
         value: postgresql

From 55fa1ca4c47166447ec9311302a81d568ee37935 Mon Sep 17 00:00:00 2001
From: vbeaucha
Date: Tue, 17 Jun 2025 11:01:06 +0200
Subject: [PATCH 44/79] Include logrotate on collector if local volumes are set up

---
 internal/controller/postgrescluster/instance.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go
index e24c0aca7b..eed6b7f538 100644
--- a/internal/controller/postgrescluster/instance.go
+++ b/internal/controller/postgrescluster/instance.go
@@ -1220,10 +1220,12 @@ func (r *Reconciler) reconcileInstance(
 
 	// For now, we are not using logrotate to rotate postgres or patroni logs,
 	// but we are using it for pgbackrest logs in the postgres pod, so we will
-	// set includeLogrotate to true, but only if backups are enabled.
+	// set includeLogrotate to true, but only if backups are enabled
+	// and local volumes are available.
+	includeLogrotate := backupsSpecFound && pgbackrest.RepoHostVolumeDefined(cluster)
 	collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap,
 		&instance.Spec.Template, []corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword,
-		[]string{naming.PGBackRestPGDataLogPath}, backupsSpecFound, true)
+		[]string{naming.PGBackRestPGDataLogPath}, includeLogrotate, true)
 	}
 
 	// Add postgres-exporter to the instance Pod spec

From df6bdf5841adf038b180f795070bdd282f150ff9 Mon Sep 17 00:00:00 2001
From: Drew Sessler
Date: Mon, 21 Jul 2025 10:38:37 -0700
Subject: [PATCH 45/79] Remove permanent dedicated repo host and make archiving to cloud repos go through pg instance.

This essentially reverts commit cfa2839.
Clean up some markdown files.
Add a test case for pgBackRest conf files when a cloud repo is present but no
dedicated repo host is defined.
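For reviewers, a minimal, self-contained sketch of the dispatch rule this patch
applies throughout: a repo backed by a PVC implies a dedicated repo host, while
a cloud-only repo (S3, GCS, Azure) routes pgBackRest commands through the
current Postgres primary. The type names and label strings below are simplified
stand-ins for the operator's v1beta1 and naming packages (the real
getPGBackRestExecSelector builds a labels.Selector carrying additional
pgbackrest and instance labels), not the real signatures.

```go
package main

import "fmt"

// repo is a simplified stand-in for v1beta1.PGBackRestRepo: volume is
// non-nil only when the repo is backed by a PVC.
type repo struct {
	name   string
	volume *struct{}
}

// repoHostVolumeDefined mirrors the idea behind pgbackrest.RepoHostVolumeDefined:
// a dedicated repo host is needed only when at least one repo has a volume.
func repoHostVolumeDefined(repos []repo) bool {
	for _, r := range repos {
		if r.volume != nil {
			return true
		}
	}
	return false
}

// execTarget mirrors the new getPGBackRestExecSelector: volume-backed repos
// exec into the dedicated repo host's pgbackrest container, cloud repos into
// the database container of the current primary.
func execTarget(cluster string, r repo) (selector, container string) {
	if r.volume != nil {
		return "postgres-operator.crunchydata.com/cluster=" + cluster +
			",postgres-operator.crunchydata.com/pgbackrest-dedicated=", "pgbackrest"
	}
	return "postgres-operator.crunchydata.com/cluster=" + cluster +
		",postgres-operator.crunchydata.com/role=master", "database"
}

func main() {
	cloudOnly := []repo{{name: "repo1"}}
	fmt.Println(repoHostVolumeDefined(cloudOnly)) // false: no repo host needed

	sel, ctr := execTarget("hippo", cloudOnly[0])
	fmt.Println(sel, ctr) // targets the primary's database container
}
```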
--- .../controller/postgrescluster/instance.go | 6 +- .../postgrescluster/instance_test.go | 105 +----------- .../controller/postgrescluster/pgbackrest.go | 159 +++++++++++++----- .../postgrescluster/pgbackrest_test.go | 148 ++++++++++++---- internal/naming/annotations.go | 8 + internal/naming/annotations_test.go | 1 + internal/naming/selectors.go | 7 + internal/naming/selectors_test.go | 10 ++ internal/pgbackrest/config.go | 8 +- internal/pgbackrest/config.md | 44 +++-- internal/pgbackrest/config_test.go | 48 ++++++ internal/pgbackrest/reconcile.go | 33 ++-- internal/pgbackrest/reconcile_test.go | 25 +-- internal/pgbackrest/tls-server.md | 39 ++--- 14 files changed, 372 insertions(+), 269 deletions(-) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index eed6b7f538..2d50bdd843 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1397,8 +1397,10 @@ func addPGBackRestToInstancePodSpec( ctx context.Context, cluster *v1beta1.PostgresCluster, instanceCertificates *corev1.Secret, instancePod *corev1.PodSpec, ) { - pgbackrest.AddServerToInstancePod(ctx, cluster, instancePod, - instanceCertificates.Name) + if pgbackrest.RepoHostVolumeDefined(cluster) { + pgbackrest.AddServerToInstancePod(ctx, cluster, instancePod, + instanceCertificates.Name) + } pgbackrest.AddConfigToInstancePod(cluster, instancePod) } diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index 2381b4cb5b..f31b38624c 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -565,104 +565,14 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { readOnly: true - name: other resources: {} -- command: - - pgbackrest - - server - livenessProbe: - exec: - command: - - pgbackrest - - server-ping - name: pgbackrest - resources: {} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - seccompProfile: - type: RuntimeDefault - volumeMounts: - - mountPath: /etc/pgbackrest/server - name: pgbackrest-server - readOnly: true - - mountPath: /pgdata - name: postgres-data - - mountPath: /pgwal - name: postgres-wal - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true -- command: - - bash - - -ceu - - -- - - |- - monitor() { - exec {fd}<> <(:||:) - until read -r -t 5 -u "${fd}"; do - if - [[ "${filename}" -nt "/proc/self/fd/${fd}" ]] && - pkill -HUP --exact --parent=0 pgbackrest - then - exec {fd}>&- && exec {fd}<> <(:||:) - stat --dereference --format='Loaded configuration dated %y' "${filename}" - elif - { [[ "${directory}" -nt "/proc/self/fd/${fd}" ]] || - [[ "${authority}" -nt "/proc/self/fd/${fd}" ]] - } && - pkill -HUP --exact --parent=0 pgbackrest - then - exec {fd}>&- && exec {fd}<> <(:||:) - stat --format='Loaded certificates dated %y' "${directory}" - fi - done - }; export directory="$1" authority="$2" filename="$3"; export -f monitor; exec -a "$0" bash -ceu monitor - - pgbackrest-config - - /etc/pgbackrest/server - - /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt - - /etc/pgbackrest/conf.d/~postgres-operator_server.conf - name: pgbackrest-config - resources: {} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - seccompProfile: - 
type: RuntimeDefault - volumeMounts: - - mountPath: /etc/pgbackrest/server - name: pgbackrest-server - readOnly: true - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true `)) - // Instance configuration files with certificates. + // Instance configuration files but no certificates. // Other volumes are ignored. assert.Assert(t, cmp.MarshalMatches(out.Volumes, ` - name: other - name: postgres-data - name: postgres-wal -- name: pgbackrest-server - projected: - sources: - - secret: - items: - - key: pgbackrest-server.crt - path: server-tls.crt - - key: pgbackrest-server.key - mode: 384 - path: server-tls.key - name: some-secret - name: pgbackrest-config projected: sources: @@ -672,19 +582,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { path: pgbackrest_instance.conf - key: config-hash path: config-hash - - key: pgbackrest-server.conf - path: ~postgres-operator_server.conf name: hippo-pgbackrest-config - - secret: - items: - - key: pgbackrest.ca-roots - path: ~postgres-operator/tls-ca.crt - - key: pgbackrest-client.crt - path: ~postgres-operator/client-tls.crt - - key: pgbackrest-client.key - mode: 384 - path: ~postgres-operator/client-tls.key - name: hippo-pgbackrest `)) }) @@ -733,6 +631,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) } diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index 454c308bc9..f235d8e828 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" utilerrors "k8s.io/apimachinery/pkg/util/errors" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -323,10 +324,12 @@ func (r *Reconciler) cleanupRepoResources(ctx context.Context, // TODO(tjmoore4): This can be removed once 5.0 is EOL. if owned.GetName() != naming.PGBackRestSSHConfig(postgresCluster).Name && owned.GetName() != naming.PGBackRestSSHSecret(postgresCluster).Name { - // If a dedicated repo host resource and a dedicated repo host is enabled, then - // add to the slice and do not delete. - ownedNoDelete = append(ownedNoDelete, owned) - delete = false + // If it is a dedicated repo host resource and a dedicated repo + // host is enabled, then add to the slice and do not delete. 
+ if pgbackrest.RepoHostVolumeDefined(postgresCluster) { + ownedNoDelete = append(ownedNoDelete, owned) + delete = false + } } case hasLabel(naming.LabelPGBackRestRepoVolume): if !backupsSpecFound { @@ -686,30 +689,29 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster pgbackrest.AddServerToRepoPod(ctx, postgresCluster, &repo.Spec.Template.Spec) - if pgbackrest.RepoHostVolumeDefined(postgresCluster) { - // add the init container to make the pgBackRest repo volume log directory - pgBackRestLogPath := pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) + // add the init container to make the pgBackRest repo volume log directory + pgBackRestLogPath := pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) - containersToAdd := []string{naming.PGBackRestRepoContainerName} + containersToAdd := []string{naming.PGBackRestRepoContainerName} - // If OpenTelemetryLogs is enabled, we want to add the collector to the pod - // and also add the RepoVolumes to the container. - if collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { - collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, - &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, - &repo.Spec.Template, []corev1.VolumeMount{}, "", - []string{pgBackRestLogPath}, true, false) + // If OpenTelemetryLogs is enabled, we want to add the collector to the pod + // and also add the RepoVolumes to the container. + if collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { + collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, + &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, + &repo.Spec.Template, []corev1.VolumeMount{}, "", + []string{pgBackRestLogPath}, true, false) - containersToAdd = append(containersToAdd, naming.ContainerCollector) - } + containersToAdd = append(containersToAdd, naming.ContainerCollector) + } - // add pgBackRest repo volumes to pod and to containers - if err := pgbackrest.AddRepoVolumesToPod(postgresCluster, &repo.Spec.Template, - getRepoPVCNames(postgresCluster, repoResources.pvcs), - containersToAdd...); err != nil { - return nil, errors.WithStack(err) - } + // add pgBackRest repo volumes to pod and to containers + if err := pgbackrest.AddRepoVolumesToPod(postgresCluster, &repo.Spec.Template, + getRepoPVCNames(postgresCluster, repoResources.pvcs), + containersToAdd...); err != nil { + return nil, errors.WithStack(err) } + // add configs to pod pgbackrest.AddConfigToRepoPod(postgresCluster, &repo.Spec.Template.Spec) @@ -778,7 +780,12 @@ func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresC // generateBackupJobSpecIntent generates a JobSpec for a pgBackRest backup job func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repo v1beta1.PGBackRestRepo, serviceAccountName string, - labels, annotations map[string]string, opts ...string) *batchv1.JobSpec { + labels, annotations map[string]string, opts ...string) (*batchv1.JobSpec, error) { + + selector, containerName, err := getPGBackRestExecSelector(postgresCluster, repo) + if err != nil { + return nil, errors.WithStack(err) + } repoIndex := regexRepoIndex.FindString(repo.Name) cmdOpts := []string{ @@ -798,9 +805,9 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P {Name: "COMMAND", Value: "backup"}, {Name: "COMMAND_OPTS", Value: strings.Join(cmdOpts, " ")}, {Name: "COMPARE_HASH", 
Value: "true"}, - {Name: "CONTAINER", Value: naming.PGBackRestRepoContainerName}, + {Name: "CONTAINER", Value: containerName}, {Name: "NAMESPACE", Value: postgresCluster.GetNamespace()}, - {Name: "SELECTOR", Value: naming.PGBackRestDedicatedSelector(postgresCluster.GetName()).String()}, + {Name: "SELECTOR", Value: selector.String()}, }, Image: config.PGBackRestContainerImage(postgresCluster), ImagePullPolicy: postgresCluster.Spec.ImagePullPolicy, @@ -861,9 +868,13 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P jobSpec.Template.Spec.ImagePullSecrets = postgresCluster.Spec.ImagePullSecrets // add pgBackRest configs to template - pgbackrest.AddConfigToRepoPod(postgresCluster, &jobSpec.Template.Spec) + if containerName == naming.PGBackRestRepoContainerName { + pgbackrest.AddConfigToRepoPod(postgresCluster, &jobSpec.Template.Spec) + } else { + pgbackrest.AddConfigToInstancePod(postgresCluster, &jobSpec.Template.Spec) + } - return jobSpec + return jobSpec, nil } // +kubebuilder:rbac:groups="",resources="configmaps",verbs={delete,list} @@ -1412,14 +1423,19 @@ func (r *Reconciler) reconcilePGBackRest(ctx context.Context, var repoHost *appsv1.StatefulSet var repoHostName string - // reconcile the pgbackrest repository host - repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances, repoHostSA.GetName()) - if err != nil { - log.Error(err, "unable to reconcile pgBackRest repo host") - result.Requeue = true - return result, nil + if pgbackrest.RepoHostVolumeDefined(postgresCluster) { + // reconcile the pgbackrest repository host + repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances, repoHostSA.GetName()) + if err != nil { + log.Error(err, "unable to reconcile pgBackRest repo host") + result.Requeue = true + return result, nil + } + repoHostName = repoHost.GetName() + } else { + // remove the dedicated repo host status if a dedicated host is not enabled + meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, ConditionRepoHostReady) } - repoHostName = repoHost.GetName() if err := r.reconcilePGBackRestSecret(ctx, postgresCluster, repoHost, rootCA); err != nil { log.Error(err, "unable to reconcile pgBackRest secret") @@ -2023,6 +2039,8 @@ func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) error { + log := logging.FromContext(ctx).WithValues("reconcileResource", "repoConfig") + backrestConfig, err := pgbackrest.CreatePGBackRestConfigMapIntent(ctx, postgresCluster, repoHostName, configHash, serviceName, serviceNamespace, instanceNames) if err != nil { @@ -2036,6 +2054,12 @@ func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, return errors.WithStack(err) } + repoHostConfigured := pgbackrest.RepoHostVolumeDefined(postgresCluster) + if !repoHostConfigured { + log.V(1).Info("skipping SSH reconciliation, no repo hosts configured") + return nil + } + return nil } @@ -2349,11 +2373,13 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, return nil } - // determine if the dedicated repository host is ready using the repo host ready + // determine if the dedicated repository host is ready (if enabled) using the repo host ready // condition, and return if not - repoCondition := meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) - if repoCondition == nil || repoCondition.Status != metav1.ConditionTrue { - return nil + if 
pgbackrest.RepoHostVolumeDefined(postgresCluster) { + repoCondition := meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) + if repoCondition == nil || repoCondition.Status != metav1.ConditionTrue { + return nil + } } // Determine if the replica create backup is complete and return if not. This allows for proper @@ -2435,8 +2461,11 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, repo, + spec, err := generateBackupJobSpecIntent(ctx, postgresCluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) + if err != nil { + return errors.WithStack(err) + } backupJob.Spec = *spec @@ -2524,6 +2553,13 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, replicaRepoReady = (condition.Status == metav1.ConditionTrue) } + // get pod name and container name as needed to exec into the proper pod and create + // the pgBackRest backup + _, containerName, err := getPGBackRestExecSelector(postgresCluster, replicaCreateRepo) + if err != nil { + return errors.WithStack(err) + } + // determine if the dedicated repository host is ready using the repo host ready status var dedicatedRepoReady bool condition = meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) @@ -2550,10 +2586,14 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, // - The job has failed. The Job will be deleted and recreated to try again. // - The replica creation repo has changed since the Job was created. Delete and recreate // with the Job with the proper repo configured. + // - The "config" annotation has changed, indicating there is a new primary. Delete and + // recreate the Job with the proper config mounted (applicable when a dedicated repo + // host is not enabled). // - The "config hash" annotation has changed, indicating a configuration change has been // made in the spec (specifically a change to the config for an external repo). Delete // and recreate the Job with proper hash per the current config. 
if failed || replicaCreateRepoChanged || + (job.GetAnnotations()[naming.PGBackRestCurrentConfig] != containerName) || (job.GetAnnotations()[naming.PGBackRestConfigHash] != configHash) { if err := r.Client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { @@ -2569,9 +2609,10 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, } } + dedicatedEnabled := pgbackrest.RepoHostVolumeDefined(postgresCluster) // return if no job has been created and the replica repo or the dedicated // repo host is not ready - if job == nil && (!dedicatedRepoReady || !replicaRepoReady) { + if job == nil && ((dedicatedEnabled && !dedicatedRepoReady) || !replicaRepoReady) { return nil } @@ -2590,13 +2631,17 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, annotations = naming.Merge(postgresCluster.Spec.Metadata.GetAnnotationsOrNil(), postgresCluster.Spec.Backups.PGBackRest.Metadata.GetAnnotationsOrNil(), map[string]string{ - naming.PGBackRestConfigHash: configHash, + naming.PGBackRestCurrentConfig: containerName, + naming.PGBackRestConfigHash: configHash, }) backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, + spec, err := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, serviceAccount.GetName(), labels, annotations) + if err != nil { + return errors.WithStack(err) + } backupJob.Spec = *spec @@ -2778,6 +2823,27 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, return false, nil } +// getPGBackRestExecSelector returns a selector and container name that allows the proper +// Pod (along with a specific container within it) to be found within the Kubernetes +// cluster as needed to exec into the container and run a pgBackRest command. +func getPGBackRestExecSelector(postgresCluster *v1beta1.PostgresCluster, + repo v1beta1.PGBackRestRepo) (labels.Selector, string, error) { + + var err error + var podSelector labels.Selector + var containerName string + + if repo.Volume != nil { + podSelector = naming.PGBackRestDedicatedSelector(postgresCluster.GetName()) + containerName = naming.PGBackRestRepoContainerName + } else { + podSelector, err = naming.AsSelector(naming.ClusterPrimary(postgresCluster.GetName())) + containerName = naming.ContainerDatabase + } + + return podSelector, containerName, err +} + // getRepoHostStatus is responsible for returning the pgBackRest status for the // provided pgBackRest repository host func getRepoHostStatus(repoHost *appsv1.StatefulSet) *v1beta1.RepoHostStatus { @@ -3022,8 +3088,11 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set backup type (i.e. "full", "diff", "incr") backupOpts := []string{"--type=" + backupType} - jobSpec := generateBackupJobSpecIntent(ctx, cluster, repo, + jobSpec, err := generateBackupJobSpecIntent(ctx, cluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) + if err != nil { + return errors.WithStack(err) + } // Suspend cronjobs when shutdown or read-only. Any jobs that have already // started will continue. 
@@ -3056,7 +3125,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set metadata pgBackRestCronJob.SetGroupVersionKind(batchv1.SchemeGroupVersion.WithKind("CronJob")) - err := errors.WithStack(r.setControllerReference(cluster, pgBackRestCronJob)) + err = errors.WithStack(r.setControllerReference(cluster, pgBackRestCronJob)) if err == nil { err = r.apply(ctx, pgBackRestCronJob) diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index 582fda0773..a4f2e87a93 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -886,6 +886,52 @@ func TestReconcileStanzaCreate(t *testing.T) { } } +func TestGetPGBackRestExecSelector(t *testing.T) { + + testCases := []struct { + cluster *v1beta1.PostgresCluster + repo v1beta1.PGBackRestRepo + desc string + expectedSelector string + expectedContainer string + }{{ + desc: "volume repo defined dedicated repo host enabled", + cluster: &v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "hippo"}, + }, + repo: v1beta1.PGBackRestRepo{ + Name: "repo1", + Volume: &v1beta1.RepoPVC{}, + }, + expectedSelector: "postgres-operator.crunchydata.com/cluster=hippo," + + "postgres-operator.crunchydata.com/pgbackrest=," + + "postgres-operator.crunchydata.com/pgbackrest-dedicated=", + expectedContainer: "pgbackrest", + }, { + desc: "cloud repo defined no repo host enabled", + cluster: &v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "hippo"}, + }, + repo: v1beta1.PGBackRestRepo{ + Name: "repo1", + S3: &v1beta1.RepoS3{}, + }, + expectedSelector: "postgres-operator.crunchydata.com/cluster=hippo," + + "postgres-operator.crunchydata.com/instance," + + "postgres-operator.crunchydata.com/role=master", + expectedContainer: "database", + }} + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + selector, container, err := getPGBackRestExecSelector(tc.cluster, tc.repo) + assert.NilError(t, err) + assert.Assert(t, selector.String() == tc.expectedSelector) + assert.Assert(t, container == tc.expectedContainer) + }) + } +} + func TestReconcileReplicaCreateBackup(t *testing.T) { // Garbage collector cleans up test resources before the test completes if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { @@ -970,13 +1016,17 @@ func TestReconcileReplicaCreateBackup(t *testing.T) { } assert.Assert(t, foundOwnershipRef) - var foundHashAnnotation bool + var foundConfigAnnotation, foundHashAnnotation bool // verify annotations for k, v := range backupJob.GetAnnotations() { + if k == naming.PGBackRestCurrentConfig && v == naming.PGBackRestRepoContainerName { + foundConfigAnnotation = true + } if k == naming.PGBackRestConfigHash && v == configHash { foundHashAnnotation = true } } + assert.Assert(t, foundConfigAnnotation) assert.Assert(t, foundHashAnnotation) // verify container & env vars @@ -1697,11 +1747,11 @@ func TestGetPGBackRestResources(t *testing.T) { jobCount: 0, pvcCount: 0, hostCount: 1, }, }, { - desc: "no dedicated repo host defined, dedicated sts not deleted", + desc: "no dedicated repo host defined delete dedicated sts", createResources: []client.Object{ &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: "keep-dedicated-two", + Name: "delete-dedicated", Namespace: namespace, Labels: naming.PGBackRestDedicatedLabels(clusterName), }, @@ -1730,8 +1780,43 @@ func TestGetPGBackRestResources(t *testing.T) { }, }, result: testResult{ - // Host count is 2 due to previous repo 
host sts not being deleted. - jobCount: 0, pvcCount: 0, hostCount: 2, + jobCount: 0, pvcCount: 0, hostCount: 0, + }, + }, { + desc: "no repo host defined delete dedicated sts", + createResources: []client.Object{ + &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "delete-dedicated-no-repo-host", + Namespace: namespace, + Labels: naming.PGBackRestDedicatedLabels(clusterName), + }, + Spec: appsv1.StatefulSetSpec{ + Selector: metav1.SetAsLabelSelector( + naming.PGBackRestDedicatedLabels(clusterName)), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: naming.PGBackRestDedicatedLabels(clusterName), + }, + Spec: corev1.PodSpec{}, + }, + }, + }, + }, + cluster: &v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + UID: types.UID(clusterUID), + }, + Spec: v1beta1.PostgresClusterSpec{ + Backups: v1beta1.Backups{ + PGBackRest: v1beta1.PGBackRestArchive{}, + }, + }, + }, + result: testResult{ + jobCount: 0, pvcCount: 0, hostCount: 0, }, }} @@ -2563,11 +2648,12 @@ func TestCopyConfigurationResources(t *testing.T) { func TestGenerateBackupJobIntent(t *testing.T) { ctx := context.Background() t.Run("empty", func(t *testing.T) { - spec := generateBackupJobSpecIntent(ctx, + spec, err := generateBackupJobSpecIntent(ctx, &v1beta1.PostgresCluster{}, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` containers: - command: @@ -2580,10 +2666,10 @@ containers: - name: COMPARE_HASH value: "true" - name: CONTAINER - value: pgbackrest + value: database - name: NAMESPACE - name: SELECTOR - value: postgres-operator.crunchydata.com/cluster=,postgres-operator.crunchydata.com/pgbackrest=,postgres-operator.crunchydata.com/pgbackrest-dedicated= + value: postgres-operator.crunchydata.com/cluster=,postgres-operator.crunchydata.com/instance,postgres-operator.crunchydata.com/role=master name: pgbackrest resources: {} securityContext: @@ -2610,23 +2696,11 @@ volumes: sources: - configMap: items: - - key: pgbackrest_repo.conf - path: pgbackrest_repo.conf + - key: pgbackrest_instance.conf + path: pgbackrest_instance.conf - key: config-hash path: config-hash - - key: pgbackrest-server.conf - path: ~postgres-operator_server.conf name: -pgbackrest-config - - secret: - items: - - key: pgbackrest.ca-roots - path: ~postgres-operator/tls-ca.crt - - key: pgbackrest-client.crt - path: ~postgres-operator/client-tls.crt - - key: pgbackrest-client.key - mode: 384 - path: ~postgres-operator/client-tls.key - name: -pgbackrest `)) }) @@ -2636,11 +2710,12 @@ volumes: ImagePullPolicy: corev1.PullAlways, }, } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Equal(t, job.Template.Spec.Containers[0].ImagePullPolicy, corev1.PullAlways) }) @@ -2651,11 +2726,12 @@ volumes: cluster.Spec.Backups = v1beta1.Backups{ PGBackRest: v1beta1.PGBackRestArchive{}, } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.DeepEqual(t, job.Template.Spec.Containers[0].Resources, corev1.ResourceRequirements{}) }) @@ -2668,11 +2744,12 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.DeepEqual(t, 
job.Template.Spec.Containers[0].Resources, corev1.ResourceRequirements{ Requests: corev1.ResourceList{ @@ -2707,11 +2784,12 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Equal(t, job.Template.Spec.Affinity, affinity) }) @@ -2720,11 +2798,12 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ PriorityClassName: initialize.String("some-priority-class"), } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Equal(t, job.Template.Spec.PriorityClassName, "some-priority-class") }) @@ -2738,11 +2817,12 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ Tolerations: tolerations, } - job := generateBackupJobSpecIntent(ctx, + job, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.DeepEqual(t, job.Template.Spec.Tolerations, tolerations) }) @@ -2752,16 +2832,18 @@ volumes: t.Run("Undefined", func(t *testing.T) { cluster.Spec.Backups.PGBackRest.Jobs = nil - spec := generateBackupJobSpecIntent(ctx, + spec, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{} - spec = generateBackupJobSpecIntent(ctx, + spec, err = generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) }) @@ -2770,9 +2852,10 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(0), } - spec := generateBackupJobSpecIntent(ctx, + spec, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { assert.Equal(t, *spec.TTLSecondsAfterFinished, int32(0)) } @@ -2783,9 +2866,10 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(100), } - spec := generateBackupJobSpecIntent(ctx, + spec, err := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) + assert.NilError(t, err) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { assert.Equal(t, *spec.TTLSecondsAfterFinished, int32(100)) } diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index a2fedb5747..38d30926d9 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -32,6 +32,14 @@ const ( // (and therefore must be recreated) PGBackRestConfigHash = annotationPrefix + "pgbackrest-hash" + // PGBackRestCurrentConfig is an annotation used to indicate the name of the pgBackRest + // configuration associated with a specific Job as determined by either the current primary + // (if no dedicated repository host is enabled), or the dedicated repository host. This helps + // in detecting pgBackRest backup Jobs that no longer mount the proper pgBackRest + // configuration, e.g. because a failover has occurred, or because dedicated repo host has been + // enabled or disabled. + PGBackRestCurrentConfig = annotationPrefix + "pgbackrest-config" + // PGBackRestRestore is the annotation that is added to a PostgresCluster to initiate an in-place // restore. The value of the annotation will be a unique identifier for a restore Job (e.g. 
a // timestamp), which will be stored in the PostgresCluster status to properly track completion diff --git a/internal/naming/annotations_test.go b/internal/naming/annotations_test.go index f64004557f..593d000984 100644 --- a/internal/naming/annotations_test.go +++ b/internal/naming/annotations_test.go @@ -20,6 +20,7 @@ func TestAnnotationsValid(t *testing.T) { assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestBackup)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestBackupJobCompletion)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestConfigHash)) + assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestCurrentConfig)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestIPVersion)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestRestore)) assert.Assert(t, nil == validation.IsQualifiedName(PostgresExporterCollectorsAnnotation)) diff --git a/internal/naming/selectors.go b/internal/naming/selectors.go index a7b105de4b..c51f2d0262 100644 --- a/internal/naming/selectors.go +++ b/internal/naming/selectors.go @@ -152,6 +152,13 @@ func ClusterPostgresUsers(cluster string) metav1.LabelSelector { } } +// ClusterPrimary selects things for the Primary PostgreSQL instance. +func ClusterPrimary(cluster string) metav1.LabelSelector { + s := ClusterInstances(cluster) + s.MatchLabels[LabelRole] = RolePatroniLeader + return s +} + // CrunchyBridgeClusterPostgresRoles selects things labeled for CrunchyBridgeCluster // PostgreSQL roles in cluster. func CrunchyBridgeClusterPostgresRoles(clusterName string) metav1.LabelSelector { diff --git a/internal/naming/selectors_test.go b/internal/naming/selectors_test.go index a9d2ce987d..c8617bcb78 100644 --- a/internal/naming/selectors_test.go +++ b/internal/naming/selectors_test.go @@ -148,6 +148,16 @@ func TestClusterPostgresUsers(t *testing.T) { assert.ErrorContains(t, err, "Invalid") } +func TestClusterPrimary(t *testing.T) { + s, err := AsSelector(ClusterPrimary("something")) + assert.NilError(t, err) + assert.DeepEqual(t, s.String(), strings.Join([]string{ + "postgres-operator.crunchydata.com/cluster=something", + "postgres-operator.crunchydata.com/instance", + "postgres-operator.crunchydata.com/role=master", + }, ",")) +} + func TestCrunchyBridgeClusterPostgresRoles(t *testing.T) { s, err := AsSelector(CrunchyBridgeClusterPostgresRoles("something")) assert.NilError(t, err) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index c99e952afc..f4b66fad70 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -96,6 +96,7 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet // create an empty map for the config data initialize.Map(&cm.Data) + addDedicatedHost := RepoHostVolumeDefined(postgresCluster) pgdataDir := postgres.DataDirectory(postgresCluster) // Port will always be populated, since the API will set a default of 5432 if not provided pgPort := *postgresCluster.Spec.Port @@ -108,14 +109,13 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet postgresCluster.Spec.Backups.PGBackRest.Global, ).String() + // As the cluster transitions from having a repository host to having none, // PostgreSQL instances that have not rolled out expect to mount a server // config file. Always populate that file so those volumes stay valid and - // Kubernetes propagates their contents to those pods. The repo host name - // given below should always be set, but this guards for cases when it might - // not be. 
+ // Kubernetes propagates their contents to those pods. cm.Data[serverConfigMapKey] = "" - if repoHostName != "" { + if addDedicatedHost && repoHostName != "" { cm.Data[serverConfigMapKey] = iniGeneratedWarning + serverConfig(postgresCluster).String() diff --git a/internal/pgbackrest/config.md b/internal/pgbackrest/config.md index dd1127643a..f19c9ac1e4 100644 --- a/internal/pgbackrest/config.md +++ b/internal/pgbackrest/config.md @@ -6,17 +6,17 @@ # pgBackRest Configuration Overview -The initial pgBackRest configuration for the Postgres Clusters is designed to stand up a +The initial pgBackRest configuration for the Postgres Clusters is designed to stand up a minimal configuration for use by the various pgBackRest functions needed by the Postgres cluster. These settings are meant to be the minimally required settings, with other settings supported through the use of custom configurations. -During initial cluster creation, four pgBackRest use cases are involved. +During initial cluster creation, four pgBackRest use cases are involved. -These settings are configured in either the [global] or [stanza] sections of the +These settings are configured in either the [global] or [stanza] sections of the pgBackRest configuration based on their designation in the pgBackRest code. For more information on the above, and other settings, please see -https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c + As shown, the settings with the `cfgSectionGlobal` designation are @@ -24,18 +24,17 @@ As shown, the settings with the `cfgSectionGlobal` designation are `log-level-file`: Level for file logging. Set to 'off' when the repo host has no volume. -`repo-path`: Path where backups and archive are stored. +`repo-path`: Path where backups and archive are stored. The repository is where pgBackRest stores backups and archives WAL segments. `repo-host`: Repository host when operating remotely via TLS. - The settings with the `cfgSectionStanza` designation are `pg-host`: PostgreSQL host for operating remotely via TLS. `pg-path`: The path of the PostgreSQL data directory. - This should be the same as the data_directory setting in postgresql.conf. + This should be the same as the data_directory setting in postgresql.conf. `pg-port`: The port that PostgreSQL is running on. @@ -44,14 +43,13 @@ The settings with the `cfgSectionStanza` designation are For more information on these and other configuration settings, please see `https://pgbackrest.org/configuration.html`. -# Configuration Per Function +## Configuration Per Function -Below, each of the four configuration sets is outlined by use case. Please note that certain -settings have acceptable defaults for the cluster's usage (such as for `repo1-type` which +Below, each of the four configuration sets is outlined by use case. Please note that certain +settings have acceptable defaults for the cluster's usage (such as for `repo1-type` which defaults to `posix`), so those settings are not included. - -1. Primary Database Pod +1. Primary Database Pod [global] log-path @@ -86,28 +84,26 @@ log-path [global] log-path - -# Initial pgBackRest Configuration +## Initial pgBackRest Configuration In order to be used by the Postgres cluster, these default configurations are stored in -a configmap. This configmap is named with the following convention `-pgbackrest-config`, +a configmap. 
This configmap is named with the following convention `-pgbackrest-config`, such that a cluster named 'mycluster' would have a configuration configmap named `mycluster-pgbackrest-config`. -As noted above, there are three distinct default configurations, each of which is referenced +As noted above, there are three distinct default configurations, each of which is referenced by a key value in the configmap's data section. For the primary database pod, the key is `pgbackrest_primary.conf`. For the pgBackRest repo pod, the key is `pgbackrest_repo.conf`. Finally, for the pgBackRest stanza job pod and the initial pgBackRest backup job pod, the key is `pgbackrest_job.conf`. - -For each pod, the relevant configuration file is mounted as a projected volume named + +For each pod, the relevant configuration file is mounted as a projected volume named `pgbackrest-config-vol`. The configuration file will be found in the `/etc/pgbackrest` directory -of the relevant container and is named `pgbackrest.conf`, matching the default pgBackRest location. -For more information, please see +of the relevant container and is named `pgbackrest.conf`, matching the default pgBackRest location. +For more information, please see `https://pgbackrest.org/configuration.html#introduction` - -# Custom Configuration Support +## Custom Configuration Support TODO(tjmoore4): Document custom configuration solution once implemented @@ -116,7 +112,7 @@ flag with the desired pgBackRest command. This should point to the directory pat where the `*.conf` file with the custom configuration is located. This file will be added as a projected volume and must be formatted in the standard -pgBackRest INI convention. Please note that any of the configuration settings listed +pgBackRest INI convention. Please note that any of the configuration settings listed above MUST BE CONFIGURED VIA THE POSTGRESCLUSTER SPEC so as to avoid errors. For more information, please see @@ -140,7 +136,7 @@ command-line or top-to-bottom in INI files. The remaining options must be set exactly once. 
`pgbackrest` exits non-zero when the option occurs twice on the command-line or twice in a file: -``` +```text ERROR: [031]: option 'io-timeout' cannot be set multiple times ``` diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index a314ad3102..cdbaa725a4 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -46,6 +46,54 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") }) + t.Run("NoVolumeRepoCloudRepoPresent", func(t *testing.T) { + cluster := cluster.DeepCopy() + cluster.Spec.Backups.PGBackRest.Global = map[string]string{ + "repo1-test": "something", + } + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + GCS: &v1beta1.RepoGCS{Bucket: "g-bucket"}, + }, + } + + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "", "anumber", "pod-service-name", "test-ns", + []string{"some-instance"}) + + assert.NilError(t, err) + assert.DeepEqual(t, configmap.Annotations, map[string]string{}) + assert.DeepEqual(t, configmap.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) + + assert.Equal(t, configmap.Data["config-hash"], "anumber") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + }) + t.Run("DedicatedRepoHost", func(t *testing.T) { cluster := cluster.DeepCopy() cluster.Spec.Backups.PGBackRest.Global = map[string]string{ diff --git a/internal/pgbackrest/reconcile.go b/internal/pgbackrest/reconcile.go index 4e789d137e..907012ac1a 100644 --- a/internal/pgbackrest/reconcile.go +++ b/internal/pgbackrest/reconcile.go @@ -105,15 +105,22 @@ func AddConfigToInstancePod( {Key: ConfigHashKey, Path: ConfigHashKey}, } + // As the cluster transitions from having a repository host to having none, + // PostgreSQL instances that have not rolled out expect to mount client + // certificates. Specify those files are optional so the configuration + // volumes stay valid and Kubernetes propagates their contents to those pods. secret := corev1.VolumeProjection{Secret: &corev1.SecretProjection{}} secret.Secret.Name = naming.PGBackRestSecret(cluster).Name + secret.Secret.Optional = initialize.Bool(true) - configmap.ConfigMap.Items = append( - configmap.ConfigMap.Items, corev1.KeyToPath{ - Key: serverConfigMapKey, - Path: serverConfigProjectionPath, - }) - secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) + if RepoHostVolumeDefined(cluster) { + configmap.ConfigMap.Items = append( + configmap.ConfigMap.Items, corev1.KeyToPath{ + Key: serverConfigMapKey, + Path: serverConfigProjectionPath, + }) + secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) + } // Start with a copy of projections specified in the cluster. 
Items later in // the list take precedence over earlier items (that is, last write wins). @@ -406,13 +413,15 @@ func InstanceCertificates(ctx context.Context, ) error { var err error - initialize.Map(&outInstanceCertificates.Data) + if RepoHostVolumeDefined(inCluster) { + initialize.Map(&outInstanceCertificates.Data) - if err == nil { - outInstanceCertificates.Data[certInstanceSecretKey], err = certFile(inDNS) - } - if err == nil { - outInstanceCertificates.Data[certInstancePrivateKeySecretKey], err = certFile(inDNSKey) + if err == nil { + outInstanceCertificates.Data[certInstanceSecretKey], err = certFile(inDNS) + } + if err == nil { + outInstanceCertificates.Data[certInstancePrivateKeySecretKey], err = certFile(inDNSKey) + } } return err diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index 6104a4e2a2..e60d48843f 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -231,19 +231,7 @@ func TestAddConfigToInstancePod(t *testing.T) { path: pgbackrest_instance.conf - key: config-hash path: config-hash - - key: pgbackrest-server.conf - path: ~postgres-operator_server.conf name: hippo-pgbackrest-config - - secret: - items: - - key: pgbackrest.ca-roots - path: ~postgres-operator/tls-ca.crt - - key: pgbackrest-client.crt - path: ~postgres-operator/client-tls.crt - - key: pgbackrest-client.key - mode: 384 - path: ~postgres-operator/client-tls.key - name: hippo-pgbackrest `)) }) @@ -266,19 +254,7 @@ func TestAddConfigToInstancePod(t *testing.T) { path: pgbackrest_instance.conf - key: config-hash path: config-hash - - key: pgbackrest-server.conf - path: ~postgres-operator_server.conf name: hippo-pgbackrest-config - - secret: - items: - - key: pgbackrest.ca-roots - path: ~postgres-operator/tls-ca.crt - - key: pgbackrest-client.crt - path: ~postgres-operator/client-tls.crt - - key: pgbackrest-client.key - mode: 384 - path: ~postgres-operator/client-tls.key - name: hippo-pgbackrest `)) }) @@ -319,6 +295,7 @@ func TestAddConfigToInstancePod(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) }) } diff --git a/internal/pgbackrest/tls-server.md b/internal/pgbackrest/tls-server.md index 7c8f191c35..56af386d5b 100644 --- a/internal/pgbackrest/tls-server.md +++ b/internal/pgbackrest/tls-server.md @@ -12,10 +12,8 @@ on different pods: - [dedicated repository host](https://pgbackrest.org/user-guide.html#repo-host) - [backup from standby](https://pgbackrest.org/user-guide.html#standby-backup) -When a PostgresCluster is configured to store backups on a PVC, the dedicated -repository host is used to make that PVC available to all PostgreSQL instances -in the cluster. Regardless of whether the repo host has a defined PVC, it -functions as the server for the pgBackRest clients that run on the Instances. +When a PostgresCluster is configured to store backups on a PVC, we start a dedicated +repository host to make that PVC available to all PostgreSQL instances in the cluster. The repository host runs a `pgbackrest` server that is secured through TLS and [certificates][]. When performing backups, it connects to `pgbackrest` servers @@ -26,32 +24,30 @@ to the repository host to [send and receive WAL files][archiving]. [archiving]: https://www.postgresql.org/docs/current/continuous-archiving.html [certificates]: certificates.md - The `pgbackrest` command acts as a TLS client and connects to a pgBackRest TLS server when `pg-host-type=tls` and/or `repo-host-type=tls`. 
The default for these is `ssh`: -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L3771 -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L6137 - +- +- The pgBackRest TLS server is configured through the `tls-server-*` [options](config.md). In pgBackRest 2.38, changing any of these options or changing certificate contents requires a reload of the server, as shown in the "Setup TLS Server" section of the documentation, with the command configured as -``` +```text ExecReload=kill -HUP $MAINPID ``` -- https://pgbackrest.org/user-guide-rhel.html#repo-host/setup-tls +- - `tls-server-address`, `tls-server-port`
The network address and port on which to listen. pgBackRest 2.38 listens on the *first* address returned by `getaddrinfo()`. There is no way to listen on all interfaces. - - https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/server.c#L172 - - https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/common.c#L87 + - + - - `tls-server-cert-file`, `tls-server-key-file`
  The [certificate chain][certificates] and private key pair used to encrypt connections.
@@ -65,12 +61,11 @@ ExecReload=kill -HUP $MAINPID
   to interact with.
   [Required](https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L8751).
-
 In pgBackRest 2.38, as mentioned above, sending SIGHUP causes a configuration reload.
 
-- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L178
+- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L178>
 
-```
+```text
 P00 DETAIL: configuration reload begin
 P00 INFO: server command begin 2.38...
 P00 DETAIL: configuration reload end
@@ -78,20 +73,18 @@ P00 DETAIL: configuration reload end
 
 Sending SIGINT to the TLS server causes it to exit with code 63, TermError.
 
-- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L73-L75
-- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L62
-- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/error.auto.c#L48
+- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L73-L75>
+- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L62>
+- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/error.auto.c#L48>
-
-```
+```text
 P00 INFO: server command end: terminated on signal [SIGINT]
 ```
 
 Sending SIGTERM exits the signal loop and leads to command termination.
 
-- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L194
-
-```
+- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L194>
+```text
 P00 INFO: server command end: completed successfully
 ```

From eebb90fb343d887574e78c0a24c2418f33e12bd7 Mon Sep 17 00:00:00 2001
From: Drew Sessler
Date: Mon, 7 Jul 2025 10:18:49 -0700
Subject: [PATCH 46/79] Fix noctx linter error: change http.NewRequest to http.NewRequestWithContext.

---
 internal/upgradecheck/http.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/upgradecheck/http.go b/internal/upgradecheck/http.go
index fe8585d42d..c2796ffe54 100644
--- a/internal/upgradecheck/http.go
+++ b/internal/upgradecheck/http.go
@@ -70,7 +70,7 @@ func checkForUpgrades(ctx context.Context, url, versionString string, backoff wa
 	var headerPayloadStruct *clientUpgradeData
 
 	// Prep request
-	req, err := http.NewRequest("GET", url, nil)
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
 	if err == nil {
 		// generateHeader always returns some sort of struct, using defaults/nil values
 		// in case some of the checks return errors

From 828dd97314477d4430a65613fb9e63d3287fe3b6 Mon Sep 17 00:00:00 2001
From: Drew Sessler
Date: Wed, 9 Jul 2025 13:38:26 -0700
Subject: [PATCH 47/79] Run backup command for cloud-based repos in the backup job.

Add/adjust tests for cloud repo backup job changes.
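A sketch of the resulting Job command, under the simplification that only the
volume-vs-cloud distinction matters (the real code sets these fields on a
corev1.Container in generateBackupJobSpecIntent and passes the exec details
through environment variables):

```go
package main

import "fmt"

// backupCommand sketches the branch this patch adds. cmdOpts stands in for
// the options assembled earlier in that function, such as "--stanza=db"
// and "--repo=1".
func backupCommand(volumeRepo bool, cmdOpts []string) []string {
	if volumeRepo {
		// PVC-backed repo: run the Go wrapper, which execs into the
		// dedicated repo host; COMMAND, COMMAND_OPTS, CONTAINER, and
		// SELECTOR are supplied as environment variables (not shown).
		return []string{"/opt/crunchy/bin/pgbackrest"}
	}
	// Cloud repo: run pgbackrest directly in the backup Job's pod.
	return append([]string{"/bin/pgbackrest", "backup"}, cmdOpts...)
}

func main() {
	fmt.Println(backupCommand(true, nil))
	fmt.Println(backupCommand(false, []string{"--stanza=db", "--repo=1"}))
}
```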
--- .../controller/postgrescluster/instance.go | 12 +- .../postgrescluster/instance_test.go | 79 ++----- .../controller/postgrescluster/pgbackrest.go | 111 ++++----- .../postgrescluster/pgbackrest_test.go | 174 ++++++++------ internal/pgbackrest/config.go | 87 ++++++- internal/pgbackrest/config_test.go | 223 +++++++++++++++++- internal/pgbackrest/reconcile.go | 114 +++++---- internal/pgbackrest/reconcile_test.go | 113 ++++++++- internal/pgbackrest/util.go | 54 ++++- 9 files changed, 697 insertions(+), 270 deletions(-) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 2d50bdd843..726c786720 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1172,7 +1172,7 @@ func (r *Reconciler) reconcileInstance( } if err == nil { instanceCertificates, err = r.reconcileInstanceCertificates( - ctx, cluster, spec, instance, rootCA) + ctx, cluster, spec, instance, rootCA, backupsSpecFound) } if err == nil { postgresDataVolume, err = r.reconcilePostgresDataVolume(ctx, cluster, spec, instance, clusterVolumes, nil) @@ -1397,10 +1397,8 @@ func addPGBackRestToInstancePodSpec( ctx context.Context, cluster *v1beta1.PostgresCluster, instanceCertificates *corev1.Secret, instancePod *corev1.PodSpec, ) { - if pgbackrest.RepoHostVolumeDefined(cluster) { - pgbackrest.AddServerToInstancePod(ctx, cluster, instancePod, - instanceCertificates.Name) - } + pgbackrest.AddServerToInstancePod(ctx, cluster, instancePod, + instanceCertificates.Name) pgbackrest.AddConfigToInstancePod(cluster, instancePod) } @@ -1469,7 +1467,7 @@ func (r *Reconciler) reconcileInstanceConfigMap( func (r *Reconciler) reconcileInstanceCertificates( ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, instance *appsv1.StatefulSet, - root *pki.RootCertificateAuthority, + root *pki.RootCertificateAuthority, backupsSpecFound bool, ) (*corev1.Secret, error) { existing := &corev1.Secret{ObjectMeta: naming.InstanceCertificates(instance)} err := errors.WithStack(client.IgnoreNotFound( @@ -1512,7 +1510,7 @@ func (r *Reconciler) reconcileInstanceCertificates( root.Certificate, leafCert.Certificate, leafCert.PrivateKey, instanceCerts) } - if err == nil { + if err == nil && backupsSpecFound { err = pgbackrest.InstanceCertificates(ctx, cluster, root.Certificate, leafCert.Certificate, leafCert.PrivateKey, instanceCerts) diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index f31b38624c..bbabffdafb 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -544,49 +544,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { }, } - t.Run("NoVolumeRepo", func(t *testing.T) { - cluster := cluster.DeepCopy() - cluster.Spec.Backups.PGBackRest.Repos = nil - - out := pod.DeepCopy() - addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) - - // Only Containers and Volumes fields have changed. - assert.DeepEqual(t, pod, *out, cmpopts.IgnoreFields(pod, "Containers", "Volumes")) - - // Only database container has mounts. - // Other containers are ignored. - assert.Assert(t, cmp.MarshalMatches(out.Containers, ` -- name: database - resources: {} - volumeMounts: - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true -- name: other - resources: {} - `)) - - // Instance configuration files but no certificates. 
- // Other volumes are ignored. - assert.Assert(t, cmp.MarshalMatches(out.Volumes, ` -- name: other -- name: postgres-data -- name: postgres-wal -- name: pgbackrest-config - projected: - sources: - - configMap: - items: - - key: pgbackrest_instance.conf - path: pgbackrest_instance.conf - - key: config-hash - path: config-hash - name: hippo-pgbackrest-config - `)) - }) - - t.Run("OneVolumeRepo", func(t *testing.T) { + t.Run("CloudOrVolumeSameBehavior", func(t *testing.T) { alwaysExpect := func(t testing.TB, result *corev1.PodSpec) { // Only Containers and Volumes fields have changed. assert.DeepEqual(t, pod, *result, cmpopts.IgnoreFields(pod, "Containers", "Volumes")) @@ -635,21 +593,31 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { `)) } - cluster := cluster.DeepCopy() - cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + clusterWithVolume := cluster.DeepCopy() + clusterWithVolume.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ { Name: "repo1", Volume: new(v1beta1.RepoPVC), }, } - out := pod.DeepCopy() - addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) - alwaysExpect(t, out) + clusterWithCloudRepo := cluster.DeepCopy() + clusterWithCloudRepo.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + GCS: new(v1beta1.RepoGCS), + }, + } + + outWithVolume := pod.DeepCopy() + addPGBackRestToInstancePodSpec(ctx, clusterWithVolume, &certificates, outWithVolume) + alwaysExpect(t, outWithVolume) - // The TLS server is added and configuration mounted. - // It has PostgreSQL volumes mounted while other volumes are ignored. - assert.Assert(t, cmp.MarshalMatches(out.Containers, ` + outWithCloudRepo := pod.DeepCopy() + addPGBackRestToInstancePodSpec(ctx, clusterWithCloudRepo, &certificates, outWithCloudRepo) + alwaysExpect(t, outWithCloudRepo) + + outContainers := ` - name: database resources: {} volumeMounts: @@ -737,7 +705,12 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { - mountPath: /etc/pgbackrest/conf.d name: pgbackrest-config readOnly: true - `)) + ` + + // The TLS server is added and configuration mounted. + // It has PostgreSQL volumes mounted while other volumes are ignored. 
+ assert.Assert(t, cmp.MarshalMatches(outWithVolume.Containers, outContainers)) + assert.Assert(t, cmp.MarshalMatches(outWithCloudRepo.Containers, outContainers)) t.Run("CustomResources", func(t *testing.T) { cluster := cluster.DeepCopy() @@ -754,7 +727,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { }, } - before := out.DeepCopy() + before := outWithVolume.DeepCopy() out := pod.DeepCopy() addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) alwaysExpect(t, out) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index f235d8e828..a4269c5234 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -23,7 +23,6 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" utilerrors "k8s.io/apimachinery/pkg/util/errors" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -780,12 +779,7 @@ func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresC // generateBackupJobSpecIntent generates a JobSpec for a pgBackRest backup job func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repo v1beta1.PGBackRestRepo, serviceAccountName string, - labels, annotations map[string]string, opts ...string) (*batchv1.JobSpec, error) { - - selector, containerName, err := getPGBackRestExecSelector(postgresCluster, repo) - if err != nil { - return nil, errors.WithStack(err) - } + labels, annotations map[string]string, opts ...string) *batchv1.JobSpec { repoIndex := regexRepoIndex.FindString(repo.Name) cmdOpts := []string{ @@ -800,21 +794,31 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P cmdOpts = append(cmdOpts, opts...) container := corev1.Container{ - Command: []string{"/opt/crunchy/bin/pgbackrest"}, - Env: []corev1.EnvVar{ - {Name: "COMMAND", Value: "backup"}, - {Name: "COMMAND_OPTS", Value: strings.Join(cmdOpts, " ")}, - {Name: "COMPARE_HASH", Value: "true"}, - {Name: "CONTAINER", Value: containerName}, - {Name: "NAMESPACE", Value: postgresCluster.GetNamespace()}, - {Name: "SELECTOR", Value: selector.String()}, - }, Image: config.PGBackRestContainerImage(postgresCluster), ImagePullPolicy: postgresCluster.Spec.ImagePullPolicy, Name: naming.PGBackRestRepoContainerName, SecurityContext: initialize.RestrictedSecurityContext(), } + // If the repo that we are backing up to is a local volume, we will configure + // the job to use the pgbackrest go binary to exec into the repo host and run + // the backup. If the repo is a cloud-based repo, we will run the pgbackrest + // backup command directly in the job pod. + if repo.Volume != nil { + container.Command = []string{"/opt/crunchy/bin/pgbackrest"} + container.Env = []corev1.EnvVar{ + {Name: "COMMAND", Value: "backup"}, + {Name: "COMMAND_OPTS", Value: strings.Join(cmdOpts, " ")}, + {Name: "COMPARE_HASH", Value: "true"}, + {Name: "CONTAINER", Value: naming.PGBackRestRepoContainerName}, + {Name: "NAMESPACE", Value: postgresCluster.GetNamespace()}, + {Name: "SELECTOR", Value: naming.PGBackRestDedicatedSelector(postgresCluster.GetName()).String()}, + } + } else { + container.Command = []string{"/bin/pgbackrest", "backup"} + container.Command = append(container.Command, cmdOpts...) 
+ } + if postgresCluster.Spec.Backups.PGBackRest.Jobs != nil { container.Resources = postgresCluster.Spec.Backups.PGBackRest.Jobs.Resources } @@ -868,13 +872,16 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P jobSpec.Template.Spec.ImagePullSecrets = postgresCluster.Spec.ImagePullSecrets // add pgBackRest configs to template - if containerName == naming.PGBackRestRepoContainerName { + if repo.Volume != nil { pgbackrest.AddConfigToRepoPod(postgresCluster, &jobSpec.Template.Spec) } else { - pgbackrest.AddConfigToInstancePod(postgresCluster, &jobSpec.Template.Spec) + // If we are doing a cloud repo backup, we need to give pgbackrest proper permissions + // to read certificate files + jobSpec.Template.Spec.SecurityContext = postgres.PodSecurityContext(postgresCluster) + pgbackrest.AddConfigToCloudBackupJob(postgresCluster, &jobSpec.Template) } - return jobSpec, nil + return jobSpec } // +kubebuilder:rbac:groups="",resources="configmaps",verbs={delete,list} @@ -2033,14 +2040,12 @@ func (r *Reconciler) copyConfigurationResources(ctx context.Context, cluster, return nil } -// reconcilePGBackRestConfig is responsible for reconciling the pgBackRest ConfigMaps and Secrets. +// reconcilePGBackRestConfig is responsible for reconciling the pgBackRest ConfigMaps. func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) error { - log := logging.FromContext(ctx).WithValues("reconcileResource", "repoConfig") - backrestConfig, err := pgbackrest.CreatePGBackRestConfigMapIntent(ctx, postgresCluster, repoHostName, configHash, serviceName, serviceNamespace, instanceNames) if err != nil { @@ -2054,12 +2059,6 @@ func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, return errors.WithStack(err) } - repoHostConfigured := pgbackrest.RepoHostVolumeDefined(postgresCluster) - if !repoHostConfigured { - log.V(1).Info("skipping SSH reconciliation, no repo hosts configured") - return nil - } - return nil } @@ -2461,11 +2460,8 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec, err := generateBackupJobSpecIntent(ctx, postgresCluster, repo, + spec := generateBackupJobSpecIntent(ctx, postgresCluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) 
- if err != nil { - return errors.WithStack(err) - } backupJob.Spec = *spec @@ -2553,11 +2549,15 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, replicaRepoReady = (condition.Status == metav1.ConditionTrue) } - // get pod name and container name as needed to exec into the proper pod and create - // the pgBackRest backup - _, containerName, err := getPGBackRestExecSelector(postgresCluster, replicaCreateRepo) - if err != nil { - return errors.WithStack(err) + // TODO: Since we now only exec into the repo host when backing up to a local volume and + // run the backup in the job pod when backing up to a cloud-based repo, we should consider + // using a different value than the container name for the "pgbackrest-config" annotation + // that we attach to these backups + var containerName string + if replicaCreateRepo.Volume != nil { + containerName = naming.PGBackRestRepoContainerName + } else { + containerName = naming.ContainerDatabase } // determine if the dedicated repository host is ready using the repo host ready status @@ -2609,10 +2609,10 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, } } - dedicatedEnabled := pgbackrest.RepoHostVolumeDefined(postgresCluster) // return if no job has been created and the replica repo or the dedicated // repo host is not ready - if job == nil && ((dedicatedEnabled && !dedicatedRepoReady) || !replicaRepoReady) { + if job == nil && ((pgbackrest.RepoHostVolumeDefined(postgresCluster) && !dedicatedRepoReady) || + !replicaRepoReady) { return nil } @@ -2637,11 +2637,8 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec, err := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, + spec := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, serviceAccount.GetName(), labels, annotations) - if err != nil { - return errors.WithStack(err) - } backupJob.Spec = *spec @@ -2823,27 +2820,6 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, return false, nil } -// getPGBackRestExecSelector returns a selector and container name that allows the proper -// Pod (along with a specific container within it) to be found within the Kubernetes -// cluster as needed to exec into the container and run a pgBackRest command. -func getPGBackRestExecSelector(postgresCluster *v1beta1.PostgresCluster, - repo v1beta1.PGBackRestRepo) (labels.Selector, string, error) { - - var err error - var podSelector labels.Selector - var containerName string - - if repo.Volume != nil { - podSelector = naming.PGBackRestDedicatedSelector(postgresCluster.GetName()) - containerName = naming.PGBackRestRepoContainerName - } else { - podSelector, err = naming.AsSelector(naming.ClusterPrimary(postgresCluster.GetName())) - containerName = naming.ContainerDatabase - } - - return podSelector, containerName, err -} - // getRepoHostStatus is responsible for returning the pgBackRest status for the // provided pgBackRest repository host func getRepoHostStatus(repoHost *appsv1.StatefulSet) *v1beta1.RepoHostStatus { @@ -3088,11 +3064,8 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set backup type (i.e. "full", "diff", "incr") backupOpts := []string{"--type=" + backupType} - jobSpec, err := generateBackupJobSpecIntent(ctx, cluster, repo, + jobSpec := generateBackupJobSpecIntent(ctx, cluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) 
- if err != nil { - return errors.WithStack(err) - } // Suspend cronjobs when shutdown or read-only. Any jobs that have already // started will continue. @@ -3125,7 +3098,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set metadata pgBackRestCronJob.SetGroupVersionKind(batchv1.SchemeGroupVersion.WithKind("CronJob")) - err = errors.WithStack(r.setControllerReference(cluster, pgBackRestCronJob)) + err := errors.WithStack(r.setControllerReference(cluster, pgBackRestCronJob)) if err == nil { err = r.apply(ctx, pgBackRestCronJob) diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index a4f2e87a93..8a2e6e3cd5 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -886,52 +886,6 @@ func TestReconcileStanzaCreate(t *testing.T) { } } -func TestGetPGBackRestExecSelector(t *testing.T) { - - testCases := []struct { - cluster *v1beta1.PostgresCluster - repo v1beta1.PGBackRestRepo - desc string - expectedSelector string - expectedContainer string - }{{ - desc: "volume repo defined dedicated repo host enabled", - cluster: &v1beta1.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{Name: "hippo"}, - }, - repo: v1beta1.PGBackRestRepo{ - Name: "repo1", - Volume: &v1beta1.RepoPVC{}, - }, - expectedSelector: "postgres-operator.crunchydata.com/cluster=hippo," + - "postgres-operator.crunchydata.com/pgbackrest=," + - "postgres-operator.crunchydata.com/pgbackrest-dedicated=", - expectedContainer: "pgbackrest", - }, { - desc: "cloud repo defined no repo host enabled", - cluster: &v1beta1.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{Name: "hippo"}, - }, - repo: v1beta1.PGBackRestRepo{ - Name: "repo1", - S3: &v1beta1.RepoS3{}, - }, - expectedSelector: "postgres-operator.crunchydata.com/cluster=hippo," + - "postgres-operator.crunchydata.com/instance," + - "postgres-operator.crunchydata.com/role=master", - expectedContainer: "database", - }} - - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - selector, container, err := getPGBackRestExecSelector(tc.cluster, tc.repo) - assert.NilError(t, err) - assert.Assert(t, selector.String() == tc.expectedSelector) - assert.Assert(t, container == tc.expectedContainer) - }) - } -} - func TestReconcileReplicaCreateBackup(t *testing.T) { // Garbage collector cleans up test resources before the test completes if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { @@ -2647,13 +2601,83 @@ func TestCopyConfigurationResources(t *testing.T) { func TestGenerateBackupJobIntent(t *testing.T) { ctx := context.Background() + cluster := v1beta1.PostgresCluster{} + cluster.Name = "hippo-test" + cluster.Default() + + // If repo.Volume is nil, the code interprets this as a cloud repo backup, + // therefore, an "empty" input results in a job spec for a cloud repo backup t.Run("empty", func(t *testing.T) { - spec, err := generateBackupJobSpecIntent(ctx, - &v1beta1.PostgresCluster{}, v1beta1.PGBackRestRepo{}, + spec := generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: 
/etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp + `)) + }) + + t.Run("volumeRepo", func(t *testing.T) { + spec := generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{ + Volume: &v1beta1.RepoPVC{ + VolumeClaimSpec: v1beta1.VolumeClaimSpec{}, + }, + }, "", nil, nil, ) - assert.NilError(t, err) assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` containers: - command: @@ -2666,10 +2690,10 @@ containers: - name: COMPARE_HASH value: "true" - name: CONTAINER - value: database + value: pgbackrest - name: NAMESPACE - name: SELECTOR - value: postgres-operator.crunchydata.com/cluster=,postgres-operator.crunchydata.com/instance,postgres-operator.crunchydata.com/role=master + value: postgres-operator.crunchydata.com/cluster=hippo-test,postgres-operator.crunchydata.com/pgbackrest=,postgres-operator.crunchydata.com/pgbackrest-dedicated= name: pgbackrest resources: {} securityContext: @@ -2696,11 +2720,23 @@ volumes: sources: - configMap: items: - - key: pgbackrest_instance.conf - path: pgbackrest_instance.conf + - key: pgbackrest_repo.conf + path: pgbackrest_repo.conf - key: config-hash path: config-hash - name: -pgbackrest-config + - key: pgbackrest-server.conf + path: ~postgres-operator_server.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest `)) }) @@ -2710,12 +2746,11 @@ volumes: ImagePullPolicy: corev1.PullAlways, }, } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.Equal(t, job.Template.Spec.Containers[0].ImagePullPolicy, corev1.PullAlways) }) @@ -2726,12 +2761,11 @@ volumes: cluster.Spec.Backups = v1beta1.Backups{ PGBackRest: v1beta1.PGBackRestArchive{}, } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.DeepEqual(t, job.Template.Spec.Containers[0].Resources, corev1.ResourceRequirements{}) }) @@ -2744,12 +2778,11 @@ volumes: }, }, } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.DeepEqual(t, job.Template.Spec.Containers[0].Resources, corev1.ResourceRequirements{ Requests: corev1.ResourceList{ @@ -2784,12 +2817,11 @@ volumes: }, }, } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.Equal(t, job.Template.Spec.Affinity, affinity) }) @@ -2798,12 +2830,11 @@ volumes: 
cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ PriorityClassName: initialize.String("some-priority-class"), } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.Equal(t, job.Template.Spec.PriorityClassName, "some-priority-class") }) @@ -2817,12 +2848,11 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ Tolerations: tolerations, } - job, err := generateBackupJobSpecIntent(ctx, + job := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.DeepEqual(t, job.Template.Spec.Tolerations, tolerations) }) @@ -2832,18 +2862,16 @@ volumes: t.Run("Undefined", func(t *testing.T) { cluster.Spec.Backups.PGBackRest.Jobs = nil - spec, err := generateBackupJobSpecIntent(ctx, + spec := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{} - spec, err = generateBackupJobSpecIntent(ctx, + spec = generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) }) @@ -2852,10 +2880,9 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(0), } - spec, err := generateBackupJobSpecIntent(ctx, + spec := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { assert.Equal(t, *spec.TTLSecondsAfterFinished, int32(0)) } @@ -2866,10 +2893,9 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(100), } - spec, err := generateBackupJobSpecIntent(ctx, + spec := generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) - assert.NilError(t, err) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { assert.Equal(t, *spec.TTLSecondsAfterFinished, int32(100)) } diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index f4b66fad70..17749277df 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -38,6 +38,10 @@ const ( // repository host CMRepoKey = "pgbackrest_repo.conf" + // CMCloudRepoKey is the name of the pgBackRest configuration file used by backup jobs + // for cloud repos + CMCloudRepoKey = "pgbackrest_cloud.conf" + // configDirectory is the pgBackRest configuration directory. 
configDirectory = "/etc/pgbackrest/conf.d" @@ -69,6 +73,7 @@ const ( // pgbackrest_job.conf is used by certain jobs, such as stanza create and backup // pgbackrest_primary.conf is used by the primary database pod // pgbackrest_repo.conf is used by the pgBackRest repository pod +// pgbackrest_cloud.conf is used by cloud repo backup jobs func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) (*corev1.ConfigMap, error) { @@ -96,7 +101,6 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet // create an empty map for the config data initialize.Map(&cm.Data) - addDedicatedHost := RepoHostVolumeDefined(postgresCluster) pgdataDir := postgres.DataDirectory(postgresCluster) // Port will always be populated, since the API will set a default of 5432 if not provided pgPort := *postgresCluster.Spec.Port @@ -113,12 +117,10 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet // PostgreSQL instances that have not rolled out expect to mount a server // config file. Always populate that file so those volumes stay valid and // Kubernetes propagates their contents to those pods. - cm.Data[serverConfigMapKey] = "" - - if addDedicatedHost && repoHostName != "" { - cm.Data[serverConfigMapKey] = iniGeneratedWarning + - serverConfig(postgresCluster).String() + cm.Data[serverConfigMapKey] = iniGeneratedWarning + + serverConfig(postgresCluster).String() + if RepoHostVolumeDefined(postgresCluster) && repoHostName != "" { cm.Data[CMRepoKey] = iniGeneratedWarning + populateRepoHostConfigurationMap( serviceName, serviceNamespace, @@ -129,8 +131,7 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet postgresCluster.Spec.Backups.PGBackRest.Global, ).String() - if RepoHostVolumeDefined(postgresCluster) && - collector.OpenTelemetryLogsOrMetricsEnabled(ctx, postgresCluster) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, postgresCluster) { err = collector.AddToConfigMap(ctx, collector.NewConfigForPgBackrestRepoHostPod( ctx, @@ -156,6 +157,18 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet } } + if CloudRepoDefined(postgresCluster) { + cm.Data[CMCloudRepoKey] = iniGeneratedWarning + + populateCloudRepoConfigurationMap( + serviceName, serviceNamespace, pgdataDir, + config.FetchKeyCommand(&postgresCluster.Spec), + strconv.Itoa(postgresCluster.Spec.PostgresVersion), + pgPort, instanceNames, + postgresCluster.Spec.Backups.PGBackRest.Repos, + postgresCluster.Spec.Backups.PGBackRest.Global, + ).String() + } + cm.Data[ConfigHashKey] = configHash return cm, err @@ -504,6 +517,64 @@ func populateRepoHostConfigurationMap( } } +func populateCloudRepoConfigurationMap( + serviceName, serviceNamespace, pgdataDir, + fetchKeyCommand, postgresVersion string, + pgPort int32, pgHosts []string, repos []v1beta1.PGBackRestRepo, + globalConfig map[string]string, +) iniSectionSet { + + global := iniMultiSet{} + stanza := iniMultiSet{} + + for _, repo := range repos { + if repo.Volume != nil { + continue + } + + global.Set(repo.Name+"-path", defaultRepo1Path+repo.Name) + + for option, val := range getExternalRepoConfigs(repo) { + global.Set(option, val) + } + } + + global.Set("log-level-file", "off") + + for option, val := range globalConfig { + global.Set(option, val) + } + + // set the configs for all PG hosts + for i, pgHost := range pgHosts { + // TODO(cbandy): pass a FQDN in 
already. + pgHostFQDN := pgHost + "-0." + + serviceName + "." + serviceNamespace + ".svc." + + naming.KubernetesClusterDomain(context.Background()) + + stanza.Set(fmt.Sprintf("pg%d-host", i+1), pgHostFQDN) + stanza.Set(fmt.Sprintf("pg%d-host-type", i+1), "tls") + stanza.Set(fmt.Sprintf("pg%d-host-ca-file", i+1), certAuthorityAbsolutePath) + stanza.Set(fmt.Sprintf("pg%d-host-cert-file", i+1), certClientAbsolutePath) + stanza.Set(fmt.Sprintf("pg%d-host-key-file", i+1), certClientPrivateKeyAbsolutePath) + + stanza.Set(fmt.Sprintf("pg%d-path", i+1), pgdataDir) + stanza.Set(fmt.Sprintf("pg%d-port", i+1), fmt.Sprint(pgPort)) + stanza.Set(fmt.Sprintf("pg%d-socket-path", i+1), postgres.SocketDirectory) + + if fetchKeyCommand != "" { + stanza.Set("archive-header-check", "n") + stanza.Set("page-header-check", "n") + stanza.Set("pg-version-force", postgresVersion) + } + } + + return iniSectionSet{ + "global": global, + DefaultStanzaName: stanza, + } +} + // getExternalRepoConfigs returns a map containing the configuration settings for an external // pgBackRest repository as defined in the PostgresCluster spec func getExternalRepoConfigs(repo v1beta1.PGBackRestRepo) map[string]string { diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index cdbaa725a4..b56beaa8ca 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -33,9 +33,11 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { domain := naming.KubernetesClusterDomain(context.Background()) - t.Run("NoVolumeRepo", func(t *testing.T) { + t.Run("NoRepos", func(t *testing.T) { + // We always create the config for the pgbackrest instance and server cluster := cluster.DeepCopy() cluster.Spec.Backups.PGBackRest.Repos = nil + cluster.UID = "piano" configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, "", "number", "pod-service-name", "test-ns", @@ -43,11 +45,46 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { assert.NilError(t, err) assert.Equal(t, configmap.Data["config-hash"], "number") - assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@piano=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], "") }) - t.Run("NoVolumeRepoCloudRepoPresent", func(t *testing.T) { + t.Run("CloudRepoPresentNoVolumeRepo", func(t *testing.T) { cluster := cluster.DeepCopy() + cluster.UID = "ukulele" cluster.Spec.Backups.PGBackRest.Global = map[string]string{ "repo1-test": "something", } @@ -71,8 +108,23 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { }) assert.Equal(t, configmap.Data["config-hash"], "anumber") - assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") - assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@ukulele=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. @@ -92,10 +144,120 @@ pg1-path = /pgdata/pg12 pg1-port = 2345 pg1-socket-path = /tmp/postgres `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +log-level-file = off +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + }) + + t.Run("VolumeRepoPresentNoCloudRepo", func(t *testing.T) { + cluster := cluster.DeepCopy() + cluster.UID = "guitar" + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + Volume: &v1beta1.RepoPVC{}, + }, + } + + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "repo-hostname", "anumber", "pod-service-name", "test-ns", + []string{"some-instance"}) + + assert.NilError(t, err) + assert.DeepEqual(t, configmap.Annotations, map[string]string{}) + assert.DeepEqual(t, configmap.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) + + assert.Equal(t, configmap.Data["config-hash"], "anumber") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@guitar=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +repo1-host = repo-hostname-0.pod-service-name.test-ns.svc.`+domain+` +repo1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +repo1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +repo1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +repo1-host-type = tls +repo1-host-user = postgres +repo1-path = /pgbackrest/repo1 +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +log-path = /pgbackrest/repo1/log +repo1-path = /pgbackrest/repo1 + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], "") }) - t.Run("DedicatedRepoHost", func(t *testing.T) { + t.Run("DedicatedRepoHostAndCloudRepos", func(t *testing.T) { cluster := cluster.DeepCopy() + cluster.UID = "bass" cluster.Spec.Backups.PGBackRest.Global = map[string]string{ "repo3-test": "something", } @@ -133,6 +295,25 @@ pg1-socket-path = /tmp/postgres }) assert.Equal(t, configmap.Data["config-hash"], "abcde12345") + + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@bass=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -195,6 +376,36 @@ spool-path = /pgdata/pgbackrest-spool [db] pg1-path = /pgdata/pg12 pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +log-level-file = off +repo2-azure-container = a-container +repo2-path = /pgbackrest/repo2 +repo2-type = azure +repo3-gcs-bucket = g-bucket +repo3-path = /pgbackrest/repo3 +repo3-test = something +repo3-type = gcs +repo4-path = /pgbackrest/repo4 +repo4-s3-bucket = s-bucket +repo4-s3-endpoint = endpoint-s +repo4-s3-region = earth +repo4-type = s3 + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 pg1-socket-path = /tmp/postgres `, "\t\n")+"\n") }) diff --git a/internal/pgbackrest/reconcile.go b/internal/pgbackrest/reconcile.go index 907012ac1a..426e1312f6 100644 --- a/internal/pgbackrest/reconcile.go +++ b/internal/pgbackrest/reconcile.go @@ -103,6 +103,7 @@ func AddConfigToInstancePod( configmap.ConfigMap.Items = []corev1.KeyToPath{ {Key: CMInstanceKey, Path: CMInstanceKey}, {Key: ConfigHashKey, Path: ConfigHashKey}, + {Key: serverConfigMapKey, Path: serverConfigProjectionPath}, } // As the cluster transitions from having a repository host to having none, @@ -111,17 +112,9 @@ func AddConfigToInstancePod( // volumes stay valid and Kubernetes propagates their contents to those pods. secret := corev1.VolumeProjection{Secret: &corev1.SecretProjection{}} secret.Secret.Name = naming.PGBackRestSecret(cluster).Name + secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) secret.Secret.Optional = initialize.Bool(true) - if RepoHostVolumeDefined(cluster) { - configmap.ConfigMap.Items = append( - configmap.ConfigMap.Items, corev1.KeyToPath{ - Key: serverConfigMapKey, - Path: serverConfigProjectionPath, - }) - secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) - } - // Start with a copy of projections specified in the cluster. Items later in // the list take precedence over earlier items (that is, last write wins). // - https://kubernetes.io/docs/concepts/storage/volumes/#projected @@ -137,7 +130,7 @@ func AddConfigToInstancePod( addConfigVolumeAndMounts(pod, sources) } -// AddConfigToRepoPod adds and mounts the pgBackRest configuration volume for +// AddConfigToRepoPod adds and mounts the pgBackRest configuration volumes for // the dedicated repository host of cluster to pod. The pgBackRest containers // must already be in pod. func AddConfigToRepoPod( @@ -164,6 +157,33 @@ func AddConfigToRepoPod( addConfigVolumeAndMounts(pod, append(sources, configmap, secret)) } +// AddConfigToCloudBackupJob adds and mounts the pgBackRest configuration volumes +// to the backup job for creating a backup to a cloud repo. +func AddConfigToCloudBackupJob( + cluster *v1beta1.PostgresCluster, podTemplateSpec *corev1.PodTemplateSpec, +) { + configmap := corev1.VolumeProjection{ConfigMap: &corev1.ConfigMapProjection{}} + configmap.ConfigMap.Name = naming.PGBackRestConfig(cluster).Name + configmap.ConfigMap.Items = []corev1.KeyToPath{ + {Key: CMCloudRepoKey, Path: CMCloudRepoKey}, + } + + secret := corev1.VolumeProjection{Secret: &corev1.SecretProjection{}} + secret.Secret.Name = naming.PGBackRestSecret(cluster).Name + secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) + + // Start with a copy of projections specified in the cluster. Items later in + // the list take precedence over earlier items (that is, last write wins). 
+ // - https://kubernetes.io/docs/concepts/storage/volumes/#projected + sources := append([]corev1.VolumeProjection{}, + cluster.Spec.Backups.PGBackRest.Configuration...) + + addConfigVolumeAndMounts(&podTemplateSpec.Spec, append(sources, configmap, secret)) + + // Add tmp directory for pgbackrest lock files + AddTMPEmptyDir(podTemplateSpec) +} + // AddConfigToRestorePod adds and mounts the pgBackRest configuration volume // for the restore job of cluster to pod. The pgBackRest containers must // already be in pod. @@ -413,15 +433,13 @@ func InstanceCertificates(ctx context.Context, ) error { var err error - if RepoHostVolumeDefined(inCluster) { - initialize.Map(&outInstanceCertificates.Data) + initialize.Map(&outInstanceCertificates.Data) - if err == nil { - outInstanceCertificates.Data[certInstanceSecretKey], err = certFile(inDNS) - } - if err == nil { - outInstanceCertificates.Data[certInstancePrivateKeySecretKey], err = certFile(inDNSKey) - } + if err == nil { + outInstanceCertificates.Data[certInstanceSecretKey], err = certFile(inDNS) + } + if err == nil { + outInstanceCertificates.Data[certInstancePrivateKeySecretKey], err = certFile(inDNSKey) } return err @@ -517,38 +535,36 @@ func Secret(ctx context.Context, var err error // Save the CA and generate a TLS client certificate for the entire cluster. - if inRepoHost != nil { - initialize.Map(&outSecret.Data) - - // The server verifies its "tls-server-auth" option contains the common - // name (CN) of the certificate presented by a client. The entire - // cluster uses a single client certificate so the "tls-server-auth" - // option can stay the same when PostgreSQL instances and repository - // hosts are added or removed. - leaf := &pki.LeafCertificate{} - commonName := clientCommonName(inCluster) - dnsNames := []string{commonName} - - if err == nil { - // Unmarshal and validate the stored leaf. These first errors can - // be ignored because they result in an invalid leaf which is then - // correctly regenerated. - _ = leaf.Certificate.UnmarshalText(inSecret.Data[certClientSecretKey]) - _ = leaf.PrivateKey.UnmarshalText(inSecret.Data[certClientPrivateKeySecretKey]) - - leaf, err = inRoot.RegenerateLeafWhenNecessary(leaf, commonName, dnsNames) - err = errors.WithStack(err) - } + initialize.Map(&outSecret.Data) + + // The server verifies its "tls-server-auth" option contains the common + // name (CN) of the certificate presented by a client. The entire + // cluster uses a single client certificate so the "tls-server-auth" + // option can stay the same when PostgreSQL instances and repository + // hosts are added or removed. + leaf := &pki.LeafCertificate{} + commonName := clientCommonName(inCluster) + dnsNames := []string{commonName} + + if err == nil { + // Unmarshal and validate the stored leaf. These first errors can + // be ignored because they result in an invalid leaf which is then + // correctly regenerated. 
+ _ = leaf.Certificate.UnmarshalText(inSecret.Data[certClientSecretKey]) + _ = leaf.PrivateKey.UnmarshalText(inSecret.Data[certClientPrivateKeySecretKey]) + + leaf, err = inRoot.RegenerateLeafWhenNecessary(leaf, commonName, dnsNames) + err = errors.WithStack(err) + } - if err == nil { - outSecret.Data[certAuthoritySecretKey], err = certFile(inRoot.Certificate) - } - if err == nil { - outSecret.Data[certClientPrivateKeySecretKey], err = certFile(leaf.PrivateKey) - } - if err == nil { - outSecret.Data[certClientSecretKey], err = certFile(leaf.Certificate) - } + if err == nil { + outSecret.Data[certAuthoritySecretKey], err = certFile(inRoot.Certificate) + } + if err == nil { + outSecret.Data[certClientPrivateKeySecretKey], err = certFile(leaf.PrivateKey) + } + if err == nil { + outSecret.Data[certClientSecretKey], err = certFile(leaf.Certificate) } // Generate a TLS server certificate for each repository host. diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index e60d48843f..18bbfc455d 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -231,7 +231,20 @@ func TestAddConfigToInstancePod(t *testing.T) { path: pgbackrest_instance.conf - key: config-hash path: config-hash + - key: pgbackrest-server.conf + path: ~postgres-operator_server.conf name: hippo-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-pgbackrest + optional: true `)) }) @@ -254,7 +267,20 @@ func TestAddConfigToInstancePod(t *testing.T) { path: pgbackrest_instance.conf - key: config-hash path: config-hash + - key: pgbackrest-server.conf + path: ~postgres-operator_server.conf name: hippo-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-pgbackrest + optional: true `)) }) @@ -373,6 +399,84 @@ func TestAddConfigToRepoPod(t *testing.T) { }) } +func TestAddConfigToCloudBackupJob(t *testing.T) { + cluster := v1beta1.PostgresCluster{} + cluster.Name = "hippo" + cluster.Default() + + podTemplate := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "other"}, + {Name: "pgbackrest"}, + }, + }, + } + + alwaysExpect := func(t testing.TB, result *corev1.PodSpec) { + // Only Containers and Volumes fields have changed. 
+ assert.DeepEqual(t, podTemplate.Spec, *result, cmpopts.IgnoreFields(podTemplate.Spec, "Containers", "Volumes")) + + // Only pgBackRest container has config mount, but tmp dir is mounted to all containers + assert.Assert(t, cmp.MarshalMatches(result.Containers, ` +- name: other + resources: {} + volumeMounts: + - mountPath: /tmp + name: tmp +- name: pgbackrest + resources: {} + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp + `)) + } + + t.Run("CustomProjections", func(t *testing.T) { + custom := corev1.ConfigMapProjection{} + custom.Name = "custom-configmap" + + cluster := cluster.DeepCopy() + cluster.Spec.Backups.PGBackRest.Configuration = []corev1.VolumeProjection{ + {ConfigMap: &custom}, + } + + out := podTemplate.DeepCopy() + AddConfigToCloudBackupJob(cluster, out) + alwaysExpect(t, &out.Spec) + + // Cloud backup configuration files and client certificates + // after custom projections. + assert.Assert(t, cmp.MarshalMatches(out.Spec.Volumes, ` +- name: pgbackrest-config + projected: + sources: + - configMap: + name: custom-configmap + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp`)) + }) +} + func TestAddConfigToRestorePod(t *testing.T) { cluster := v1beta1.PostgresCluster{} cluster.Name = "source" @@ -1004,10 +1108,13 @@ func TestSecret(t *testing.T) { assert.NilError(t, err) t.Run("NoRepoHost", func(t *testing.T) { - // Nothing happens when there is no repository host. - constant := intent.DeepCopy() + // We always add the pgbackrest server certs assert.NilError(t, Secret(ctx, cluster, nil, root, existing, intent)) - assert.DeepEqual(t, constant, intent) + assert.Assert(t, len(intent.Data["pgbackrest-client.crt"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest-client.key"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest.ca-roots"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest-repo-host.crt"]) == 0) + assert.Assert(t, len(intent.Data["pgbackrest-repo-host.key"]) == 0) }) host := new(appsv1.StatefulSet) diff --git a/internal/pgbackrest/util.go b/internal/pgbackrest/util.go index a3b515ec5d..cd5fd11261 100644 --- a/internal/pgbackrest/util.go +++ b/internal/pgbackrest/util.go @@ -10,16 +10,21 @@ import ( "io" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/rand" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +// TODO: Provide explanation for this specific size. Should a tmp dir ever be smaller or larger? +var tmpDirSizeLimit = resource.MustParse("16Mi") + // maxPGBackrestRepos is the maximum number of repositories that can be configured according to the // multi-repository solution implemented within pgBackRest const maxPGBackrestRepos = 4 -// RepoHostVolumeDefined determines whether not at least one pgBackRest dedicated +// RepoHostVolumeDefined determines whether or not at least one pgBackRest dedicated // repository host volume has been defined in the PostgresCluster manifest. 
func RepoHostVolumeDefined(postgresCluster *v1beta1.PostgresCluster) bool { for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { @@ -30,6 +35,17 @@ func RepoHostVolumeDefined(postgresCluster *v1beta1.PostgresCluster) bool { return false } +// CloudRepoDefined determines whether or not at least one pgBackRest cloud-based +// repository has been defined in the PostgresCluster manifest. +func CloudRepoDefined(postgresCluster *v1beta1.PostgresCluster) bool { + for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { + if repo.Volume == nil { + return true + } + } + return false +} + // CalculateConfigHashes calculates hashes for any external pgBackRest repository configuration // present in the PostgresCluster spec (e.g. configuration for Azure, GCR and/or S3 repositories). // Additionally it returns a hash of the hashes for each external repository. @@ -100,3 +116,39 @@ func safeHash32(content func(w io.Writer) error) (string, error) { } return rand.SafeEncodeString(fmt.Sprint(hash.Sum32())), nil } + +// AddTMPEmptyDir adds a "tmp" EmptyDir volume to the provided Pod template, while then also adding a +// volume mount at /tmp for all containers defined within the Pod template +// The '/tmp' directory is currently utilized for the following: +// - As the pgBackRest lock directory (this is the default lock location for pgBackRest) +// - The location where the replication client certificates can be loaded with the proper +// permissions set +// +// This function was copied from the postgrescluster package. +func AddTMPEmptyDir(template *corev1.PodTemplateSpec) { + + template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{ + Name: "tmp", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: &tmpDirSizeLimit, + }, + }, + }) + + for i := range template.Spec.Containers { + template.Spec.Containers[i].VolumeMounts = append(template.Spec.Containers[i].VolumeMounts, + corev1.VolumeMount{ + Name: "tmp", + MountPath: "/tmp", + }) + } + + for i := range template.Spec.InitContainers { + template.Spec.InitContainers[i].VolumeMounts = append(template.Spec.InitContainers[i].VolumeMounts, + corev1.VolumeMount{ + Name: "tmp", + MountPath: "/tmp", + }) + } +} From 87bc04b14b5e3bfa4a053a0dc98a8cab2e7dc06e Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Tue, 15 Jul 2025 23:31:19 -0700 Subject: [PATCH 48/79] Fix bug where logrotate config was only added when a volume repo existed. --- internal/controller/postgrescluster/instance.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 726c786720..0c4f506c35 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1168,7 +1168,7 @@ func (r *Reconciler) reconcileInstance( ) if err == nil { - instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig) + instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, backupsSpecFound) } if err == nil { instanceCertificates, err = r.reconcileInstanceCertificates( @@ -1409,7 +1409,7 @@ func addPGBackRestToInstancePodSpec( // files (etc) that apply to instance of cluster. 
func (r *Reconciler) reconcileInstanceConfigMap( ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, - instance *appsv1.StatefulSet, otelConfig *collector.Config, + instance *appsv1.StatefulSet, otelConfig *collector.Config, backupsSpecFound bool, ) (*corev1.ConfigMap, error) { instanceConfigMap := &corev1.ConfigMap{ObjectMeta: naming.InstanceConfigMap(instance)} instanceConfigMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) @@ -1436,11 +1436,9 @@ func (r *Reconciler) reconcileInstanceConfigMap( err = collector.AddToConfigMap(ctx, otelConfig, instanceConfigMap) // Add pgbackrest logrotate if OpenTelemetryLogs is enabled and - // local volumes are available + // backups are enabled if err == nil && - feature.Enabled(ctx, feature.OpenTelemetryLogs) && - pgbackrest.RepoHostVolumeDefined(cluster) && - cluster.Spec.Instrumentation != nil { + collector.OpenTelemetryLogsEnabled(ctx, cluster) && backupsSpecFound { collector.AddLogrotateConfigs(ctx, cluster.Spec.Instrumentation, instanceConfigMap, From 9e6752973a733545711910980bfe3a03eb388e1e Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 18 Jul 2025 16:45:11 -0700 Subject: [PATCH 49/79] Add a test for reconcileInstanceConfigMap --- .../postgrescluster/instance_test.go | 285 ++++++++++++++++++ 1 file changed, 285 insertions(+) diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index bbabffdafb..5eb2e479dd 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -32,7 +32,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/controller/runtime" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" @@ -2019,3 +2021,286 @@ func TestCleanupDisruptionBudgets(t *testing.T) { }) }) } + +func TestReconcileInstanceConfigMap(t *testing.T) { + ctx := context.Background() + _, cc := setupKubernetes(t) + require.ParallelCapacity(t, 1) + + r := &Reconciler{ + Client: cc, + Owner: client.FieldOwner(t.Name()), + } + + t.Run("LocalVolumeOtelDisabled", func(t *testing.T) { + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-1" + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-1-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("CloudRepoOtelDisabled", func(t *testing.T) { + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-2" + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{{ + Name: "repo1", + GCS: 
&v1beta1.RepoGCS{ + Bucket: "test-bucket", + }, + }} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-2-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("LocalVolumeOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-3" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-3-instance-config") + // We test the contents of the collector yaml elsewhere, I just want to + // make sure that it isn't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("LocalVolumeOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-4" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-4-instance-config") + // We test the contents of the collector and logrotate configs elsewhere, + // I just want to test that they aren't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Assert(t, len(cm.Data["logrotate.conf"]) > 0) + }) + + t.Run("CloudRepoOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-5" + cluster.Spec.Instrumentation = 
&v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-5-instance-config") + // We test the contents of the collector yaml elsewhere, I just want to + // make sure that it isn't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("CloudRepoOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-6" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-6-instance-config") + // We test the contents of the collector and logrotate configs elsewhere, + // I just want to test that they aren't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Assert(t, len(cm.Data["logrotate.conf"]) > 0) + }) + + t.Run("BackupsDisabledOtelDisabled", func(t *testing.T) { + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-7" + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-7-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("BackupsDisabledOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-8" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := 
r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-8-instance-config") + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("BackupsDisabledOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-9" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-9-instance-config") + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) +} From 082a55793f85522c0e56865d6520f0828f77d0f4 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 28 Jul 2025 11:44:00 -0500 Subject: [PATCH 50/79] Ignore pipeline files when scanning with Trivy Trivy was detecting dependencies inside the Git checkout for Trivy report templates. Also switch to "repository" scanning as it is more appropriate here. See: https://trivy.dev/v0.64/docs/target/repository#rationale --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 371a58d2f9..ce00e578ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -203,7 +203,7 @@ trivy: # Trivy needs a populated Go module cache to detect Go module licenses. - go mod download - >- - trivy filesystem . --exit-code 1 + trivy repository . --exit-code 1 --skip-dirs .gitlab-remotes --scanners license,vuln --ignore-unfixed --no-progress From 7b0cafca1bd45c841b0139462652b4b3fad85fc4 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 28 Jul 2025 10:39:07 -0500 Subject: [PATCH 51/79] Bump golang.org/x/oauth2 to v0.30.0 This matches our other branches which needed the bump for CVE-2025-22868. 
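For reference, a bump like this is typically reproduced with the Go
toolchain; the module path and version here come from the diff that
follows:

    go get golang.org/x/oauth2@v0.30.0
    go mod tidy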
--- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b28ed642c1..6ea5a8d11e 100644 --- a/go.mod +++ b/go.mod @@ -105,7 +105,7 @@ require ( golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect golang.org/x/mod v0.22.0 // indirect golang.org/x/net v0.38.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sync v0.12.0 // indirect golang.org/x/sys v0.31.0 // indirect golang.org/x/term v0.30.0 // indirect diff --git a/go.sum b/go.sum index 8aa5d6edac..89177ce6c8 100644 --- a/go.sum +++ b/go.sum @@ -224,8 +224,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From 3947ce7f629c904e0939b18b25ee0933fb650568 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 23 Jul 2025 17:47:18 -0700 Subject: [PATCH 52/79] Allow user to set an annotation that will specify an existing PVC to be mounted to cloud backup jobs so that the backup logs can be persisted. 
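As a usage sketch (assuming the operator's usual annotation prefix; the
cluster and claim names are illustrative), a user opts in by annotating
the PostgresCluster with the name of an existing PVC in the cluster's
namespace:

    apiVersion: postgres-operator.crunchydata.com/v1beta1
    kind: PostgresCluster
    metadata:
      name: hippo
      annotations:
        postgres-operator.crunchydata.com/pgbackrest-cloud-log-volume: backup-logs

When the named PVC exists, it is mounted into cloud backup job pods at
/volumes/backup-logs and pgBackRest's log-path is pointed there; when it
does not, the operator emits a warning event instead.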
--- .../controller/postgrescluster/pgbackrest.go | 55 ++++- .../postgrescluster/pgbackrest_test.go | 192 ++++++++++++++++-- internal/naming/annotations.go | 4 + internal/naming/annotations_test.go | 1 + internal/pgbackrest/config.go | 13 +- internal/pgbackrest/config_test.go | 63 +++++- internal/util/volumes.go | 42 ++++ internal/util/volumes_test.go | 78 +++++++ 8 files changed, 416 insertions(+), 32 deletions(-) create mode 100644 internal/util/volumes.go create mode 100644 internal/util/volumes_test.go diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index a4269c5234..e77456af63 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -38,6 +38,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/pgbackrest" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/util" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -777,7 +778,7 @@ func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresC } // generateBackupJobSpecIntent generates a JobSpec for a pgBackRest backup job -func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, +func (r *Reconciler) generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repo v1beta1.PGBackRestRepo, serviceAccountName string, labels, annotations map[string]string, opts ...string) *batchv1.JobSpec { @@ -879,6 +880,27 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P // to read certificate files jobSpec.Template.Spec.SecurityContext = postgres.PodSecurityContext(postgresCluster) pgbackrest.AddConfigToCloudBackupJob(postgresCluster, &jobSpec.Template) + + // If the user has specified a PVC to use as a log volume via the PGBackRestCloudLogVolume + // annotation, check for the PVC. If we find it, mount it to the backup job. + // Otherwise, create a warning event. + if logVolumeName := postgresCluster.Annotations[naming.PGBackRestCloudLogVolume]; logVolumeName != "" { + logVolume := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: logVolumeName, + Namespace: postgresCluster.GetNamespace(), + }, + } + err := errors.WithStack(r.Client.Get(ctx, + client.ObjectKeyFromObject(logVolume), logVolume)) + if err != nil { + // PVC not retrieved, create warning event + r.Recorder.Event(postgresCluster, corev1.EventTypeWarning, "PGBackRestCloudLogVolumeNotFound", err.Error()) + } else { + // We successfully found the specified PVC, so we will add it to the backup job + util.AddVolumeAndMountsToPod(&jobSpec.Template.Spec, logVolume) + } + } } return jobSpec @@ -2046,8 +2068,31 @@ func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) error { + // If the user has specified a PVC to use as a log volume for cloud backups via the + // PGBackRestCloudLogVolume annotation, check for the PVC. If we find it, set the cloud + // log path. If the user has specified a PVC, but we can't find it, create a warning event. 
+ cloudLogPath := "" + if logVolumeName := postgresCluster.Annotations[naming.PGBackRestCloudLogVolume]; logVolumeName != "" { + logVolume := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: logVolumeName, + Namespace: postgresCluster.GetNamespace(), + }, + } + err := errors.WithStack(r.Client.Get(ctx, + client.ObjectKeyFromObject(logVolume), logVolume)) + if err != nil { + // PVC not retrieved, create warning event + r.Recorder.Event(postgresCluster, corev1.EventTypeWarning, + "PGBackRestCloudLogVolumeNotFound", err.Error()) + } else { + // We successfully found the specified PVC, so we will set the log path + cloudLogPath = "/volumes/" + logVolumeName + } + } + backrestConfig, err := pgbackrest.CreatePGBackRestConfigMapIntent(ctx, postgresCluster, repoHostName, - configHash, serviceName, serviceNamespace, instanceNames) + configHash, serviceName, serviceNamespace, cloudLogPath, instanceNames) if err != nil { return err } @@ -2460,7 +2505,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, repo, + spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) backupJob.Spec = *spec @@ -2637,7 +2682,7 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, + spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, serviceAccount.GetName(), labels, annotations) backupJob.Spec = *spec @@ -3064,7 +3109,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set backup type (i.e. "full", "diff", "incr") backupOpts := []string{"--type=" + backupType} - jobSpec := generateBackupJobSpecIntent(ctx, cluster, repo, + jobSpec := r.generateBackupJobSpecIntent(ctx, cluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) // Suspend cronjobs when shutdown or read-only. 
Any jobs that have already diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index 8a2e6e3cd5..eec1b05deb 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -40,6 +40,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/pgbackrest" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -2600,6 +2601,15 @@ func TestCopyConfigurationResources(t *testing.T) { } func TestGenerateBackupJobIntent(t *testing.T) { + _, cc := setupKubernetes(t) + require.ParallelCapacity(t, 0) + ns := setupNamespace(t, cc) + + r := &Reconciler{ + Client: cc, + Owner: ControllerName, + } + ctx := context.Background() cluster := v1beta1.PostgresCluster{} cluster.Name = "hippo-test" @@ -2608,7 +2618,7 @@ func TestGenerateBackupJobIntent(t *testing.T) { // If repo.Volume is nil, the code interprets this as a cloud repo backup, // therefore, an "empty" input results in a job spec for a cloud repo backup t.Run("empty", func(t *testing.T) { - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, &cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2669,7 +2679,7 @@ volumes: }) t.Run("volumeRepo", func(t *testing.T) { - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, &cluster, v1beta1.PGBackRestRepo{ Volume: &v1beta1.RepoPVC{ VolumeClaimSpec: v1beta1.VolumeClaimSpec{}, @@ -2746,7 +2756,7 @@ volumes: ImagePullPolicy: corev1.PullAlways, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2761,7 +2771,7 @@ volumes: cluster.Spec.Backups = v1beta1.Backups{ PGBackRest: v1beta1.PGBackRestArchive{}, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2778,7 +2788,7 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2817,7 +2827,7 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2830,7 +2840,7 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ PriorityClassName: initialize.String("some-priority-class"), } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2848,7 +2858,7 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ Tolerations: tolerations, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2862,14 +2872,14 @@ volumes: t.Run("Undefined", func(t *testing.T) { cluster.Spec.Backups.PGBackRest.Jobs = nil - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{} - spec = generateBackupJobSpecIntent(ctx, 
+ spec = r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) @@ -2880,7 +2890,7 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(0), } - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { @@ -2893,7 +2903,7 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(100), } - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { @@ -2901,6 +2911,164 @@ volumes: } }) }) + + t.Run("CloudLogVolumeAnnotationNoPvc", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + r.Recorder = recorder + + cluster.Namespace = ns.Name + cluster.Annotations = map[string]string{} + cluster.Annotations[naming.PGBackRestCloudLogVolume] = "some-pvc" + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp + `)) + + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "PGBackRestCloudLogVolumeNotFound") + assert.Equal(t, recorder.Events[0].Note, "persistentvolumeclaims \"some-pvc\" not found") + }) + + t.Run("CloudLogVolumeAnnotationPvcInPlace", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + r.Recorder = recorder + + cluster.Namespace = ns.Name + cluster.Annotations = map[string]string{} + cluster.Annotations[naming.PGBackRestCloudLogVolume] = "another-pvc" + + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "another-pvc", + Namespace: ns.Name, + }, + Spec: corev1.PersistentVolumeClaimSpec(testVolumeClaimSpec()), + } + err := r.Client.Create(ctx, pvc) + assert.NilError(t, err) + + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + 
seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp + - mountPath: /volumes/another-pvc + name: another-pvc +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp +- name: another-pvc + persistentVolumeClaim: + claimName: another-pvc + `)) + + // No events created + assert.Equal(t, len(recorder.Events), 0) + }) } func TestGenerateRepoHostIntent(t *testing.T) { diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index 38d30926d9..61a5438908 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -54,6 +54,10 @@ const ( // bind all addresses does not work in certain IPv6 environments. PGBackRestIPVersion = annotationPrefix + "pgbackrest-ip-version" + // PGBackRestCloudLogVolume is an annotation used to indicate which persistent volume claim + // should be mounted to cloud repo backup jobs so that the backup logs can be persisted. + PGBackRestCloudLogVolume = annotationPrefix + "pgbackrest-cloud-log-volume" + // PostgresExporterCollectorsAnnotation is an annotation used to allow users to control whether or // not postgres_exporter default metrics, settings, and collectors are enabled. The value "None" // disables all postgres_exporter defaults. Disabling the defaults may cause errors in dashboards. 
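The same opt-in can be applied to an existing cluster imperatively (a
sketch; the cluster and PVC names are placeholders):

    kubectl annotate postgrescluster hippo \
      postgres-operator.crunchydata.com/pgbackrest-cloud-log-volume=backup-logs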
diff --git a/internal/naming/annotations_test.go b/internal/naming/annotations_test.go index 593d000984..9553e5e72a 100644 --- a/internal/naming/annotations_test.go +++ b/internal/naming/annotations_test.go @@ -22,6 +22,7 @@ func TestAnnotationsValid(t *testing.T) { assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestConfigHash)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestCurrentConfig)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestIPVersion)) + assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestCloudLogVolume)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestRestore)) assert.Assert(t, nil == validation.IsQualifiedName(PostgresExporterCollectorsAnnotation)) } diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 17749277df..744537fb20 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -75,7 +75,7 @@ const ( // pgbackrest_repo.conf is used by the pgBackRest repository pod // pgbackrest_cloud.conf is used by cloud repo backup jobs func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, - repoHostName, configHash, serviceName, serviceNamespace string, + repoHostName, configHash, serviceName, serviceNamespace, cloudLogPath string, instanceNames []string) (*corev1.ConfigMap, error) { var err error @@ -163,7 +163,7 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet serviceName, serviceNamespace, pgdataDir, config.FetchKeyCommand(&postgresCluster.Spec), strconv.Itoa(postgresCluster.Spec.PostgresVersion), - pgPort, instanceNames, + cloudLogPath, pgPort, instanceNames, postgresCluster.Spec.Backups.PGBackRest.Repos, postgresCluster.Spec.Backups.PGBackRest.Global, ).String() @@ -519,7 +519,7 @@ func populateRepoHostConfigurationMap( func populateCloudRepoConfigurationMap( serviceName, serviceNamespace, pgdataDir, - fetchKeyCommand, postgresVersion string, + fetchKeyCommand, postgresVersion, logPath string, pgPort int32, pgHosts []string, repos []v1beta1.PGBackRestRepo, globalConfig map[string]string, ) iniSectionSet { @@ -539,7 +539,12 @@ func populateCloudRepoConfigurationMap( } } - global.Set("log-level-file", "off") + // If we are given a log path, set it in the config. Otherwise, turn off logging to file. 
+ if logPath != "" { + global.Set("log-path", logPath) + } else { + global.Set("log-level-file", "off") + } for option, val := range globalConfig { global.Set(option, val) diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index b56beaa8ca..0f518ab7bc 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -40,7 +40,7 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { cluster.UID = "piano" configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "", "number", "pod-service-name", "test-ns", + "", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -96,19 +96,33 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "", "anumber", "pod-service-name", "test-ns", + "", "anumber", "pod-service-name", "test-ns", "", + []string{"some-instance"}) + assert.NilError(t, err) + + configmapWithCloudLogging, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "", "anumber", "pod-service-name", "test-ns", "/a/log/path", []string{"some-instance"}) assert.NilError(t, err) assert.DeepEqual(t, configmap.Annotations, map[string]string{}) + assert.DeepEqual(t, configmapWithCloudLogging.Annotations, map[string]string{}) + assert.DeepEqual(t, configmap.Labels, map[string]string{ "postgres-operator.crunchydata.com/cluster": "hippo-dance", "postgres-operator.crunchydata.com/pgbackrest": "", "postgres-operator.crunchydata.com/pgbackrest-config": "", }) + assert.DeepEqual(t, configmapWithCloudLogging.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) assert.Equal(t, configmap.Data["config-hash"], "anumber") - assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` + assert.Equal(t, configmapWithCloudLogging.Data["config-hash"], "anumber") + + serverConfigExpectation := strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -124,9 +138,11 @@ log-level-console = detail log-level-file = off log-level-stderr = error log-timestamp = n - `, "\t\n")+"\n") + `, "\t\n") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], serverConfigExpectation+"\n") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest-server.conf"], serverConfigExpectation+"\n") - assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` + instanceConfigExpectation := strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -143,7 +159,9 @@ spool-path = /pgdata/pgbackrest-spool pg1-path = /pgdata/pg12 pg1-port = 2345 pg1-socket-path = /tmp/postgres - `, "\t\n")+"\n") + `, "\t\n") + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], instanceConfigExpectation+"\n") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_instance.conf"], instanceConfigExpectation+"\n") assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. 
@@ -156,6 +174,28 @@ repo1-path = /pgbackrest/repo1 repo1-test = something repo1-type = gcs +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +log-path = /a/log/path +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs + [db] pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt @@ -168,6 +208,7 @@ pg1-socket-path = /tmp/postgres `, "\t\n")+"\n") assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_repo.conf"], "") }) t.Run("VolumeRepoPresentNoCloudRepo", func(t *testing.T) { @@ -181,7 +222,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "repo-hostname", "anumber", "pod-service-name", "test-ns", + "repo-hostname", "anumber", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -283,7 +324,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "repo-hostname", "abcde12345", "pod-service-name", "test-ns", + "repo-hostname", "abcde12345", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -438,7 +479,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "any", "any", "any", "any", nil) + "any", "any", "any", "any", "any", nil) assert.NilError(t, err) assert.DeepEqual(t, configmap.Annotations, map[string]string{ @@ -470,7 +511,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "", "number", "pod-service-name", "test-ns", + "", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -492,7 +533,7 @@ pg1-socket-path = /tmp/postgres } configmap, err = CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "repo1", "number", "pod-service-name", "test-ns", + "repo1", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) diff --git a/internal/util/volumes.go b/internal/util/volumes.go new file mode 100644 index 0000000000..34e2699b54 --- /dev/null +++ b/internal/util/volumes.go @@ -0,0 +1,42 @@ +// Copyright 2017 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package util + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" +) + +// AddVolumeAndMountsToPod takes a Pod spec and a PVC and adds a Volume to the Pod spec with +// the PVC as the VolumeSource and mounts the volume to all containers and init containers +// in the Pod spec. 
+func AddVolumeAndMountsToPod(podSpec *corev1.PodSpec, volume *corev1.PersistentVolumeClaim) {
+
+	podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{
+		Name: volume.Name,
+		VolumeSource: corev1.VolumeSource{
+			PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+				ClaimName: volume.Name,
+			},
+		},
+	})
+
+	for i := range podSpec.Containers {
+		podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts,
+			corev1.VolumeMount{
+				Name:      volume.Name,
+				MountPath: fmt.Sprintf("/volumes/%s", volume.Name),
+			})
+	}
+
+	for i := range podSpec.InitContainers {
+		podSpec.InitContainers[i].VolumeMounts = append(podSpec.InitContainers[i].VolumeMounts,
+			corev1.VolumeMount{
+				Name:      volume.Name,
+				MountPath: fmt.Sprintf("/volumes/%s", volume.Name),
+			})
+	}
+}
diff --git a/internal/util/volumes_test.go b/internal/util/volumes_test.go
new file mode 100644
index 0000000000..b438943e3a
--- /dev/null
+++ b/internal/util/volumes_test.go
@@ -0,0 +1,78 @@
+// Copyright 2021 - 2025 Crunchy Data Solutions, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package util
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"gotest.tools/v3/assert"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/crunchydata/postgres-operator/internal/testing/cmp"
+)
+
+func TestAddVolumeAndMountsToPod(t *testing.T) {
+	pod := &corev1.PodSpec{
+		Containers: []corev1.Container{
+			{Name: "database"},
+			{Name: "other"},
+			{Name: "pgbackrest"},
+		},
+		InitContainers: []corev1.Container{
+			{Name: "initializer"},
+			{Name: "another"},
+		},
+	}
+
+	volume := &corev1.PersistentVolumeClaim{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "volume-name",
+		},
+	}
+
+	alwaysExpect := func(t testing.TB, result *corev1.PodSpec) {
+		// Only Containers, InitContainers, and Volumes fields have changed.
+		assert.DeepEqual(t, *pod, *result, cmpopts.IgnoreFields(*pod, "Containers", "InitContainers", "Volumes"))
+
+		// Volume is mounted to all containers
+		assert.Assert(t, cmp.MarshalMatches(result.Containers, `
+- name: database
+  resources: {}
+  volumeMounts:
+  - mountPath: /volumes/volume-name
+    name: volume-name
+- name: other
+  resources: {}
+  volumeMounts:
+  - mountPath: /volumes/volume-name
+    name: volume-name
+- name: pgbackrest
+  resources: {}
+  volumeMounts:
+  - mountPath: /volumes/volume-name
+    name: volume-name
+	`))
+
+		// Volume is mounted to all init containers
+		assert.Assert(t, cmp.MarshalMatches(result.InitContainers, `
+- name: initializer
+  resources: {}
+  volumeMounts:
+  - mountPath: /volumes/volume-name
+    name: volume-name
+- name: another
+  resources: {}
+  volumeMounts:
+  - mountPath: /volumes/volume-name
+    name: volume-name
+	`))
+	}
+
+	out := pod.DeepCopy()
+	AddVolumeAndMountsToPod(out, volume)
+	alwaysExpect(t, out)
+}

From 5bbb52aed3bde464c3f63a5d88241966c20de6ca Mon Sep 17 00:00:00 2001
From: Andrew LEcuyer
Date: Tue, 5 Aug 2025 22:02:23 +0000
Subject: [PATCH 53/79] Remove Divisor from Downward API resourceFieldRefs

CPU and memory information is now exposed using the default divisor
for CPU and memory, which is "1" for both. Memory is therefore
reported in bytes, as expected by pgMonitor and the CPK Metrics &
Monitoring stack when consuming Downward API information.
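For illustration, a downward API item now looks like the sketch below
(field names match the diff that follows; the comment describes the
Kubernetes default):

    - path: mem_limit
      resourceFieldRef:
        containerName: database
        resource: limits.memory
        # divisor omitted: it defaults to "1", so the value is in bytes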
Issue: PGO-2604 --- internal/postgres/reconcile.go | 10 ---------- internal/postgres/reconcile_test.go | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index 5041140b0d..81c6cc31fa 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -8,7 +8,6 @@ import ( "context" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" @@ -17,11 +16,6 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) -var ( - oneMillicore = resource.MustParse("1m") - oneMebibyte = resource.MustParse("1Mi") -) - // DataVolumeMount returns the name and mount path of the PostgreSQL data volume. func DataVolumeMount() corev1.VolumeMount { return corev1.VolumeMount{Name: "postgres-data", MountPath: dataMountPath} @@ -116,28 +110,24 @@ func InstancePod(ctx context.Context, ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "limits.cpu", - Divisor: oneMillicore, }, }, { Path: "cpu_request", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "requests.cpu", - Divisor: oneMillicore, }, }, { Path: "mem_limit", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "limits.memory", - Divisor: oneMebibyte, }, }, { Path: "mem_request", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "requests.memory", - Divisor: oneMebibyte, }, }, { Path: "labels", diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index ba3a90b57b..aebc5d3121 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -352,22 +352,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 @@ -443,22 +443,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 @@ -667,22 +667,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 From ebb0ec146c3c2542b2c3639d3ac13836ea0749a7 Mon Sep 17 
00:00:00 2001 From: jmckulk Date: Tue, 26 Aug 2025 17:23:50 -0400 Subject: [PATCH 54/79] Filter out metrics that we care about using grep --- .../e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml | 2 +- .../e2e/otel-logging-and-metrics/06-assert-instance.yaml | 5 ++++- .../otel-logging-and-metrics/12-assert-per-db-queries.yaml | 2 +- .../14-assert-per-db-queries-for-multiple-targets.yaml | 2 +- .../18-assert-custom-queries-per-db.yaml | 2 +- .../e2e/otel-logging-and-metrics/22-assert-instance.yaml | 4 +++- 6 files changed, 11 insertions(+), 6 deletions(-) diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml index 87188b6f62..2aecbc2f61 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml @@ -21,7 +21,7 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_pgbouncer_clients_wait_seconds') { contains "${scrape_metrics}" 'ccp_pgbouncer_clients_wait_seconds'; } || { retry "pgbouncer metric not found" exit 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml index 096c024d89..67221cf8f5 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml @@ -38,7 +38,10 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep \ + -e 'ccp_connection_stats_active' \ + -e 'patroni_postgres_running' \ + -e 'ccp_database_size_bytes') { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { retry "5 second metric not found" exit 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml index 10a8645b32..9f1f00d40e 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml @@ -21,7 +21,7 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { retry "ccp_table_size_bytes not found for pikachu" exit 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml index bfbe2b1578..234f33ae1b 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml @@ -21,7 +21,7 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') { contains "${scrape_metrics}" 
'ccp_table_size_bytes{dbname="pikachu"'; } || { retry "ccp_table_size_bytes not found for pikachu" exit 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml index a4631bf9e8..e6b1365803 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml @@ -23,7 +23,7 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') { contains "${scrape_metrics}" 'ccp_table_size_bytes_1{dbname="pikachu"'; } || { retry "custom metric not found for pikachu db" exit 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml index 411c910486..226bcce3cd 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml @@ -23,7 +23,9 @@ commands: } scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ - curl --insecure --silent http://localhost:9187/metrics) + curl --insecure --silent http://localhost:9187/metrics | grep \ + -e 'ccp_connection_stats_active' \ + -e 'patroni_postgres_running') { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { retry "5 second metric not found" exit 1 From 044ca60d14e4238edf5fc522134b50513ca500b3 Mon Sep 17 00:00:00 2001 From: ValClarkson Date: Mon, 15 Sep 2025 16:30:16 -0400 Subject: [PATCH 55/79] post release updating to august images PGO-2616 --- Makefile | 2 +- config/manager/manager.yaml | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 9a5ef90b0c..f4977bcf99 100644 --- a/Makefile +++ b/Makefile @@ -229,7 +229,7 @@ generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 16 generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 17 generate-kuttl: export KUTTL_PG_VERSION ?= 16 generate-kuttl: export KUTTL_POSTGIS_VERSION ?= 3.4 -generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516 +generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2534 generate-kuttl: export KUTTL_TEST_DELETE_NAMESPACE ?= kuttl-test-delete-namespace generate-kuttl: ## Generate kuttl tests [ ! 
-d testing/kuttl/e2e-generated ] || rm -r testing/kuttl/e2e-generated
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index ad21ff6ce9..24bf6caadf 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -23,27 +23,27 @@ spec:
         - name: CRUNCHY_DEBUG
           value: "true"
         - name: RELATED_IMAGE_POSTGRES_16
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.8-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.10-2534"
         - name: RELATED_IMAGE_POSTGRES_16_GIS_3.3
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.3-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.3-2534"
         - name: RELATED_IMAGE_POSTGRES_16_GIS_3.4
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.8-3.4-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.4-2534"
         - name: RELATED_IMAGE_POSTGRES_17
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.4-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2534"
         - name: RELATED_IMAGE_POSTGRES_17_GIS_3.4
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.4-3.4-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.4-2534"
         - name: RELATED_IMAGE_PGBACKREST
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.54.2-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2534"
         - name: RELATED_IMAGE_PGBOUNCER
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2534"
         - name: RELATED_IMAGE_PGEXPORTER
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2534"
         - name: RELATED_IMAGE_PGUPGRADE
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.4-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.6-2534"
         - name: RELATED_IMAGE_STANDALONE_PGADMIN
-          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2516"
+          value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2534"
         - name: RELATED_IMAGE_COLLECTOR
-          value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.1-0"
+          value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.3-0"
         securityContext:
           allowPrivilegeEscalation: false
           capabilities: { drop: [ALL] }
From 70f5349c71734bb280e1b83b3f7de03249b3bad9 Mon Sep 17 00:00:00 2001
From: TJ Moore
Date: Wed, 10 Sep 2025 03:21:23 -0400
Subject: [PATCH 56/79] Updates to support changes starting in pgAdmin 9.3

The flags used by pgAdmin's setup.py for user management changed
starting in pgAdmin 9.3.
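As a sketch of the difference (the positional arguments to setup.py are
elided here), creating a non-administrator user changes from:

    # pgAdmin before 9.3
    python3 setup.py add-user ... --nonadmin

    # pgAdmin 9.3 and later
    python3 setup.py add-user ... --role User

Administrators continue to use the --admin flag in both cases.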
Issue: PGO-2686 --- ...res-operator.crunchydata.com_pgadmins.yaml | 4 + .../controller/standalone_pgadmin/users.go | 58 ++++++++---- .../standalone_pgadmin/users_test.go | 90 +++++++++++++------ .../v1beta1/standalone_pgadmin_types.go | 4 + .../01-assert.yaml | 8 +- .../03-assert.yaml | 10 ++- .../05-assert.yaml | 10 ++- .../07-assert.yaml | 10 ++- 8 files changed, 135 insertions(+), 59 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index 6b3dcca78b..1ac65c6360 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -2705,6 +2705,10 @@ spec: description: MajorVersion represents the major version of the running pgAdmin. type: integer + minorVersion: + description: MinorVersion represents the minor version of the running + pgAdmin. + type: string observedGeneration: description: observedGeneration represents the .metadata.generation on which the status was based. diff --git a/internal/controller/standalone_pgadmin/users.go b/internal/controller/standalone_pgadmin/users.go index 34a9ba8661..d809aa44ef 100644 --- a/internal/controller/standalone_pgadmin/users.go +++ b/internal/controller/standalone_pgadmin/users.go @@ -79,28 +79,43 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * return nil } - // If the pgAdmin version is not in the status or the image SHA has changed, get - // the pgAdmin version and store it in the status. - var pgadminVersion int - if pgadmin.Status.MajorVersion == 0 || pgadmin.Status.ImageSHA != pgAdminImageSha { - pgadminVersion, err = r.reconcilePGAdminMajorVersion(ctx, podExecutor) + // If the pgAdmin major or minor version is not in the status or the image + // SHA has changed, get the pgAdmin version and store it in the status. + var pgadminMajorVersion int + if pgadmin.Status.MajorVersion == 0 || pgadmin.Status.MinorVersion == "" || + pgadmin.Status.ImageSHA != pgAdminImageSha { + + pgadminMinorVersion, err := r.reconcilePGAdminVersion(ctx, podExecutor) if err != nil { return err } - pgadmin.Status.MajorVersion = pgadminVersion + + // ensure minor version is valid before storing in status + parsedMinorVersion, err := strconv.ParseFloat(pgadminMinorVersion, 64) + if err != nil { + return err + } + + // Note: "When converting a floating-point number to an integer, the + // fraction is discarded (truncation towards zero)." + // - https://go.dev/ref/spec#Conversions + pgadminMajorVersion = int(parsedMinorVersion) + + pgadmin.Status.MinorVersion = pgadminMinorVersion + pgadmin.Status.MajorVersion = pgadminMajorVersion pgadmin.Status.ImageSHA = pgAdminImageSha } else { - pgadminVersion = pgadmin.Status.MajorVersion + pgadminMajorVersion = pgadmin.Status.MajorVersion } // If the pgAdmin version is not v8 or higher, return early as user management is // only supported for pgAdmin v8 and higher. - if pgadminVersion < 8 { + if pgadminMajorVersion < 8 { // If pgAdmin version is less than v8 and user management is being attempted, // log a message clarifying that it is only supported for pgAdmin v8 and higher. 
if len(pgadmin.Spec.Users) > 0 { log.Info("User management is only supported for pgAdmin v8 and higher.", - "pgadminVersion", pgadminVersion) + "pgadminVersion", pgadminMajorVersion) } return err } @@ -108,11 +123,11 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * return r.writePGAdminUsers(ctx, pgadmin, podExecutor) } -// reconcilePGAdminMajorVersion execs into the pgAdmin pod and retrieves the pgAdmin major version -func (r *PGAdminReconciler) reconcilePGAdminMajorVersion(ctx context.Context, exec Executor) (int, error) { +// reconcilePGAdminVersion execs into the pgAdmin pod and retrieves the pgAdmin minor version +func (r *PGAdminReconciler) reconcilePGAdminVersion(ctx context.Context, exec Executor) (string, error) { script := fmt.Sprintf(` PGADMIN_DIR=%s -cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_RELEASE)" +cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_VERSION)" `, pgAdminDir) var stdin, stdout, stderr bytes.Buffer @@ -121,10 +136,10 @@ cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_RELEASE)" []string{"bash", "-ceu", "--", script}...) if err != nil { - return 0, err + return "", err } - return strconv.Atoi(strings.TrimSpace(stdout.String())) + return strings.TrimSpace(stdout.String()), nil } // writePGAdminUsers takes the users in the pgAdmin spec and writes (adds or updates) their data @@ -170,10 +185,23 @@ cd $PGADMIN_DIR for _, user := range existingUsersArr { existingUsersMap[user.Username] = user } + + var olderThan9_3 bool + versionFloat, err := strconv.ParseFloat(pgadmin.Status.MinorVersion, 32) + if err != nil { + return err + } + if versionFloat < 9.3 { + olderThan9_3 = true + } + intentUsers := []pgAdminUserForJson{} for _, user := range pgadmin.Spec.Users { var stdin, stdout, stderr bytes.Buffer - typeFlag := "--nonadmin" + typeFlag := "--role User" + if olderThan9_3 { + typeFlag = "--nonadmin" + } isAdmin := false if user.Role == "Administrator" { typeFlag = "--admin" diff --git a/internal/controller/standalone_pgadmin/users_test.go b/internal/controller/standalone_pgadmin/users_test.go index fb861e17a7..d08f5b74b0 100644 --- a/internal/controller/standalone_pgadmin/users_test.go +++ b/internal/controller/standalone_pgadmin/users_test.go @@ -110,15 +110,16 @@ func TestReconcilePGAdminUsers(t *testing.T) { assert.Equal(t, namespace, pgadmin.Namespace) assert.Equal(t, container, naming.ContainerPGAdmin) - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // Simulate a v7.1 version of pgAdmin by setting stdout to "7.1" + // for podexec call in reconcilePGAdminVersion + _, _ = stdout.Write([]byte("7.1")) return nil } assert.NilError(t, r.reconcilePGAdminUsers(ctx, pgadmin)) assert.Equal(t, calls, 1, "PodExec should be called once") assert.Equal(t, pgadmin.Status.MajorVersion, 7) + assert.Equal(t, pgadmin.Status.MinorVersion, "7.1") assert.Equal(t, pgadmin.Status.ImageSHA, "fakeSHA") }) @@ -145,20 +146,58 @@ func TestReconcilePGAdminUsers(t *testing.T) { ) error { calls++ - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // Simulate a v7.1 version of pgAdmin by setting stdout to "7.1" + // for podexec call in reconcilePGAdminVersion + _, _ = stdout.Write([]byte("7.1")) return nil } assert.NilError(t, r.reconcilePGAdminUsers(ctx, pgadmin)) assert.Equal(t, calls, 1, "PodExec should be 
called once") assert.Equal(t, pgadmin.Status.MajorVersion, 7) + assert.Equal(t, pgadmin.Status.MinorVersion, "7.1") assert.Equal(t, pgadmin.Status.ImageSHA, "newFakeSHA") }) + + t.Run("PodHealthyBadVersion", func(t *testing.T) { + pgadmin := pgadmin.DeepCopy() + pod := pod.DeepCopy() + + pod.DeletionTimestamp = nil + pod.Status.ContainerStatuses = + []corev1.ContainerStatus{{Name: naming.ContainerPGAdmin}} + pod.Status.ContainerStatuses[0].State.Running = + new(corev1.ContainerStateRunning) + pod.Status.ContainerStatuses[0].ImageID = "fakeSHA" + + r := new(PGAdminReconciler) + r.Client = fake.NewClientBuilder().WithObjects(pod).Build() + + calls := 0 + r.PodExec = func( + ctx context.Context, namespace, pod, container string, + stdin io.Reader, stdout, stderr io.Writer, command ...string, + ) error { + calls++ + + assert.Equal(t, pod, "pgadmin-123-0") + assert.Equal(t, namespace, pgadmin.Namespace) + assert.Equal(t, container, naming.ContainerPGAdmin) + + // set expected version to something completely wrong + _, _ = stdout.Write([]byte("woot")) + return nil + } + + assert.ErrorContains(t, r.reconcilePGAdminUsers(ctx, pgadmin), "strconv.ParseFloat: parsing \"woot\": invalid syntax") + assert.Equal(t, calls, 1, "PodExec should be called once") + assert.Equal(t, pgadmin.Status.MajorVersion, 0) + assert.Equal(t, pgadmin.Status.MinorVersion, "") + assert.Equal(t, pgadmin.Status.ImageSHA, "") + }) } -func TestReconcilePGAdminMajorVersion(t *testing.T) { +func TestReconcilePGAdminVersion(t *testing.T) { ctx := context.Background() pod := corev1.Pod{} pod.Namespace = "test-namespace" @@ -180,30 +219,15 @@ func TestReconcilePGAdminMajorVersion(t *testing.T) { assert.Equal(t, namespace, "test-namespace") assert.Equal(t, container, naming.ContainerPGAdmin) - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // Simulate a v9.3 version of pgAdmin by setting stdout to "9.3" + // for podexec call in reconcilePGAdminVersion + _, _ = stdout.Write([]byte("9.3")) return nil } - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) + version, err := reconciler.reconcilePGAdminVersion(ctx, podExecutor) assert.NilError(t, err) - assert.Equal(t, version, 7) - }) - - t.Run("FailedRetrieval", func(t *testing.T) { - reconciler.PodExec = func( - ctx context.Context, namespace, pod, container string, - stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { - // Simulate the python call giving bad data (not a version int) - _, _ = stdout.Write([]byte("asdfjkl;")) - return nil - } - - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) - assert.Check(t, err != nil) - assert.Equal(t, version, 0) + assert.Equal(t, version, "9.3") }) t.Run("PodExecError", func(t *testing.T) { @@ -214,9 +238,9 @@ func TestReconcilePGAdminMajorVersion(t *testing.T) { return errors.New("PodExecError") } - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) + version, err := reconciler.reconcilePGAdminVersion(ctx, podExecutor) assert.Check(t, err != nil) - assert.Equal(t, version, 0) + assert.Equal(t, version, "") }) } @@ -244,6 +268,14 @@ func TestWritePGAdminUsers(t *testing.T) { }`) assert.NilError(t, cc.Create(ctx, pgadmin)) + // fake the status so that the correct commands will be used when creating + // users. 
+ pgadmin.Status = v1beta1.PGAdminStatus{ + ImageSHA: "fakesha", + MajorVersion: 9, + MinorVersion: "9.3", + } + userPasswordSecret1 := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "user-password-secret1", diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go index 534d792c4f..4467cce28c 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go @@ -227,6 +227,10 @@ type PGAdminStatus struct { // +optional MajorVersion int `json:"majorVersion,omitempty"` + // MinorVersion represents the minor version of the running pgAdmin. + // +optional + MinorVersion string `json:"minorVersion,omitempty"` + // observedGeneration represents the .metadata.generation on which the status was based. // +optional // +kubebuilder:validation:Minimum=0 diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml index 244533b7ee..0290339143 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml @@ -6,12 +6,14 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # /usr/local/lib/python3.11/site-packages/pgadmin4 allows for various Python versions to be referenced in testing users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations. 
+ ( [ $bob_role = 1 ] && [ $dave_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml index 01aff25b3b..00c3d819fd 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml @@ -6,13 +6,15 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # /usr/local/lib/python3.11/site-packages/pgadmin4 allows for various Python versions to be referenced in testing users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') - jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') + jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations. 
+ ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml index 1dca13a7b7..f6eb83b2d9 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml @@ -6,13 +6,15 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # /usr/local/lib/python3.11/site-packages/pgadmin4 allows for various Python versions to be referenced in testing users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') - jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') + jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations. 
+      ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1

       users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d)

diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml
index 5c0e7267e6..3e3d8396b3 100644
--- a/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml
+++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml
@@ -6,13 +6,15 @@ commands:
       pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name)
       secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name)

+      # /usr/local/lib/python3.11/site-packages/pgadmin4 allows for various Python versions to be referenced in testing
       users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json")
-      bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role')
-      dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role')
-      jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role')
+      bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role')
+      dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role')
+      jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role')

-      [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1
+      # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations.
+      ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1

       users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d)

From 03ff6fc33a0273e95785688318a96f03c8e2f01c Mon Sep 17 00:00:00 2001
From: TJ Moore
Date: Fri, 26 Sep 2025 11:37:59 -0400
Subject: [PATCH 57/79] Handle pgAdmin UserWarning

Capture an expected user warning for pgAdmin 9.8 using python3.11 and
log it as an INFO message rather than an ERROR, which short-circuits
user creation and updating.
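In effect, stderr from the setup.py invocations is triaged before being
treated as a failure. A rough sketch of the resulting control flow,
abbreviated from the diff below (the helper name is hypothetical, and the
standard bytes, errors, fmt, and strings imports plus a logr.Logger are
assumed):

    // logSetupStderr classifies stderr from a pgAdmin setup.py call:
    // a known-benign Python deprecation warning is logged at INFO so
    // processing continues; any other output is logged as an error.
    func logSetupStderr(log logr.Logger, stderr *bytes.Buffer, username string) {
    	output := strings.TrimSpace(stderr.String())
    	switch {
    	case strings.Contains(output, "UserWarning: pkg_resources is deprecated as an API"):
    		log.Info(output)
    	case output != "":
    		log.Error(errors.New(output), fmt.Sprintf("pgAdmin setup.py error for %s: ", username))
    	}
    }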
--- internal/controller/standalone_pgadmin/users.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/controller/standalone_pgadmin/users.go b/internal/controller/standalone_pgadmin/users.go index d809aa44ef..26394c9060 100644 --- a/internal/controller/standalone_pgadmin/users.go +++ b/internal/controller/standalone_pgadmin/users.go @@ -257,6 +257,8 @@ cd $PGADMIN_DIR log.Error(err, "PodExec failed: ") intentUsers = append(intentUsers, existingUser) continue + } else if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + log.Info(stderr.String()) } else if strings.TrimSpace(stderr.String()) != "" { log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", intentUser.Username)) @@ -291,7 +293,9 @@ cd $PGADMIN_DIR log.Error(err, "PodExec failed: ") continue } - if strings.TrimSpace(stderr.String()) != "" { + if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + log.Info(stderr.String()) + } else if strings.TrimSpace(stderr.String()) != "" { log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", intentUser.Username)) continue From c7c07f49274d4e0ab6778b588bdcf4ef665c2232 Mon Sep 17 00:00:00 2001 From: TJ Moore Date: Sat, 27 Sep 2025 19:30:08 -0400 Subject: [PATCH 58/79] Remove reconcilePGAdminVersion, adjust tests and add clarifying comments --- .../controller/standalone_pgadmin/users.go | 46 ++++++++------- .../standalone_pgadmin/users_test.go | 56 ++++++++----------- 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/internal/controller/standalone_pgadmin/users.go b/internal/controller/standalone_pgadmin/users.go index 26394c9060..bfea0f444c 100644 --- a/internal/controller/standalone_pgadmin/users.go +++ b/internal/controller/standalone_pgadmin/users.go @@ -85,11 +85,21 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * if pgadmin.Status.MajorVersion == 0 || pgadmin.Status.MinorVersion == "" || pgadmin.Status.ImageSHA != pgAdminImageSha { - pgadminMinorVersion, err := r.reconcilePGAdminVersion(ctx, podExecutor) - if err != nil { + // exec into the pgAdmin pod and retrieve the pgAdmin minor version + script := fmt.Sprintf(` +PGADMIN_DIR=%s +cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_VERSION)" +`, pgAdminDir) + + var stdin, stdout, stderr bytes.Buffer + + if err := podExecutor(ctx, &stdin, &stdout, &stderr, + []string{"bash", "-ceu", "--", script}...); err != nil { return err } + pgadminMinorVersion := strings.TrimSpace(stdout.String()) + // ensure minor version is valid before storing in status parsedMinorVersion, err := strconv.ParseFloat(pgadminMinorVersion, 64) if err != nil { @@ -123,25 +133,6 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * return r.writePGAdminUsers(ctx, pgadmin, podExecutor) } -// reconcilePGAdminVersion execs into the pgAdmin pod and retrieves the pgAdmin minor version -func (r *PGAdminReconciler) reconcilePGAdminVersion(ctx context.Context, exec Executor) (string, error) { - script := fmt.Sprintf(` -PGADMIN_DIR=%s -cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_VERSION)" -`, pgAdminDir) - - var stdin, stdout, stderr bytes.Buffer - - err := exec(ctx, &stdin, &stdout, &stderr, - []string{"bash", "-ceu", "--", script}...) 
- - if err != nil { - return "", err - } - - return strings.TrimSpace(stdout.String()), nil -} - // writePGAdminUsers takes the users in the pgAdmin spec and writes (adds or updates) their data // to both pgAdmin and the users.json file that is stored in the pgAdmin secret. If a user is // removed from the spec, its data is removed from users.json, but it is not deleted from pgAdmin. @@ -187,7 +178,7 @@ cd $PGADMIN_DIR } var olderThan9_3 bool - versionFloat, err := strconv.ParseFloat(pgadmin.Status.MinorVersion, 32) + versionFloat, err := strconv.ParseFloat(pgadmin.Status.MinorVersion, 64) if err != nil { return err } @@ -198,6 +189,8 @@ cd $PGADMIN_DIR intentUsers := []pgAdminUserForJson{} for _, user := range pgadmin.Spec.Users { var stdin, stdout, stderr bytes.Buffer + // starting in pgAdmin 9.3, custom roles are supported and a new flag is used + // - https://github.com/pgadmin-org/pgadmin4/pull/8631 typeFlag := "--role User" if olderThan9_3 { typeFlag = "--nonadmin" @@ -257,10 +250,13 @@ cd $PGADMIN_DIR log.Error(err, "PodExec failed: ") intentUsers = append(intentUsers, existingUser) continue + } else if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + // Started seeing this error with pgAdmin 9.7 when using Python 3.11. + // Issue appears to resolve with Python 3.13. log.Info(stderr.String()) } else if strings.TrimSpace(stderr.String()) != "" { - log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", + log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py update-user error for %s: ", intentUser.Username)) intentUsers = append(intentUsers, existingUser) continue @@ -294,9 +290,11 @@ cd $PGADMIN_DIR continue } if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + // Started seeing this error with pgAdmin 9.7 when using Python 3.11. + // Issue appears to resolve with Python 3.13. log.Info(stderr.String()) } else if strings.TrimSpace(stderr.String()) != "" { - log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", + log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py add-user error for %s: ", intentUser.Username)) continue } diff --git a/internal/controller/standalone_pgadmin/users_test.go b/internal/controller/standalone_pgadmin/users_test.go index d08f5b74b0..bcb97a538b 100644 --- a/internal/controller/standalone_pgadmin/users_test.go +++ b/internal/controller/standalone_pgadmin/users_test.go @@ -195,52 +195,40 @@ func TestReconcilePGAdminUsers(t *testing.T) { assert.Equal(t, pgadmin.Status.MinorVersion, "") assert.Equal(t, pgadmin.Status.ImageSHA, "") }) -} -func TestReconcilePGAdminVersion(t *testing.T) { - ctx := context.Background() - pod := corev1.Pod{} - pod.Namespace = "test-namespace" - pod.Name = "pgadmin-123-0" - reconciler := &PGAdminReconciler{} + t.Run("PodExecError", func(t *testing.T) { + pgadmin := pgadmin.DeepCopy() + pod := pod.DeepCopy() - podExecutor := func( - ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { - return reconciler.PodExec(ctx, pod.Namespace, pod.Name, "pgadmin", stdin, stdout, stderr, command...) 
- } + pod.DeletionTimestamp = nil + pod.Status.ContainerStatuses = + []corev1.ContainerStatus{{Name: naming.ContainerPGAdmin}} + pod.Status.ContainerStatuses[0].State.Running = + new(corev1.ContainerStateRunning) + pod.Status.ContainerStatuses[0].ImageID = "fakeSHA" - t.Run("SuccessfulRetrieval", func(t *testing.T) { - reconciler.PodExec = func( + r := new(PGAdminReconciler) + r.Client = fake.NewClientBuilder().WithObjects(pod).Build() + + calls := 0 + r.PodExec = func( ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error { + calls++ + assert.Equal(t, pod, "pgadmin-123-0") - assert.Equal(t, namespace, "test-namespace") + assert.Equal(t, namespace, pgadmin.Namespace) assert.Equal(t, container, naming.ContainerPGAdmin) - // Simulate a v9.3 version of pgAdmin by setting stdout to "9.3" - // for podexec call in reconcilePGAdminVersion - _, _ = stdout.Write([]byte("9.3")) - return nil - } - - version, err := reconciler.reconcilePGAdminVersion(ctx, podExecutor) - assert.NilError(t, err) - assert.Equal(t, version, "9.3") - }) - - t.Run("PodExecError", func(t *testing.T) { - reconciler.PodExec = func( - ctx context.Context, namespace, pod, container string, - stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { return errors.New("PodExecError") } - version, err := reconciler.reconcilePGAdminVersion(ctx, podExecutor) - assert.Check(t, err != nil) - assert.Equal(t, version, "") + assert.Error(t, r.reconcilePGAdminUsers(ctx, pgadmin), "PodExecError") + assert.Equal(t, calls, 1, "PodExec should be called once") + assert.Equal(t, pgadmin.Status.MajorVersion, 0) + assert.Equal(t, pgadmin.Status.MinorVersion, "") + assert.Equal(t, pgadmin.Status.ImageSHA, "") }) } From cc8f544ce7928951cd713b65ff1d7ead4e5318f7 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Tue, 7 Oct 2025 11:26:10 -0700 Subject: [PATCH 59/79] Update postgrescluster and pgupgrade APIs to allow pg18 --- .../bases/postgres-operator.crunchydata.com_pgupgrades.yaml | 4 ++-- .../postgres-operator.crunchydata.com_postgresclusters.yaml | 2 +- .../v1beta1/pgupgrade_types.go | 4 ++-- .../v1beta1/postgrescluster_types.go | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml index 53d72671bc..5b3baae35b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml @@ -964,7 +964,7 @@ spec: fromPostgresVersion: description: The major version of PostgreSQL before the upgrade. format: int32 - maximum: 17 + maximum: 18 minimum: 11 type: integer image: @@ -1094,7 +1094,7 @@ spec: toPostgresVersion: description: The major version of PostgreSQL to be upgraded to. 
format: int32 - maximum: 17 + maximum: 18 minimum: 11 type: integer tolerations: diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 483bce91ad..907e569306 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -12823,7 +12823,7 @@ spec: postgresVersion: description: The major version of PostgreSQL installed in the PostgreSQL image - maximum: 17 + maximum: 18 minimum: 11 type: integer proxy: diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go index e0bfe86d5d..6e0267f0bc 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go @@ -84,7 +84,7 @@ type PGUpgradeSettings struct { // The major version of PostgreSQL before the upgrade. // --- // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +required FromPostgresVersion int32 `json:"fromPostgresVersion"` @@ -98,7 +98,7 @@ type PGUpgradeSettings struct { // The major version of PostgreSQL to be upgraded to. // --- // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +required ToPostgresVersion int32 `json:"toPostgresVersion"` diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 71be93a55e..68cc117f68 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -131,7 +131,7 @@ type PostgresClusterSpec struct { // The major version of PostgreSQL installed in the PostgreSQL image // +kubebuilder:validation:Required // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +operator-sdk:csv:customresourcedefinitions:type=spec,order=1 PostgresVersion int `json:"postgresVersion"` From 6c9dc6a8f0bd429a89003f68faa3983756b245bb Mon Sep 17 00:00:00 2001 From: jmckulk Date: Wed, 22 Oct 2025 17:03:04 -0400 Subject: [PATCH 60/79] Update pgbackrest-initialization script to be more portable /bin/bash doesn't work on systems like nix /usr/bin/env bash does other scripts throughout the repo use the more portable option --- testing/kuttl/scripts/pgbackrest-initialization.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/kuttl/scripts/pgbackrest-initialization.sh b/testing/kuttl/scripts/pgbackrest-initialization.sh index ba6cd4a7e5..9d60a4cd9d 100755 --- a/testing/kuttl/scripts/pgbackrest-initialization.sh +++ b/testing/kuttl/scripts/pgbackrest-initialization.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash EXPECTED_STATUS=$1 EXPECTED_NUM_BACKUPS=$2 From f1d0add874ef06553c95ca55ab67a6188708472d Mon Sep 17 00:00:00 2001 From: jmckulk Date: Wed, 22 Oct 2025 17:04:42 -0400 Subject: [PATCH 61/79] Disable AppendCustomQueries flag in deploy-dev Our kuttl tests expect the default behavior of not appending custom queries --- Makefile | 9 ++++----- testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile 
b/Makefile index f4977bcf99..c14ca5f8a1 100644 --- a/Makefile +++ b/Makefile @@ -119,17 +119,16 @@ undeploy: ## Undeploy the PostgreSQL Operator .PHONY: deploy-dev deploy-dev: ## Deploy the PostgreSQL Operator locally -deploy-dev: PGO_FEATURE_GATES ?= "AllAlpha=true" deploy-dev: get-pgmonitor deploy-dev: build-postgres-operator deploy-dev: createnamespaces kubectl apply --server-side -k ./config/dev hack/create-kubeconfig.sh postgres-operator pgo env \ - QUERIES_CONFIG_DIR="${QUERIES_CONFIG_DIR}" \ - CRUNCHY_DEBUG=true \ - PGO_FEATURE_GATES="${PGO_FEATURE_GATES}" \ - CHECK_FOR_UPGRADES='$(if $(CHECK_FOR_UPGRADES),$(CHECK_FOR_UPGRADES),false)' \ + QUERIES_CONFIG_DIR='$(QUERIES_CONFIG_DIR)' \ + CRUNCHY_DEBUG="$${CRUNCHY_DEBUG:-true}" \ + PGO_FEATURE_GATES="$${PGO_FEATURE_GATES:-AllAlpha=true,AppendCustomQueries=false}" \ + CHECK_FOR_UPGRADES="$${CHECK_FOR_UPGRADES:-false}" \ KUBECONFIG=hack/.kube/postgres-operator/pgo \ PGO_NAMESPACE='postgres-operator' \ PGO_INSTALLER='deploy-dev' \ diff --git a/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml b/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml index bbf5c051fd..405969c18c 100644 --- a/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml +++ b/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml @@ -31,7 +31,7 @@ commands: contains "${queries_files}" "queries.yml" && !(contains "${queries_files}" "defaultQueries.yml") } || { - echo >&2 'The /conf directory should contain the queries.yml file. Instead it has:' + echo >&2 'The /conf directory should only contain the queries.yml file. Instead it has:' echo "${queries_files}" exit 1 } From fc311498d8484f78aaa3944f00ea7b7b25e80089 Mon Sep 17 00:00:00 2001 From: jmckulk Date: Thu, 23 Oct 2025 18:44:57 -0400 Subject: [PATCH 62/79] Remove CHECK_FOR_UPGRADES and registration --- Makefile | 1 - cmd/postgres-operator/main.go | 41 +- ...ator.crunchydata.com_postgresclusters.yaml | 7 - go.mod | 3 +- go.sum | 2 - .../pgupgrade/pgupgrade_controller.go | 8 +- internal/controller/pgupgrade/registration.go | 27 - .../controller/pgupgrade/registration_test.go | 95 --- .../controller/postgrescluster/controller.go | 10 +- .../postgrescluster/controller_test.go | 74 --- internal/naming/names.go | 8 - internal/registration/interface.go | 67 -- internal/registration/runner.go | 187 ------ internal/registration/runner_test.go | 574 ------------------ internal/registration/testing.go | 21 - internal/upgradecheck/header.go | 195 ------ internal/upgradecheck/header_test.go | 560 ----------------- internal/upgradecheck/helpers_test.go | 125 ---- internal/upgradecheck/http.go | 196 ------ internal/upgradecheck/http_test.go | 230 ------- .../v1beta1/postgrescluster_types.go | 11 - .../v1beta1/zz_generated.deepcopy.go | 20 - 22 files changed, 11 insertions(+), 2451 deletions(-) delete mode 100644 internal/controller/pgupgrade/registration.go delete mode 100644 internal/controller/pgupgrade/registration_test.go delete mode 100644 internal/registration/interface.go delete mode 100644 internal/registration/runner.go delete mode 100644 internal/registration/runner_test.go delete mode 100644 internal/registration/testing.go delete mode 100644 internal/upgradecheck/header.go delete mode 100644 internal/upgradecheck/header_test.go delete mode 100644 internal/upgradecheck/helpers_test.go delete mode 100644 internal/upgradecheck/http.go delete mode 100644 internal/upgradecheck/http_test.go diff --git a/Makefile b/Makefile index c14ca5f8a1..26302d5f9b 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,6 @@ 
deploy-dev: createnamespaces QUERIES_CONFIG_DIR='$(QUERIES_CONFIG_DIR)' \ CRUNCHY_DEBUG="$${CRUNCHY_DEBUG:-true}" \ PGO_FEATURE_GATES="$${PGO_FEATURE_GATES:-AllAlpha=true,AppendCustomQueries=false}" \ - CHECK_FOR_UPGRADES="$${CHECK_FOR_UPGRADES:-false}" \ KUBECONFIG=hack/.kube/postgres-operator/pgo \ PGO_NAMESPACE='postgres-operator' \ PGO_INSTALLER='deploy-dev' \ diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 8545e9e241..48ef5de308 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -34,9 +34,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" - "github.com/crunchydata/postgres-operator/internal/upgradecheck" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -223,13 +221,8 @@ func main() { assertNoError(err) assertNoError(mgr.Add(k8s)) - registrar, err := registration.NewRunner(os.Getenv("RSA_KEY"), os.Getenv("TOKEN_PATH"), stopRunning) - assertNoError(err) - assertNoError(mgr.Add(registrar)) - token, _ := registrar.CheckToken() - // add all PostgreSQL Operator controllers to the runtime manager - addControllersToManager(mgr, log, registrar) + addControllersToManager(mgr, log) if features.Enabled(feature.BridgeIdentifiers) { constructor := func() *bridge.Client { @@ -241,22 +234,6 @@ func main() { assertNoError(bridge.ManagedInstallationReconciler(mgr, constructor)) } - // Enable upgrade checking - upgradeCheckingDisabled := strings.EqualFold(os.Getenv("CHECK_FOR_UPGRADES"), "false") - if !upgradeCheckingDisabled { - log.Info("upgrade checking enabled") - // get the URL for the check for upgrades endpoint if set in the env - assertNoError( - upgradecheck.ManagedScheduler( - mgr, - os.Getenv("CHECK_FOR_UPGRADES_URL"), - versionString, - token, - )) - } else { - log.Info("upgrade checking disabled") - } - // Enable health probes assertNoError(mgr.AddHealthzCheck("health", healthz.Ping)) assertNoError(mgr.AddReadyzCheck("check", healthz.Ping)) @@ -288,12 +265,11 @@ func main() { // addControllersToManager adds all PostgreSQL Operator controllers to the provided controller // runtime manager. 
-func addControllersToManager(mgr runtime.Manager, log logging.Logger, reg registration.Registration) { +func addControllersToManager(mgr runtime.Manager, log logging.Logger) { pgReconciler := &postgrescluster.Reconciler{ - Client: mgr.GetClient(), - Owner: postgrescluster.ControllerName, - Recorder: mgr.GetEventRecorderFor(postgrescluster.ControllerName), - Registration: reg, + Client: mgr.GetClient(), + Owner: postgrescluster.ControllerName, + Recorder: mgr.GetEventRecorderFor(postgrescluster.ControllerName), } if err := pgReconciler.SetupWithManager(mgr); err != nil { @@ -302,10 +278,9 @@ func addControllersToManager(mgr runtime.Manager, log logging.Logger, reg regist } upgradeReconciler := &pgupgrade.PGUpgradeReconciler{ - Client: mgr.GetClient(), - Owner: "pgupgrade-controller", - Recorder: mgr.GetEventRecorderFor("pgupgrade-controller"), - Registration: reg, + Client: mgr.GetClient(), + Owner: "pgupgrade-controller", + Recorder: mgr.GetEventRecorderFor("pgupgrade-controller"), } if err := upgradeReconciler.SetupWithManager(mgr); err != nil { diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 907e569306..bd3b210500 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -18601,11 +18601,6 @@ spec: type: integer type: object type: object - registrationRequired: - properties: - pgoVersion: - type: string - type: object startupInstance: description: |- The instance that should be started first when bootstrapping and/or starting a @@ -18614,8 +18609,6 @@ spec: startupInstanceSet: description: The instance set associated with the startupInstance type: string - tokenRequired: - type: string userInterface: description: Current state of the PostgreSQL user interface. 
properties: diff --git a/go.mod b/go.mod index 6ea5a8d11e..1216780747 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,7 @@ go 1.23.0 require ( github.com/go-logr/logr v1.4.2 - github.com/golang-jwt/jwt/v5 v5.2.2 github.com/google/go-cmp v0.6.0 - github.com/google/uuid v1.6.0 github.com/kubernetes-csi/external-snapshotter/client/v8 v8.0.0 github.com/onsi/ginkgo/v2 v2.22.0 github.com/onsi/gomega v1.36.1 @@ -59,6 +57,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/imdario/mergo v0.3.16 // indirect diff --git a/go.sum b/go.sum index 89177ce6c8..4886398d01 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,6 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= -github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= diff --git a/internal/controller/pgupgrade/pgupgrade_controller.go b/internal/controller/pgupgrade/pgupgrade_controller.go index 06a36574f0..5fbd8262fc 100644 --- a/internal/controller/pgupgrade/pgupgrade_controller.go +++ b/internal/controller/pgupgrade/pgupgrade_controller.go @@ -21,7 +21,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -35,8 +34,7 @@ type PGUpgradeReconciler struct { Client client.Client Owner client.FieldOwner - Recorder record.EventRecorder - Registration registration.Registration + Recorder record.EventRecorder } //+kubebuilder:rbac:groups="batch",resources="jobs",verbs={list,watch} @@ -145,10 +143,6 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return } - if !r.UpgradeAuthorized(upgrade) { - return ctrl.Result{}, nil - } - // Set progressing condition to true if it doesn't exist already setStatusToProgressingIfReasonWas("", upgrade) diff --git a/internal/controller/pgupgrade/registration.go b/internal/controller/pgupgrade/registration.go deleted file mode 100644 index 4fbf7a7ce1..0000000000 --- a/internal/controller/pgupgrade/registration.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package pgupgrade - -import ( - "k8s.io/apimachinery/pkg/api/meta" - - "github.com/crunchydata/postgres-operator/internal/registration" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func (r *PGUpgradeReconciler) UpgradeAuthorized(upgrade *v1beta1.PGUpgrade) bool { - // Allow an upgrade in progress to complete, when the registration requirement is introduced. - // But don't allow new upgrades to be started until a valid token is applied. - progressing := meta.FindStatusCondition(upgrade.Status.Conditions, ConditionPGUpgradeProgressing) != nil - required := r.Registration.Required(r.Recorder, upgrade, &upgrade.Status.Conditions) - - // If a valid token has not been applied, warn the user. - if required && !progressing { - registration.SetRequiredWarning(r.Recorder, upgrade, &upgrade.Status.Conditions) - return false - } - - return true -} diff --git a/internal/controller/pgupgrade/registration_test.go b/internal/controller/pgupgrade/registration_test.go deleted file mode 100644 index 22903d8cdb..0000000000 --- a/internal/controller/pgupgrade/registration_test.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package pgupgrade - -import ( - "testing" - - "gotest.tools/v3/assert" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/internal/controller/runtime" - "github.com/crunchydata/postgres-operator/internal/registration" - "github.com/crunchydata/postgres-operator/internal/testing/cmp" - "github.com/crunchydata/postgres-operator/internal/testing/events" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func TestUpgradeAuthorized(t *testing.T) { - t.Run("UpgradeAlreadyInProgress", func(t *testing.T) { - reconciler := new(PGUpgradeReconciler) - upgrade := new(v1beta1.PGUpgrade) - - for _, required := range []bool{false, true} { - reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return required - }) - - meta.SetStatusCondition(&upgrade.Status.Conditions, metav1.Condition{ - Type: ConditionPGUpgradeProgressing, - Status: metav1.ConditionTrue, - }) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, result, "expected signal to proceed") - - progressing := meta.FindStatusCondition(upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - assert.Equal(t, progressing.Status, metav1.ConditionTrue) - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - upgrade := new(v1beta1.PGUpgrade) - upgrade.Name = "some-upgrade" - - reconciler := PGUpgradeReconciler{ - Recorder: recorder, - Registration: registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return true - }), - } - - meta.RemoveStatusCondition(&upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, !result, "expected signal to not proceed") - - condition := meta.FindStatusCondition(upgrade.Status.Conditions, v1beta1.Registered) - if assert.Check(t, condition != nil) { - assert.Equal(t, condition.Status, metav1.ConditionFalse) - } - - if assert.Check(t, 
len(recorder.Events) > 0) { - assert.Equal(t, recorder.Events[0].Type, "Warning") - assert.Equal(t, recorder.Events[0].Regarding.Kind, "PGUpgrade") - assert.Equal(t, recorder.Events[0].Regarding.Name, "some-upgrade") - assert.Assert(t, cmp.Contains(recorder.Events[0].Note, "requires")) - } - }) - - t.Run("RegistrationCompleted", func(t *testing.T) { - reconciler := new(PGUpgradeReconciler) - upgrade := new(v1beta1.PGUpgrade) - - called := false - reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - called = true - return false - }) - - meta.RemoveStatusCondition(&upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, result, "expected signal to proceed") - assert.Assert(t, called, "expected registration package to clear conditions") - }) -} diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index bbe141c0b4..b8ede195f5 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -35,7 +35,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/postgres" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -53,8 +52,7 @@ type Reconciler struct { ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error - Recorder record.EventRecorder - Registration registration.Registration + Recorder record.EventRecorder } // +kubebuilder:rbac:groups="",resources="events",verbs={create,patch} @@ -187,12 +185,6 @@ func (r *Reconciler) Reconcile( return nil } - if r.Registration != nil && r.Registration.Required(r.Recorder, cluster, &cluster.Status.Conditions) { - registration.SetAdvanceWarning(r.Recorder, cluster, &cluster.Status.Conditions) - } - cluster.Status.RegistrationRequired = nil - cluster.Status.TokenRequired = "" - // if the cluster is paused, set a condition and return if cluster.Spec.Paused != nil && *cluster.Spec.Paused { meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{ diff --git a/internal/controller/postgrescluster/controller_test.go b/internal/controller/postgrescluster/controller_test.go index 9e36d0c2d0..243baef94c 100644 --- a/internal/controller/postgrescluster/controller_test.go +++ b/internal/controller/postgrescluster/controller_test.go @@ -19,7 +19,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/version" @@ -29,7 +28,6 @@ import ( "sigs.k8s.io/yaml" "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -88,34 +86,6 @@ func TestDeleteControlled(t *testing.T) { }) } -var olmClusterYAML = ` -metadata: - name: olm -spec: - postgresVersion: 13 - image: postgres - instances: - - name: 
register-now - dataVolumeClaimSpec: - accessModes: - - "ReadWriteMany" - resources: - requests: - storage: 1Gi - backups: - pgbackrest: - image: pgbackrest - repos: - - name: repo1 - volume: - volumeClaimSpec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: 1Gi -` - var _ = Describe("PostgresCluster Reconciler", func() { var test struct { Namespace *corev1.Namespace @@ -136,7 +106,6 @@ var _ = Describe("PostgresCluster Reconciler", func() { test.Reconciler.Client = suite.Client test.Reconciler.Owner = "asdf" test.Reconciler.Recorder = test.Recorder - test.Reconciler.Registration = nil }) AfterEach(func() { @@ -176,49 +145,6 @@ var _ = Describe("PostgresCluster Reconciler", func() { return result } - Context("Cluster with Registration Requirement, no token", func() { - var cluster *v1beta1.PostgresCluster - - BeforeEach(func() { - test.Reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return true - }) - - cluster = create(olmClusterYAML) - Expect(reconcile(cluster)).To(BeZero()) - }) - - AfterEach(func() { - ctx := context.Background() - - if cluster != nil { - Expect(client.IgnoreNotFound( - suite.Client.Delete(ctx, cluster), - )).To(Succeed()) - - // Remove finalizers, if any, so the namespace can terminate. - Expect(client.IgnoreNotFound( - suite.Client.Patch(ctx, cluster, client.RawPatch( - client.Merge.Type(), []byte(`{"metadata":{"finalizers":[]}}`))), - )).To(Succeed()) - } - }) - - Specify("Cluster RegistrationRequired Status", func() { - existing := &v1beta1.PostgresCluster{} - Expect(suite.Client.Get( - context.Background(), client.ObjectKeyFromObject(cluster), existing, - )).To(Succeed()) - - Expect(meta.IsStatusConditionFalse(existing.Status.Conditions, v1beta1.Registered)).To(BeTrue()) - - event, ok := <-test.Recorder.Events - Expect(ok).To(BeTrue()) - Expect(event).To(ContainSubstring("Register Soon")) - }) - }) - Context("Cluster", func() { var cluster *v1beta1.PostgresCluster diff --git a/internal/naming/names.go b/internal/naming/names.go index 04923730fb..345967d1a7 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -604,11 +604,3 @@ func StandalonePGAdmin(pgadmin *v1beta1.PGAdmin) metav1.ObjectMeta { Name: fmt.Sprintf("pgadmin-%s", pgadmin.UID), } } - -// UpgradeCheckConfigMap returns the ObjectMeta for the PGO ConfigMap -func UpgradeCheckConfigMap() metav1.ObjectMeta { - return metav1.ObjectMeta{ - Namespace: config.PGONamespace(), - Name: "pgo-upgrade-check", - } -} diff --git a/internal/registration/interface.go b/internal/registration/interface.go deleted file mode 100644 index c0d4e390ad..0000000000 --- a/internal/registration/interface.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "fmt" - "os" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -type Registration interface { - // Required returns true when registration is required but the token is missing or invalid. 
- Required(record.EventRecorder, client.Object, *[]metav1.Condition) bool -} - -var URL = os.Getenv("REGISTRATION_URL") - -func SetAdvanceWarning(recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition) { - recorder.Eventf(object, corev1.EventTypeWarning, "Register Soon", - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL) - - meta.SetStatusCondition(conditions, metav1.Condition{ - Type: v1beta1.Registered, - Status: metav1.ConditionFalse, - Reason: "TokenRequired", - Message: fmt.Sprintf( - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL), - ObservedGeneration: object.GetGeneration(), - }) -} - -func SetRequiredWarning(recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition) { - recorder.Eventf(object, corev1.EventTypeWarning, "Registration Required", - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL) - - meta.SetStatusCondition(conditions, metav1.Condition{ - Type: v1beta1.Registered, - Status: metav1.ConditionFalse, - Reason: "TokenRequired", - Message: fmt.Sprintf( - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Upgrade suspended. See %s for details.", URL), - ObservedGeneration: object.GetGeneration(), - }) -} - -func emitFailedWarning(recorder record.EventRecorder, object client.Object) { - recorder.Eventf(object, corev1.EventTypeWarning, "Token Authentication Failed", - "See %s for details.", URL) -} - -func emitVerifiedEvent(recorder record.EventRecorder, object client.Object) { - recorder.Event(object, corev1.EventTypeNormal, "Token Verified", - "Thank you for registering your installation of Crunchy Postgres for Kubernetes.") -} diff --git a/internal/registration/runner.go b/internal/registration/runner.go deleted file mode 100644 index b50ceeb4ed..0000000000 --- a/internal/registration/runner.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "context" - "crypto/rsa" - "errors" - "os" - "strings" - "sync" - "time" - - "github.com/golang-jwt/jwt/v5" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -// Runner implements [Registration] by loading and validating the token at a -// fixed path. Its methods are safe to call concurrently. -type Runner struct { - changed func() - enabled bool - publicKey *rsa.PublicKey - refresh time.Duration - tokenPath string - - token struct { - sync.RWMutex - Exists bool `json:"-"` - - jwt.RegisteredClaims - Iteration int `json:"itr"` - } -} - -// Runner implements [Registration] and [manager.Runnable]. -var _ Registration = (*Runner)(nil) -var _ manager.Runnable = (*Runner)(nil) - -// NewRunner creates a [Runner] that periodically checks the validity of the -// token at tokenPath. It calls changed when the validity of the token changes. 
-func NewRunner(publicKey, tokenPath string, changed func()) (*Runner, error) { - runner := &Runner{ - changed: changed, - refresh: time.Minute, - tokenPath: tokenPath, - } - - var err error - switch { - case publicKey != "" && tokenPath != "": - if !strings.HasPrefix(strings.TrimSpace(publicKey), "-") { - publicKey = "-----BEGIN -----\n" + publicKey + "\n-----END -----" - } - - runner.enabled = true - runner.publicKey, err = jwt.ParseRSAPublicKeyFromPEM([]byte(publicKey)) - - case publicKey == "" && tokenPath != "": - err = errors.New("registration: missing public key") - - case publicKey != "" && tokenPath == "": - err = errors.New("registration: missing token path") - } - - return runner, err -} - -// CheckToken loads and verifies the configured token, returning an error when -// the file exists but cannot be verified, and -// returning the token if it can be verified. -// NOTE(upgradecheck): return the token/nil so that we can use the token -// in upgradecheck; currently a refresh of the token will cause a restart of the pod -// meaning that the token used in upgradecheck is always the current token. -// But if the restart behavior changes, we might drop the token return in main.go -// and change upgradecheck to retrieve the token itself -func (r *Runner) CheckToken() (*jwt.Token, error) { - data, errFile := os.ReadFile(r.tokenPath) - key := func(*jwt.Token) (any, error) { return r.publicKey, nil } - - // Assume [jwt] and [os] functions could do something unexpected; use defer - // to safely write to the token. - r.token.Lock() - defer r.token.Unlock() - - token, errToken := jwt.ParseWithClaims(string(data), &r.token, key, - jwt.WithExpirationRequired(), - jwt.WithValidMethods([]string{"RS256"}), - ) - - // The error from [os.ReadFile] indicates whether a token file exists. - r.token.Exists = !os.IsNotExist(errFile) - - // Reset most claims if there is any problem loading, parsing, validating, or - // verifying the token file. - if errFile != nil || errToken != nil { - r.token.RegisteredClaims = jwt.RegisteredClaims{} - } - - switch { - case !r.enabled || !r.token.Exists: - return nil, nil - case errFile != nil: - return nil, errFile - default: - return token, errToken - } -} - -func (r *Runner) state() (failed, required bool) { - // Assume [time] functions could do something unexpected; use defer to safely - // read the token. - r.token.RLock() - defer r.token.RUnlock() - - failed = r.token.Exists && r.token.ExpiresAt == nil - required = r.enabled && - (!r.token.Exists || failed || r.token.ExpiresAt.Before(time.Now())) - return -} - -// Required returns true when registration is required but the token is missing or invalid. -func (r *Runner) Required( - recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition, -) bool { - failed, required := r.state() - - if r.enabled && failed { - emitFailedWarning(recorder, object) - } - - if !required && conditions != nil { - before := len(*conditions) - meta.RemoveStatusCondition(conditions, v1beta1.Registered) - meta.RemoveStatusCondition(conditions, "RegistrationRequired") - meta.RemoveStatusCondition(conditions, "TokenRequired") - found := len(*conditions) != before - - if r.enabled && found { - emitVerifiedEvent(recorder, object) - } - } - - return required -} - -// NeedLeaderElection returns true so that r runs only on the single -// [manager.Manager] that is elected leader in the Kubernetes namespace. 
-func (r *Runner) NeedLeaderElection() bool { return true } - -// Start watches for a mounted registration token when enabled. It blocks -// until ctx is cancelled. -func (r *Runner) Start(ctx context.Context) error { - var ticks <-chan time.Time - - if r.enabled { - ticker := time.NewTicker(r.refresh) - defer ticker.Stop() - ticks = ticker.C - } - - log := logging.FromContext(ctx).WithValues("controller", "registration") - - for { - select { - case <-ticks: - _, before := r.state() - if _, err := r.CheckToken(); err != nil { - log.Error(err, "Unable to validate token") - } - if _, after := r.state(); before != after && r.changed != nil { - r.changed() - } - case <-ctx.Done(): - return ctx.Err() - } - } -} diff --git a/internal/registration/runner_test.go b/internal/registration/runner_test.go deleted file mode 100644 index c70c07c6b9..0000000000 --- a/internal/registration/runner_test.go +++ /dev/null @@ -1,574 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "context" - "crypto/rand" - "crypto/rsa" - "crypto/x509" - "encoding/pem" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/golang-jwt/jwt/v5" - "gotest.tools/v3/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/testing/events" -) - -func TestNewRunner(t *testing.T) { - t.Parallel() - - key, err := rsa.GenerateKey(rand.Reader, 2048) - assert.NilError(t, err) - - der, err := x509.MarshalPKIXPublicKey(&key.PublicKey) - assert.NilError(t, err) - - public := pem.EncodeToMemory(&pem.Block{Bytes: der}) - assert.Assert(t, len(public) != 0) - - t.Run("Disabled", func(t *testing.T) { - runner, err := NewRunner("", "", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, !runner.enabled) - }) - - t.Run("ConfiguredCorrectly", func(t *testing.T) { - runner, err := NewRunner(string(public), "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - - t.Run("ExtraLines", func(t *testing.T) { - input := "\n\n" + strings.ReplaceAll(string(public), "\n", "\n\n") + "\n\n" - - runner, err := NewRunner(input, "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - }) - - t.Run("WithoutPEMBoundaries", func(t *testing.T) { - lines := strings.Split(strings.TrimSpace(string(public)), "\n") - lines = lines[1 : len(lines)-1] - - for _, input := range []string{ - strings.Join(lines, ""), // single line - strings.Join(lines, "\n"), // multi-line - "\n\n" + strings.Join(lines, "\n\n") + "\n\n", // extra lines - } { - runner, err := NewRunner(input, "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - } - }) - }) - - t.Run("ConfiguredIncorrectly", func(t *testing.T) { - for _, tt := range []struct { - key, path, msg string - }{ - {msg: "public key", key: "", path: "any"}, - {msg: "token path", key: "bad", path: ""}, - {msg: "invalid key", key: "bad", path: "any"}, - {msg: "token path", key: string(public), path: ""}, - } { - _, err := NewRunner(tt.key, tt.path, nil) - assert.ErrorContains(t, err, tt.msg, "(key=%q, path=%q)", tt.key, tt.path) - } - }) -} - -func TestRunnerCheckToken(t *testing.T) { - t.Parallel() - - dir := t.TempDir() - key, err := rsa.GenerateKey(rand.Reader, 2048) - 
assert.NilError(t, err) - - t.Run("SafeToCallDisabled", func(t *testing.T) { - r := Runner{enabled: false} - _, err := r.CheckToken() - assert.NilError(t, err) - }) - - t.Run("FileMissing", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "nope")} - _, err := r.CheckToken() - assert.NilError(t, err) - }) - - t.Run("FileUnreadable", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "nope")} - assert.NilError(t, os.WriteFile(r.tokenPath, nil, 0o200)) // Writeable - - _, err := r.CheckToken() - assert.ErrorContains(t, err, "permission") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("FileEmpty", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "empty")} - assert.NilError(t, os.WriteFile(r.tokenPath, nil, 0o400)) // Readable - - _, err := r.CheckToken() - assert.ErrorContains(t, err, "malformed") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("WrongAlgorithm", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "hs256"), - } - - // Maliciously treating an RSA public key as an HMAC secret. - // - https://auth0.com/blog/critical-vulnerabilities-in-json-web-token-libraries/ - public, err := x509.MarshalPKIXPublicKey(r.publicKey) - assert.NilError(t, err) - data, err := jwt.New(jwt.SigningMethodHS256).SignedString(public) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.Assert(t, err != nil, "HMAC algorithm should be rejected") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("MissingExpiration", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "no-claims"), - } - - data, err := jwt.New(jwt.SigningMethodRS256).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.ErrorContains(t, err, "exp claim is required") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("ExpiredToken", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "expired"), - } - - data, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": jwt.NewNumericDate(time.Date(2020, 1, 1, 1, 1, 1, 1, time.UTC)), - }).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.ErrorContains(t, err, "is expired") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("ValidToken", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "valid"), - } - - expiration := jwt.NewNumericDate(time.Now().Add(time.Hour)) - data, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": expiration, - }).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - token, err := r.CheckToken() - assert.NilError(t, err) - assert.Assert(t, r.token.ExpiresAt != nil) - assert.Assert(t, token.Valid) - exp, err := token.Claims.GetExpirationTime() - assert.NilError(t, err) - assert.Equal(t, exp.Time, expiration.Time) - }) -} - -func TestRunnerLeaderElectionRunnable(t *testing.T) { - var runner manager.LeaderElectionRunnable = &Runner{} - - assert.Assert(t, 
runner.NeedLeaderElection()) -} - -func TestRunnerRequiredConditions(t *testing.T) { - t.Parallel() - - t.Run("RegistrationDisabled", func(t *testing.T) { - r := Runner{enabled: false} - - for _, tt := range []struct { - before, after []metav1.Condition - }{ - { - before: []metav1.Condition{}, - after: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - after: []metav1.Condition{}, - }, - } { - for _, exists := range []bool{false, true} { - for _, expires := range []time.Time{ - time.Now().Add(time.Hour), - time.Now().Add(-time.Hour), - } { - r.token.Exists = exists - r.token.ExpiresAt = jwt.NewNumericDate(expires) - - conditions := append([]metav1.Condition{}, tt.before...) - discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.DeepEqual(t, conditions, tt.after) - } - } - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - r := Runner{enabled: true} - - for _, tt := range []struct { - exists bool - expires time.Time - before []metav1.Condition - }{ - { - exists: false, expires: time.Now().Add(time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - exists: false, expires: time.Now().Add(-time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - exists: true, expires: time.Now().Add(-time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - r.token.Exists = tt.exists - r.token.ExpiresAt = jwt.NewNumericDate(tt.expires) - - conditions := append([]metav1.Condition{}, tt.before...) - discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.DeepEqual(t, conditions, tt.before) - } - }) - - t.Run("Registered", func(t *testing.T) { - r := Runner{} - r.token.Exists = true - r.token.ExpiresAt = jwt.NewNumericDate(time.Now().Add(time.Hour)) - - for _, tt := range []struct { - before, after []metav1.Condition - }{ - { - before: []metav1.Condition{}, - after: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - after: []metav1.Condition{}, - }, - } { - for _, enabled := range []bool{false, true} { - r.enabled = enabled - - conditions := append([]metav1.Condition{}, tt.before...) 
- discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.DeepEqual(t, conditions, tt.after) - } - } - }) -} - -func TestRunnerRequiredEvents(t *testing.T) { - t.Parallel() - - t.Run("RegistrationDisabled", func(t *testing.T) { - r := Runner{enabled: false} - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - for _, exists := range []bool{false, true} { - for _, expires := range []time.Time{ - time.Now().Add(time.Hour), - time.Now().Add(-time.Hour), - } { - r.token.Exists = exists - r.token.ExpiresAt = jwt.NewNumericDate(expires) - - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - } - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - r := Runner{enabled: true} - - t.Run("MissingToken", func(t *testing.T) { - r.token.Exists = false - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - }) - - t.Run("InvalidToken", func(t *testing.T) { - r.token.Exists = true - r.token.ExpiresAt = nil - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.Equal(t, len(recorder.Events), 1, "expected one event") - assert.Equal(t, recorder.Events[0].Type, "Warning") - assert.Equal(t, recorder.Events[0].Reason, "Token Authentication Failed") - } - }) - }) - - t.Run("Registered", func(t *testing.T) { - r := Runner{} - r.token.Exists = true - r.token.ExpiresAt = jwt.NewNumericDate(time.Now().Add(time.Hour)) - - t.Run("AlwaysRegistered", func(t *testing.T) { - // No prior registration conditions - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - } { - for _, enabled := range []bool{false, true} { - r.enabled = enabled - - conditions := append([]metav1.Condition{}, tt.before...) 
- object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - } - }) - - t.Run("PreviouslyUnregistered", func(t *testing.T) { - r.enabled = true - - // One or more prior registration conditions - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 1, "expected one event") - assert.Equal(t, recorder.Events[0].Type, "Normal") - assert.Equal(t, recorder.Events[0].Reason, "Token Verified") - } - }) - }) -} - -func TestRunnerStart(t *testing.T) { - t.Parallel() - - dir := t.TempDir() - key, err := rsa.GenerateKey(rand.Reader, 2048) - assert.NilError(t, err) - - token, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": jwt.NewNumericDate(time.Now().Add(time.Hour)), - }).SignedString(key) - assert.NilError(t, err) - - t.Run("DisabledDoesNothing", func(t *testing.T) { - runner := &Runner{ - enabled: false, - refresh: time.Nanosecond, - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) - defer cancel() - - assert.ErrorIs(t, runner.Start(ctx), context.DeadlineExceeded, - "expected it to block until context is canceled") - }) - - t.Run("WithCallback", func(t *testing.T) { - called := false - runner := &Runner{ - changed: func() { called = true }, - enabled: true, - publicKey: &key.PublicKey, - refresh: time.Second, - tokenPath: filepath.Join(dir, "token"), - } - - // Begin with an invalid token. - assert.NilError(t, os.WriteFile(runner.tokenPath, nil, 0o600)) - _, err = runner.CheckToken() - assert.Assert(t, err != nil) - - // Replace it with a valid token. - assert.NilError(t, os.WriteFile(runner.tokenPath, []byte(token), 0o600)) - - // Run with a timeout that exceeds the refresh interval. - ctx, cancel := context.WithTimeout(context.Background(), runner.refresh*3/2) - defer cancel() - - assert.ErrorIs(t, runner.Start(ctx), context.DeadlineExceeded) - assert.Assert(t, called, "expected a call back") - }) -} diff --git a/internal/registration/testing.go b/internal/registration/testing.go deleted file mode 100644 index 7ea0032b31..0000000000 --- a/internal/registration/testing.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// NOTE: This type can go away following https://go.dev/issue/47487. 
- -type RegistrationFunc func(record.EventRecorder, client.Object, *[]metav1.Condition) bool - -func (fn RegistrationFunc) Required(rec record.EventRecorder, obj client.Object, conds *[]metav1.Condition) bool { - return fn(rec, obj, conds) -} - -var _ Registration = RegistrationFunc(nil) diff --git a/internal/upgradecheck/header.go b/internal/upgradecheck/header.go deleted file mode 100644 index f2449f909b..0000000000 --- a/internal/upgradecheck/header.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2017 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "encoding/json" - "net/http" - "os" - - googleuuid "github.com/google/uuid" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/util/uuid" - crclient "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/internal/controller/postgrescluster" - "github.com/crunchydata/postgres-operator/internal/feature" - "github.com/crunchydata/postgres-operator/internal/kubernetes" - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -const ( - clientHeader = "X-Crunchy-Client-Metadata" -) - -var ( - // Using apimachinery's UUID package, so our deployment UUID will be a string - deploymentID string -) - -// Extensible struct for client upgrade data -type clientUpgradeData struct { - BridgeClustersTotal int `json:"bridge_clusters_total"` - BuildSource string `json:"build_source"` - DeploymentID string `json:"deployment_id"` - FeatureGatesEnabled string `json:"feature_gates_enabled"` - IsOpenShift bool `json:"is_open_shift"` - KubernetesEnv string `json:"kubernetes_env"` - PGOClustersTotal int `json:"pgo_clusters_total"` - PGOInstaller string `json:"pgo_installer"` - PGOInstallerOrigin string `json:"pgo_installer_origin"` - PGOVersion string `json:"pgo_version"` - RegistrationToken string `json:"registration_token"` -} - -// generateHeader aggregates data and returns a struct of that data -// If any errors are encountered, it logs those errors and uses the default values -func generateHeader(ctx context.Context, crClient crclient.Client, - pgoVersion string, registrationToken string) *clientUpgradeData { - - return &clientUpgradeData{ - BridgeClustersTotal: getBridgeClusters(ctx, crClient), - BuildSource: os.Getenv("BUILD_SOURCE"), - DeploymentID: ensureDeploymentID(ctx, crClient), - FeatureGatesEnabled: feature.ShowEnabled(ctx), - IsOpenShift: kubernetes.IsOpenShift(ctx), - KubernetesEnv: kubernetes.VersionString(ctx), - PGOClustersTotal: getManagedClusters(ctx, crClient), - PGOInstaller: os.Getenv("PGO_INSTALLER"), - PGOInstallerOrigin: os.Getenv("PGO_INSTALLER_ORIGIN"), - PGOVersion: pgoVersion, - RegistrationToken: registrationToken, - } -} - -// ensureDeploymentID checks if the UUID exists in memory or in a ConfigMap -// If no UUID exists, ensureDeploymentID creates one and saves it in memory/as a ConfigMap -// Any errors encountered will be logged and the ID result will be what is in memory -func ensureDeploymentID(ctx context.Context, crClient crclient.Client) string { - // If there is no deploymentID in memory, generate one for possible use - if deploymentID == "" { - deploymentID = string(uuid.NewUUID()) - } - - cm := manageUpgradeCheckConfigMap(ctx, crClient, deploymentID) - - if cm != nil && 
cm.Data["deployment_id"] != "" { - deploymentID = cm.Data["deployment_id"] - } - - return deploymentID -} - -// manageUpgradeCheckConfigMap ensures a ConfigMap exists with a UUID -// If it doesn't exist, this creates it with the in-memory ID -// If it exists and it has a valid UUID, use that to replace the in-memory ID -// If it exists but the field is blank or mangled, we update the ConfigMap with the in-memory ID -func manageUpgradeCheckConfigMap(ctx context.Context, crClient crclient.Client, - currentID string) *corev1.ConfigMap { - - log := logging.FromContext(ctx) - upgradeCheckConfigMapMetadata := naming.UpgradeCheckConfigMap() - - cm := &corev1.ConfigMap{ - ObjectMeta: upgradeCheckConfigMapMetadata, - Data: map[string]string{"deployment_id": currentID}, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - - // If no namespace is set, then log this and skip trying to set the UUID in the ConfigMap - if upgradeCheckConfigMapMetadata.GetNamespace() == "" { - log.V(1).Info("upgrade check issue: namespace not set") - return cm - } - - retrievedCM := &corev1.ConfigMap{} - err := crClient.Get(ctx, naming.AsObjectKey(upgradeCheckConfigMapMetadata), retrievedCM) - - // If we get any error besides IsNotFound, log it, skip any ConfigMap steps, - // and use the in-memory deploymentID - if err != nil && !apierrors.IsNotFound(err) { - log.V(1).Info("upgrade check issue: error retrieving configmap", - "response", err.Error()) - return cm - } - - // If we get a ConfigMap with a "deployment_id", check if that UUID is valid - if retrievedCM.Data["deployment_id"] != "" { - _, parseErr := googleuuid.Parse(retrievedCM.Data["deployment_id"]) - // No error -- the ConfigMap has a valid deploymentID, so use that - if parseErr == nil { - cm.Data["deployment_id"] = retrievedCM.Data["deployment_id"] - } - } - - err = applyConfigMap(ctx, crClient, cm, postgrescluster.ControllerName) - if err != nil { - log.V(1).Info("upgrade check issue: could not apply configmap", - "response", err.Error()) - } - return cm -} - -// applyConfigMap is a focused version of the Reconciler.apply method, -// meant only to work with this ConfigMap -// It sends an apply patch to the Kubernetes API, with the fieldManager set to the deployment_id -// and the force parameter set to true. -// - https://docs.k8s.io/reference/using-api/server-side-apply/#managers -// - https://docs.k8s.io/reference/using-api/server-side-apply/#conflicts -func applyConfigMap(ctx context.Context, crClient crclient.Client, - object crclient.Object, owner string) error { - // Generate an apply-patch by comparing the object to its zero value. - zero := &corev1.ConfigMap{} - data, err := crclient.MergeFrom(zero).Data(object) - - if err == nil { - apply := crclient.RawPatch(crclient.Apply.Type(), data) - err = crClient.Patch(ctx, object, apply, - []crclient.PatchOption{crclient.ForceOwnership, crclient.FieldOwner(owner)}...) 
- } - return err -} - -// getManagedClusters returns a count of postgres clusters managed by this PGO instance -// Any errors encountered will be logged and the count result will be 0 -func getManagedClusters(ctx context.Context, crClient crclient.Client) int { - var count int - clusters := &v1beta1.PostgresClusterList{} - err := crClient.List(ctx, clusters) - if err != nil { - log := logging.FromContext(ctx) - log.V(1).Info("upgrade check issue: could not count postgres clusters", - "response", err.Error()) - } else { - count = len(clusters.Items) - } - return count -} - -// getBridgeClusters returns a count of Bridge clusters managed by this PGO instance -// Any errors encountered will be logged and the count result will be 0 -func getBridgeClusters(ctx context.Context, crClient crclient.Client) int { - var count int - clusters := &v1beta1.CrunchyBridgeClusterList{} - err := crClient.List(ctx, clusters) - if err != nil { - log := logging.FromContext(ctx) - log.V(1).Info("upgrade check issue: could not count bridge clusters", - "response", err.Error()) - } else { - count = len(clusters.Items) - } - return count -} - -func addHeader(req *http.Request, upgradeInfo *clientUpgradeData) *http.Request { - marshaled, _ := json.Marshal(upgradeInfo) - req.Header.Add(clientHeader, string(marshaled)) - return req -} diff --git a/internal/upgradecheck/header_test.go b/internal/upgradecheck/header_test.go deleted file mode 100644 index ac162f5cce..0000000000 --- a/internal/upgradecheck/header_test.go +++ /dev/null @@ -1,560 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "encoding/json" - "net/http" - "strings" - "testing" - - "gotest.tools/v3/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/uuid" - - // Google Kubernetes Engine / Google Cloud Platform authentication provider - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" - - "github.com/crunchydata/postgres-operator/internal/feature" - "github.com/crunchydata/postgres-operator/internal/kubernetes" - "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/testing/cmp" - "github.com/crunchydata/postgres-operator/internal/testing/require" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func TestGenerateHeader(t *testing.T) { - setupDeploymentID(t) - ctx := context.Background() - cfg, cc := require.Kubernetes2(t) - - discovery, err := kubernetes.NewDiscoveryRunner(cfg) - assert.NilError(t, err) - assert.NilError(t, discovery.Read(ctx)) - ctx = kubernetes.NewAPIContext(ctx, discovery) - - t.Setenv("PGO_INSTALLER", "test") - t.Setenv("PGO_INSTALLER_ORIGIN", "test-origin") - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - t.Setenv("BUILD_SOURCE", "developer") - - t.Run("error ensuring ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - ctx, calls := setupLogCapture(ctx) - - res := generateHeader(ctx, fakeClientWithOptionalError, "1.2.3", "") - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`)) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - pgoList := v1beta1.PostgresClusterList{} - err := cc.List(ctx, &pgoList) - assert.NilError(t, err) - assert.Equal(t, len(pgoList.Items), res.PGOClustersTotal) - bridgeList := 
v1beta1.CrunchyBridgeClusterList{} - err = cc.List(ctx, &bridgeList) - assert.NilError(t, err) - assert.Equal(t, len(bridgeList.Items), res.BridgeClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) - - t.Run("error getting cluster count", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "list error", - } - ctx, calls := setupLogCapture(ctx) - - res := generateHeader(ctx, fakeClientWithOptionalError, "1.2.3", "") - assert.Equal(t, len(*calls), 2) - // Aggregating the logs since we cannot determine which call will be first - callsAggregate := strings.Join(*calls, " ") - assert.Assert(t, cmp.Contains(callsAggregate, `upgrade check issue: could not count postgres clusters`)) - assert.Assert(t, cmp.Contains(callsAggregate, `upgrade check issue: could not count bridge clusters`)) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - assert.Equal(t, 0, res.PGOClustersTotal) - assert.Equal(t, 0, res.BridgeClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) - - t.Run("success", func(t *testing.T) { - ctx, calls := setupLogCapture(ctx) - gate := feature.NewGate() - assert.NilError(t, gate.SetFromMap(map[string]bool{ - feature.TablespaceVolumes: true, - })) - ctx = feature.NewContext(ctx, gate) - - res := generateHeader(ctx, cc, "1.2.3", "") - assert.Equal(t, len(*calls), 0) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - pgoList := v1beta1.PostgresClusterList{} - err := cc.List(ctx, &pgoList) - assert.NilError(t, err) - assert.Equal(t, len(pgoList.Items), res.PGOClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Check(t, strings.Contains( - res.FeatureGatesEnabled, - "TablespaceVolumes=true", - )) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) -} - -func TestEnsureID(t *testing.T) { - ctx := context.Background() - cc := require.Kubernetes(t) - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - - t.Run("success, no id set in mem or configmap", func(t *testing.T) { - deploymentID = "" - oldID := deploymentID - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, newID != oldID) - assert.Assert(t, newID == deploymentID) - - cm := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cm) - assert.NilError(t, err) - assert.Equal(t, newID, cm.Data["deployment_id"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("success, id set in mem, configmap created", func(t *testing.T) { - oldID := setupDeploymentID(t) - - cm := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cm) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, 
len(*calls), 0) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cm) - assert.NilError(t, err) - assert.Assert(t, deploymentID == cm.Data["deployment_id"]) - - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("success, id set in configmap, mem overwritten", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deployment_id": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, newID != oldID) - assert.Assert(t, newID == deploymentID) - assert.Assert(t, deploymentID == cmRetrieved.Data["deployment_id"]) - - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("configmap failed, no namespace given", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deployment_id": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - t.Setenv("PGO_NAMESPACE", "") - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: namespace not set`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - assert.Assert(t, deploymentID != cmRetrieved.Data["deployment_id"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("configmap failed with not NotFound error, using preexisting ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "get error", - } - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, fakeClientWithOptionalError) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: error retrieving configmap`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - - cmRetrieved := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - }) - - t.Run("configmap failed to create, using preexisting ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - oldID := setupDeploymentID(t) - - ctx, calls := setupLogCapture(ctx) - newID := ensureDeploymentID(ctx, fakeClientWithOptionalError) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - }) -} - -func TestManageUpgradeCheckConfigMap(t *testing.T) { - ctx := context.Background() - cc := require.Kubernetes(t) - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - - t.Run("no namespace given", func(t *testing.T) { - ctx, calls := setupLogCapture(ctx) - t.Setenv("PGO_NAMESPACE", "") - - returnedCM 
:= manageUpgradeCheckConfigMap(ctx, cc, "current-id")
-		assert.Equal(t, len(*calls), 1)
-		assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: namespace not set`))
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-	})
-
-	t.Run("configmap not found, created", func(t *testing.T) {
-		cmRetrieved := &corev1.ConfigMap{}
-		err := cc.Get(ctx, naming.AsObjectKey(
-			naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`)
-
-		ctx, calls := setupLogCapture(ctx)
-		returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id")
-
-		assert.Equal(t, len(*calls), 0)
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-		err = cc.Delete(ctx, returnedCM)
-		assert.NilError(t, err)
-	})
-
-	t.Run("configmap failed with not NotFound error", func(t *testing.T) {
-		fakeClientWithOptionalError := &fakeClientWithError{
-			cc, "get error",
-		}
-		ctx, calls := setupLogCapture(ctx)
-
-		returnedCM := manageUpgradeCheckConfigMap(ctx, fakeClientWithOptionalError,
-			"current-id")
-		assert.Equal(t, len(*calls), 1)
-		assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: error retrieving configmap`))
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-	})
-
-	t.Run("no deployment id in configmap", func(t *testing.T) {
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"wrong_field": string(uuid.NewUUID()),
-			},
-		}
-		err := cc.Create(ctx, cm)
-		assert.NilError(t, err)
-
-		cmRetrieved := &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(
-			naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-
-		ctx, calls := setupLogCapture(ctx)
-		returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id")
-		assert.Equal(t, len(*calls), 0)
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("mangled deployment id", func(t *testing.T) {
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"deployment_id": string(uuid.NewUUID())[1:],
-			},
-		}
-		err := cc.Create(ctx, cm)
-		assert.NilError(t, err)
-
-		cmRetrieved := &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(
-			naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-
-		ctx, calls := setupLogCapture(ctx)
-		returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id")
-		assert.Equal(t, len(*calls), 0)
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("good configmap with good id", func(t *testing.T) {
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"deployment_id": string(uuid.NewUUID()),
-			},
-		}
-		err := cc.Create(ctx, cm)
-		assert.NilError(t, err)
-
-		cmRetrieved := &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(
-			naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-
-		ctx, calls := setupLogCapture(ctx)
-		returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id")
-		assert.Equal(t, len(*calls), 0)
-		assert.Assert(t, returnedCM.Data["deployment_id"] != "current-id")
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("configmap failed to create", func(t *testing.T) {
-		fakeClientWithOptionalError := &fakeClientWithError{
-			cc, "patch error",
-		}
-
-		ctx, calls := setupLogCapture(ctx)
-		returnedCM :=
manageUpgradeCheckConfigMap(ctx, fakeClientWithOptionalError,
-			"current-id")
-		assert.Equal(t, len(*calls), 1)
-		assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`))
-		assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id")
-	})
-}
-
-func TestApplyConfigMap(t *testing.T) {
-	ctx := context.Background()
-	cc := require.Kubernetes(t)
-	t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name)
-
-	t.Run("successful create", func(t *testing.T) {
-		cmRetrieved := &corev1.ConfigMap{}
-		err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`)
-
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"new_field": "new_value",
-			},
-		}
-		cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
-		err = applyConfigMap(ctx, cc, cm, "test")
-		assert.NilError(t, err)
-		cmRetrieved = &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-		assert.Equal(t, cm.Data["new_field"], cmRetrieved.Data["new_field"])
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("successful update", func(t *testing.T) {
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"new_field": "old_value",
-			},
-		}
-		cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
-		err := cc.Create(ctx, cm)
-		assert.NilError(t, err)
-		cmRetrieved := &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-
-		cm2 := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"new_field": "new_value",
-			},
-		}
-		cm2.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
-		err = applyConfigMap(ctx, cc, cm2, "test")
-		assert.NilError(t, err)
-		cmRetrieved = &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-		assert.Equal(t, cm2.Data["new_field"], cmRetrieved.Data["new_field"])
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("successful nothing changed", func(t *testing.T) {
-		cm := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"new_field": "new_value",
-			},
-		}
-		cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
-		err := cc.Create(ctx, cm)
-		assert.NilError(t, err)
-		cmRetrieved := &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-
-		cm2 := &corev1.ConfigMap{
-			ObjectMeta: naming.UpgradeCheckConfigMap(),
-			Data: map[string]string{
-				"new_field": "new_value",
-			},
-		}
-		cm2.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
-		err = applyConfigMap(ctx, cc, cm2, "test")
-		assert.NilError(t, err)
-		cmRetrieved = &corev1.ConfigMap{}
-		err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.NilError(t, err)
-		assert.Equal(t, cm2.Data["new_field"], cmRetrieved.Data["new_field"])
-		err = cc.Delete(ctx, cm)
-		assert.NilError(t, err)
-	})
-
-	t.Run("failure", func(t *testing.T) {
-		cmRetrieved := &corev1.ConfigMap{}
-		err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved)
-		assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`)
-
-		cm :=
&corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - - err = applyConfigMap(ctx, fakeClientWithOptionalError, cm, "test") - assert.Error(t, err, "patch error") - }) -} - -func TestGetManagedClusters(t *testing.T) { - ctx := context.Background() - - t.Run("success", func(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, true) - ctx, calls := setupLogCapture(ctx) - count := getManagedClusters(ctx, fakeClient) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, count == 2) - }) - - t.Run("list throw error", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - setupFakeClientWithPGOScheme(t, true), "list error", - } - ctx, calls := setupLogCapture(ctx) - count := getManagedClusters(ctx, fakeClientWithOptionalError) - assert.Assert(t, len(*calls) > 0) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not count postgres clusters`)) - assert.Assert(t, count == 0) - }) -} - -func TestGetBridgeClusters(t *testing.T) { - ctx := context.Background() - - t.Run("success", func(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, true) - ctx, calls := setupLogCapture(ctx) - count := getBridgeClusters(ctx, fakeClient) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, count == 2) - }) - - t.Run("list throw error", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - setupFakeClientWithPGOScheme(t, true), "list error", - } - ctx, calls := setupLogCapture(ctx) - count := getBridgeClusters(ctx, fakeClientWithOptionalError) - assert.Assert(t, len(*calls) > 0) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not count bridge clusters`)) - assert.Assert(t, count == 0) - }) -} - -func TestAddHeader(t *testing.T) { - t.Run("successful", func(t *testing.T) { - req := &http.Request{ - Header: http.Header{}, - } - versionString := "1.2.3" - upgradeInfo := &clientUpgradeData{ - PGOVersion: versionString, - } - - result := addHeader(req, upgradeInfo) - header := result.Header[clientHeader] - - passedThroughData := &clientUpgradeData{} - err := json.Unmarshal([]byte(header[0]), passedThroughData) - assert.NilError(t, err) - - assert.Equal(t, passedThroughData.PGOVersion, "1.2.3") - // Failure to list clusters results in 0 returned - assert.Equal(t, passedThroughData.PGOClustersTotal, 0) - }) -} diff --git a/internal/upgradecheck/helpers_test.go b/internal/upgradecheck/helpers_test.go deleted file mode 100644 index 3d1c678ec5..0000000000 --- a/internal/upgradecheck/helpers_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package upgradecheck
-
-import (
-	"context"
-	"fmt"
-	"testing"
-
-	"github.com/go-logr/logr/funcr"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/apimachinery/pkg/util/uuid"
-	crclient "sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-
-	"github.com/crunchydata/postgres-operator/internal/controller/runtime"
-	"github.com/crunchydata/postgres-operator/internal/logging"
-	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
-)
-
-// fakeClientWithError is a controller runtime client plus an error type used to force failures
-type fakeClientWithError struct {
-	crclient.Client
-	errorType string
-}
-
-// Get returns the client.Get OR an error (`get error`) if the fakeClientWithError is set to error that way
-func (f *fakeClientWithError) Get(ctx context.Context, key types.NamespacedName, obj crclient.Object, opts ...crclient.GetOption) error {
-	switch f.errorType {
-	case "get error":
-		return fmt.Errorf("get error")
-	default:
-		return f.Client.Get(ctx, key, obj, opts...)
-	}
-}
-
-// Patch returns the client.Patch OR an error (`patch error`) if the fakeClientWithError is set to error that way
-// TODO: PatchType is not supported currently by fake
-// - https://github.com/kubernetes/client-go/issues/970
-// Once that gets fixed, we can test without envtest
-func (f *fakeClientWithError) Patch(ctx context.Context, obj crclient.Object,
-	patch crclient.Patch, opts ...crclient.PatchOption) error {
-	switch {
-	case f.errorType == "patch error":
-		return fmt.Errorf("patch error")
-	default:
-		return f.Client.Patch(ctx, obj, patch, opts...)
-	}
-}
-
-// List returns the client.List OR an error (`list error`) if the fakeClientWithError is set to error that way
-func (f *fakeClientWithError) List(ctx context.Context, objList crclient.ObjectList,
-	opts ...crclient.ListOption) error {
-	switch f.errorType {
-	case "list error":
-		return fmt.Errorf("list error")
-	default:
-		return f.Client.List(ctx, objList, opts...)
-	}
-}
-
-// setupDeploymentID returns a UUID
-func setupDeploymentID(t *testing.T) string {
-	t.Helper()
-	deploymentID = string(uuid.NewUUID())
-	return deploymentID
-}
-
-// setupFakeClientWithPGOScheme returns a fake client with the PGO scheme added;
-// if `includeCluster` is true, also adds some empty PostgresCluster and CrunchyBridgeCluster
-// items to the client
-func setupFakeClientWithPGOScheme(t *testing.T, includeCluster bool) crclient.Client {
-	t.Helper()
-	if includeCluster {
-		pc := &v1beta1.PostgresClusterList{
-			Items: []v1beta1.PostgresCluster{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "hippo",
-					},
-				},
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "elephant",
-					},
-				},
-			},
-		}
-
-		bcl := &v1beta1.CrunchyBridgeClusterList{
-			Items: []v1beta1.CrunchyBridgeCluster{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "hippo",
-					},
-				},
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "elephant",
-					},
-				},
-			},
-		}
-
-		return fake.NewClientBuilder().
-			WithScheme(runtime.Scheme).
-			WithLists(pc, bcl).
-			Build()
-	}
-	return fake.NewClientBuilder().WithScheme(runtime.Scheme).Build()
-}
-
-// setupLogCapture captures the logs and keeps count of the logs captured
-func setupLogCapture(ctx context.Context) (context.Context, *[]string) {
-	calls := []string{}
-	testlog := funcr.NewJSON(func(object string) {
-		calls = append(calls, object)
-	}, funcr.Options{
-		Verbosity: 1,
-	})
-	return logging.NewContext(ctx, testlog), &calls
-}
diff --git a/internal/upgradecheck/http.go b/internal/upgradecheck/http.go
deleted file mode 100644
index c2796ffe54..0000000000
--- a/internal/upgradecheck/http.go
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2017 - 2025 Crunchy Data Solutions, Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package upgradecheck
-
-import (
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"time"
-
-	"github.com/golang-jwt/jwt/v5"
-	"k8s.io/apimachinery/pkg/util/wait"
-	crclient "sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
-
-	"github.com/crunchydata/postgres-operator/internal/logging"
-)
-
-var (
-	client HTTPClient
-
-	// With these Backoff settings, wait.ExponentialBackoff will
-	// * use one second as the base time;
-	// * increase delays between calls by a power of 2 (1, 2, 4, etc.);
-	// * and retry four times.
-	// Note that there is no indeterminacy here since there is no Jitter set.
-	// With these parameters, the calls will occur at 0, 1, 3, and 7 seconds
-	// (i.e., at 1, 2, and 4 second delays for the retries).
-	backoff = wait.Backoff{
-		Duration: 1 * time.Second,
-		Factor:   float64(2),
-		Steps:    4,
-	}
-)
-
-const (
-	// upgradeCheckURL can be set using the CHECK_FOR_UPGRADES_URL env var
-	upgradeCheckURL = "https://operator-maestro.crunchydata.com/pgo-versions"
-)
-
-type HTTPClient interface {
-	Do(req *http.Request) (*http.Response, error)
-}
-
-// Creating an interface for cache with WaitForCacheSync to allow easier mocking
-type CacheWithWait interface {
-	WaitForCacheSync(ctx context.Context) bool
-}
-
-func init() {
-	// Since we create this client once during startup,
-	// we want each connection to be fresh, hence the non-default transport
-	// with DisableKeepAlives set to true
-	// See https://github.com/golang/go/issues/43905 and https://github.com/golang/go/issues/23427
-	// for discussion of problems with long-lived connections
-	client = &http.Client{
-		Timeout: 5 * time.Second,
-		Transport: &http.Transport{
-			DisableKeepAlives: true,
-		},
-	}
-}
-
-func checkForUpgrades(ctx context.Context, url, versionString string, backoff wait.Backoff,
-	crclient crclient.Client, registrationToken string,
-) (message string, header string, err error) {
-	var headerPayloadStruct *clientUpgradeData
-
-	// Prep request
-	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-	if err == nil {
-		// generateHeader always returns some sort of struct, using defaults/nil values
-		// in case some of the checks return errors
-		headerPayloadStruct = generateHeader(ctx, crclient,
-			versionString, registrationToken)
-		req = addHeader(req, headerPayloadStruct)
-	}
-
-	// wait.ExponentialBackoff will retry the func according to the backoff object until
-	// (a) func returns done as true or
-	// (b) the backoff settings are exhausted,
-	// i.e., the process hits the cap for time or the number of steps
-	// The anonymous function here sets certain preexisting variables (bodyBytes, err, status)
-	// which are then used by the surrounding `checkForUpgrades` function as part of the return
-	var bodyBytes []byte
-	var status int
-
-	if
err == nil { - _ = wait.ExponentialBackoff( - backoff, - func() (done bool, backoffErr error) { - var res *http.Response - res, err = client.Do(req) - - if err == nil { - defer res.Body.Close() - status = res.StatusCode - - // This is a very basic check, ignoring nuances around - // certain StatusCodes that should either prevent or impact retries - if status == http.StatusOK { - bodyBytes, err = io.ReadAll(res.Body) - return true, nil - } - } - - // Return false, nil to continue checking - return false, nil - }) - } - - // We received responses, but none of them were 200 OK. - if err == nil && status != http.StatusOK { - err = fmt.Errorf("received StatusCode %d", status) - } - - // TODO: Parse response and log info for user on potential upgrades - return string(bodyBytes), req.Header.Get(clientHeader), err -} - -type CheckForUpgradesScheduler struct { - Client crclient.Client - - Refresh time.Duration - RegistrationToken string - URL, Version string -} - -// ManagedScheduler creates a [CheckForUpgradesScheduler] and adds it to m. -// NOTE(registration): This takes a token/nil parameter when the operator is started. -// Currently the operator restarts when the token is updated, -// so this token is always current; but if that restart behavior is changed, -// we will want the upgrade mechanism to instantiate its own registration runner -// or otherwise get the most recent token. -func ManagedScheduler(m manager.Manager, - url, version string, registrationToken *jwt.Token) error { - if url == "" { - url = upgradeCheckURL - } - - var token string - if registrationToken != nil { - token = registrationToken.Raw - } - - return m.Add(&CheckForUpgradesScheduler{ - Client: m.GetClient(), - Refresh: 24 * time.Hour, - RegistrationToken: token, - URL: url, - Version: version, - }) -} - -// NeedLeaderElection returns true so that s runs only on the single -// [manager.Manager] that is elected leader in the Kubernetes cluster. -func (s *CheckForUpgradesScheduler) NeedLeaderElection() bool { return true } - -// Start checks for upgrades periodically. It blocks until ctx is cancelled. -func (s *CheckForUpgradesScheduler) Start(ctx context.Context) error { - s.check(ctx) - - ticker := time.NewTicker(s.Refresh) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - s.check(ctx) - case <-ctx.Done(): - return ctx.Err() - } - } -} - -func (s *CheckForUpgradesScheduler) check(ctx context.Context) { - log := logging.FromContext(ctx) - - defer func() { - if v := recover(); v != nil { - log.V(1).Info("encountered panic in upgrade check", "response", v) - } - }() - - info, header, err := checkForUpgrades(ctx, - s.URL, s.Version, backoff, s.Client, s.RegistrationToken) - - if err != nil { - log.V(1).Info("could not complete upgrade check", "response", err.Error()) - } else { - log.Info(info, clientHeader, header) - } -} diff --git a/internal/upgradecheck/http_test.go b/internal/upgradecheck/http_test.go deleted file mode 100644 index 6393c305c8..0000000000 --- a/internal/upgradecheck/http_test.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package upgradecheck
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/go-logr/logr/funcr"
-	"gotest.tools/v3/assert"
-	"k8s.io/apimachinery/pkg/util/wait"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
-
-	"github.com/crunchydata/postgres-operator/internal/feature"
-	"github.com/crunchydata/postgres-operator/internal/logging"
-	"github.com/crunchydata/postgres-operator/internal/testing/cmp"
-)
-
-func init() {
-	client = &MockClient{Timeout: 1}
-	// set backoff to two steps, 1 second apart for testing
-	backoff = wait.Backoff{
-		Duration: 1 * time.Second,
-		Factor:   float64(1),
-		Steps:    2,
-	}
-}
-
-type MockClient struct {
-	Timeout time.Duration
-}
-
-var funcFoo func() (*http.Response, error)
-
-// Do is the mock request; it returns whatever funcFoo is set to provide
-func (m *MockClient) Do(req *http.Request) (*http.Response, error) {
-	return funcFoo()
-}
-
-func TestCheckForUpgrades(t *testing.T) {
-	fakeClient := setupFakeClientWithPGOScheme(t, true)
-
-	ctx := logging.NewContext(context.Background(), logging.Discard())
-	gate := feature.NewGate()
-	assert.NilError(t, gate.SetFromMap(map[string]bool{
-		feature.TablespaceVolumes: true,
-	}))
-	ctx = feature.NewContext(ctx, gate)
-
-	// Pass *testing.T to allow the correct messages from the assert package
-	// in the event of certain failures.
-	checkData := func(t *testing.T, header string) {
-		data := clientUpgradeData{}
-		err := json.Unmarshal([]byte(header), &data)
-		assert.NilError(t, err)
-		assert.Assert(t, data.DeploymentID != "")
-		assert.Equal(t, data.PGOVersion, "4.7.3")
-		assert.Equal(t, data.RegistrationToken, "speakFriend")
-		assert.Equal(t, data.BridgeClustersTotal, 2)
-		assert.Equal(t, data.PGOClustersTotal, 2)
-		assert.Equal(t, data.FeatureGatesEnabled,
-			"AutoCreateUserSchema=true,InstanceSidecars=true,PGUpgradeCPUConcurrency=true,TablespaceVolumes=true")
-	}
-
-	t.Run("success", func(t *testing.T) {
-		// A successful call
-		funcFoo = func() (*http.Response, error) {
-			json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}`
-			return &http.Response{
-				Body:       io.NopCloser(strings.NewReader(json)),
-				StatusCode: http.StatusOK,
-			}, nil
-		}
-
-		res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff,
-			fakeClient, "speakFriend")
-		assert.NilError(t, err)
-		assert.Equal(t, res, `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}`)
-		checkData(t, header)
-	})
-
-	t.Run("total failure, err sending", func(t *testing.T) {
-		var counter int
-		// A call returning errors
-		funcFoo = func() (*http.Response, error) {
-			counter++
-			return &http.Response{}, errors.New("whoops")
-		}
-
-		res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff,
-			fakeClient, "speakFriend")
-		// Two failed calls because backoff is set to two steps in init
-		assert.Equal(t, counter, 2)
-		assert.Equal(t, res, "")
-		assert.Equal(t, err.Error(), `whoops`)
-		checkData(t, header)
-	})
-
-	t.Run("total failure, bad StatusCode", func(t *testing.T) {
-		var counter int
-		// A call returning bad StatusCode
-		funcFoo = func() (*http.Response, error) {
-			counter++
-			return &http.Response{
-				Body:       io.NopCloser(strings.NewReader("")),
-				StatusCode: http.StatusBadRequest,
-			}, nil
-		}
-
-		res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff,
-			fakeClient, "speakFriend")
-		assert.Equal(t, res, "")
-		// Two failed calls because backoff is set to two steps in init
-
assert.Equal(t, counter, 2) - assert.Equal(t, err.Error(), `received StatusCode 400`) - checkData(t, header) - }) - - t.Run("one failure, then success", func(t *testing.T) { - var counter int - // A call returning bad StatusCode the first time - // and a successful response the second time - funcFoo = func() (*http.Response, error) { - if counter < 1 { - counter++ - return &http.Response{ - Body: io.NopCloser(strings.NewReader("")), - StatusCode: http.StatusBadRequest, - }, nil - } - counter++ - json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}` - return &http.Response{ - Body: io.NopCloser(strings.NewReader(json)), - StatusCode: http.StatusOK, - }, nil - } - - res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff, - fakeClient, "speakFriend") - assert.Equal(t, counter, 2) - assert.NilError(t, err) - assert.Equal(t, res, `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}`) - checkData(t, header) - }) -} - -// TODO(benjaminjb): Replace `fake` with envtest -func TestCheckForUpgradesScheduler(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, false) - - t.Run("panic from checkForUpgrades doesn't bubble up", func(t *testing.T) { - ctx := context.Background() - - // capture logs - var calls []string - ctx = logging.NewContext(ctx, funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - })) - - // A panicking call - funcFoo = func() (*http.Response, error) { - panic(fmt.Errorf("oh no!")) - } - - s := CheckForUpgradesScheduler{ - Client: fakeClient, - } - s.check(ctx) - - assert.Equal(t, len(calls), 2) - assert.Assert(t, cmp.Contains(calls[1], `encountered panic in upgrade check`)) - }) - - t.Run("successful log each loop, ticker works", func(t *testing.T) { - ctx := context.Background() - - // capture logs - var calls []string - ctx = logging.NewContext(ctx, funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - })) - - // A successful call - funcFoo = func() (*http.Response, error) { - json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}` - return &http.Response{ - Body: io.NopCloser(strings.NewReader(json)), - StatusCode: http.StatusOK, - }, nil - } - - // Set loop time to 1s and sleep for 2s before sending the done signal - ctx, cancel := context.WithTimeout(ctx, 2*time.Second) - defer cancel() - s := CheckForUpgradesScheduler{ - Client: fakeClient, - Refresh: 1 * time.Second, - } - assert.ErrorIs(t, context.DeadlineExceeded, s.Start(ctx)) - - // Sleeping leads to some non-deterministic results, but we expect at least 2 executions - // plus one log for the failure to apply the configmap - assert.Assert(t, len(calls) >= 4) - - assert.Assert(t, cmp.Contains(calls[1], `{\"pgo_versions\":[{\"tag\":\"v5.0.4\"},{\"tag\":\"v5.0.3\"},{\"tag\":\"v5.0.2\"},{\"tag\":\"v5.0.1\"},{\"tag\":\"v5.0.0\"}]}`)) - assert.Assert(t, cmp.Contains(calls[3], `{\"pgo_versions\":[{\"tag\":\"v5.0.4\"},{\"tag\":\"v5.0.3\"},{\"tag\":\"v5.0.2\"},{\"tag\":\"v5.0.1\"},{\"tag\":\"v5.0.0\"}]}`)) - }) -} - -func TestCheckForUpgradesSchedulerLeaderOnly(t *testing.T) { - // CheckForUpgradesScheduler should implement this interface. 
- var s manager.LeaderElectionRunnable = new(CheckForUpgradesScheduler) - - assert.Assert(t, s.NeedLeaderElection(), - "expected to only run on the leader") -} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 68cc117f68..c6ceaaa63e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -367,12 +367,6 @@ type PostgresClusterStatus struct { // +optional PGBackRest *PGBackRestStatus `json:"pgbackrest,omitempty"` - // +optional - RegistrationRequired *RegistrationRequirementStatus `json:"registrationRequired,omitempty"` - - // +optional - TokenRequired string `json:"tokenRequired,omitempty"` - // Stores the current PostgreSQL major version following a successful // major PostgreSQL upgrade. // +optional @@ -427,7 +421,6 @@ const ( PersistentVolumeResizeError = "PersistentVolumeResizeError" PostgresClusterProgressing = "Progressing" ProxyAvailable = "ProxyAvailable" - Registered = "Registered" ) type PostgresInstanceSetSpec struct { @@ -610,10 +603,6 @@ func (s *PostgresProxySpec) Default() { } } -type RegistrationRequirementStatus struct { - PGOVersion string `json:"pgoVersion,omitempty"` -} - type PostgresProxyStatus struct { PGBouncer PGBouncerPodStatus `json:"pgBouncer,omitempty"` } diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 747e363854..ec31e27b3b 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -2176,11 +2176,6 @@ func (in *PostgresClusterStatus) DeepCopyInto(out *PostgresClusterStatus) { *out = new(PGBackRestStatus) (*in).DeepCopyInto(*out) } - if in.RegistrationRequired != nil { - in, out := &in.RegistrationRequired, &out.RegistrationRequired - *out = new(RegistrationRequirementStatus) - **out = **in - } out.Proxy = in.Proxy if in.UserInterface != nil { in, out := &in.UserInterface, &out.UserInterface @@ -2526,21 +2521,6 @@ func (in *PostgresVolumesSpec) DeepCopy() *PostgresVolumesSpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RegistrationRequirementStatus) DeepCopyInto(out *RegistrationRequirementStatus) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RegistrationRequirementStatus. -func (in *RegistrationRequirementStatus) DeepCopy() *RegistrationRequirementStatus { - if in == nil { - return nil - } - out := new(RegistrationRequirementStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RepoAzure) DeepCopyInto(out *RepoAzure) { *out = *in From bdbaae5f32c063a8694f696aa78c477c77b707f1 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 5 Nov 2025 12:53:59 -0800 Subject: [PATCH 63/79] Allow OTel metrics to work with postgres versions greater than 17. If postgres_exporter is used with postgres versions greater than 17, create a warning event and set the setup.sql blank. 
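
In short, setup.sql selection now happens in one place. A rough sketch of the
new decision flow (illustrative only; readSetupSQL is a hypothetical stand-in
for the os.ReadFile call in the patch below):

    // Sketch: how reconcileExporterSqlSetup chooses a setup script.
    if collector.OpenTelemetryMetricsEnabled(ctx, cluster) {
        setup = metricsSetupForOTelCollector // OTel metrics always take precedence
    } else if cluster.Spec.PostgresVersion > 17 {
        setup = "" // pgMonitor has no postgres_exporter support past PG 17; warn via event
    } else {
        setup, err = readSetupSQL(ctx, cluster.Spec.PostgresVersion) // hypothetical helper
    }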
--- .../controller/postgrescluster/pgmonitor.go | 56 +++++-- .../postgrescluster/pgmonitor_test.go | 146 +++++++++++++++++- internal/pgmonitor/exporter.go | 6 + internal/pgmonitor/exporter_test.go | 6 + 4 files changed, 199 insertions(+), 15 deletions(-) diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index a08e182158..cac1bd2057 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -43,6 +43,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, monitoringSecret *corev1.Secret) error { var ( + err error writableInstance *Instance writablePod *corev1.Pod setup string @@ -64,23 +65,11 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // that function against an updated and running pod. if pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { - sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), cluster.Spec.PostgresVersion)) + setup, err = r.reconcileExporterSqlSetup(ctx, cluster) if err != nil { return err } - if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { - setup = metricsSetupForOTelCollector - } else { - // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. - // pgMonitor queries expect a path to a script that runs pgBackRest - // info and provides json output. In the queries yaml for pgBackRest - // the default path is `/usr/bin/pgbackrest-info.sh`. We update - // the path to point to the script in our database image. - setup = strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", - "/opt/crunchy/bin/postgres/pgbackrest_info.sh") - } - for _, containerStatus := range writablePod.Status.ContainerStatuses { if containerStatus.Name == naming.ContainerDatabase { pgImageSHA = containerStatus.ImageID @@ -145,6 +134,47 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return err } +// reconcileExporterSqlSetup generates the setup.sql string based on +// whether the OTel metrics feature is enabled or not and the postgres +// version being used. This function assumes that at least one of +// OTel metrics or postgres_exporter are enabled. +func (r *Reconciler) reconcileExporterSqlSetup(ctx context.Context, + cluster *v1beta1.PostgresCluster) (string, error) { + + // If OTel Metrics is enabled we always want to use it. Otherwise, + // we can assume that postgres_exporter is enabled and we should + // use that + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { + return metricsSetupForOTelCollector, nil + } + + // pgMonitor will not be adding support for postgres_exporter for postgres + // versions past 17. If using postgres 18 or later with the postgres_exporter, + // create a warning event and set the sql setup to an empty string + pgVersion := cluster.Spec.PostgresVersion + if pgVersion > 17 { + r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "ExporterNotSupportedForPostgresVersion", + "postgres_exporter not supported for pg%d; use OTel for postgres 18 and later", + pgVersion) + return "", nil + } + + // OTel Metrics is not enabled and postgres is version 17 or less, + // go ahead and read the appropriate sql file, format the string, + // and return it + sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), pgVersion)) + if err != nil { + return "", err + } + // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. 
+ // pgMonitor queries expect a path to a script that runs pgBackRest + // info and provides json output. In the queries yaml for pgBackRest + // the default path is `/usr/bin/pgbackrest-info.sh`. We update + // the path to point to the script in our database image. + return strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", + "/opt/crunchy/bin/postgres/pgbackrest_info.sh"), nil +} + // reconcileMonitoringSecret reconciles the secret containing authentication // for monitoring tools func (r *Reconciler) reconcileMonitoringSecret( diff --git a/internal/controller/postgrescluster/pgmonitor_test.go b/internal/controller/postgrescluster/pgmonitor_test.go index e4ccaf0d9f..084ed01755 100644 --- a/internal/controller/postgrescluster/pgmonitor_test.go +++ b/internal/controller/postgrescluster/pgmonitor_test.go @@ -20,10 +20,12 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -551,8 +553,7 @@ func TestReconcilePGMonitorExporter(t *testing.T) { observed := &observedInstances{forCluster: instances} called = false - assert.NilError(t, reconciler.reconcilePGMonitorExporter(ctx, - cluster, observed, nil)) + assert.NilError(t, reconciler.reconcilePGMonitorExporter(ctx, cluster, observed, nil)) assert.Assert(t, called, "PodExec was not called.") assert.Assert(t, cluster.Status.Monitoring.ExporterConfiguration != "", "ExporterConfiguration was empty.") }) @@ -830,6 +831,147 @@ func TestReconcileExporterQueriesConfig(t *testing.T) { actual, err = reconciler.reconcileExporterQueriesConfig(ctx, cluster) assert.NilError(t, err) assert.Assert(t, actual.Data["defaultQueries.yml"] == existing.Data["defaultQueries.yml"], "Data does not align.") + assert.Assert(t, actual.Data["defaultQueries.yml"] != "", "Data should not be empty.") + }) + + t.Run("Pg>17", func(t *testing.T) { + cluster.Spec.PostgresVersion = 18 + actual, err = reconciler.reconcileExporterQueriesConfig(ctx, cluster) + assert.NilError(t, err) + assert.Assert(t, actual.Data["defaultQueries.yml"] == "", "Data should be empty") }) }) } + +// TestReconcileExporterSqlSetup checks that the setup script returned +// by reconcileExporterSqlSetup is either empty or not depending on +// which exporter is enabled and what the postgres version is. 
+func TestReconcileExporterSqlSetup(t *testing.T) { + ctx := context.Background() + + monitoringSpec := &v1beta1.MonitoringSpec{ + PGMonitor: &v1beta1.PGMonitorSpec{ + Exporter: &v1beta1.ExporterSpec{ + Image: "image", + }, + }, + } + + instrumentationSpec := &v1beta1.InstrumentationSpec{ + Image: "image", + } + + testCases := []struct { + tcName string + postgresVersion int + exporterEnabled bool + otelMetricsEnabled bool + errorPresent bool + setupEmpty bool + expectedNumEvents int + expectedEvent string + }{{ + tcName: "ExporterEnabledOtelDisabled", + postgresVersion: 17, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterDisabledOtelEnabled", + postgresVersion: 17, + exporterEnabled: false, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "BothEnabled", + postgresVersion: 17, + exporterEnabled: true, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterEnabledOtelDisabledPostgres18", + postgresVersion: 18, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: false, + setupEmpty: true, + expectedNumEvents: 1, + expectedEvent: "postgres_exporter not supported for pg18; use OTel for postgres 18 and later", + }, { + tcName: "ExporterDisabledOtelEnabledPostgres18", + postgresVersion: 18, + exporterEnabled: false, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "BothEnabledPostgres18", + postgresVersion: 18, + exporterEnabled: true, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterEnabledOtelDisabledBadPostgresVersion", + postgresVersion: 1, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: true, + setupEmpty: true, + expectedNumEvents: 0, + expectedEvent: "", + }} + + for _, tc := range testCases { + t.Run(tc.tcName, func(t *testing.T) { + cluster := testCluster() + cluster.Spec.PostgresVersion = tc.postgresVersion + + recorder := events.NewRecorder(t, runtime.Scheme) + r := &Reconciler{Recorder: recorder} + + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: tc.otelMetricsEnabled, + })) + ctx := feature.NewContext(ctx, gate) + + if tc.otelMetricsEnabled { + cluster.Spec.Instrumentation = instrumentationSpec + } + + if tc.exporterEnabled { + cluster.Spec.Monitoring = monitoringSpec + } + + setup, err := r.reconcileExporterSqlSetup(ctx, cluster) + if tc.errorPresent { + assert.Assert(t, err != nil) + } else { + assert.NilError(t, err) + } + assert.Equal(t, setup == "", tc.setupEmpty) + + assert.Equal(t, len(recorder.Events), tc.expectedNumEvents) + if tc.expectedNumEvents == 1 { + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "ExporterNotSupportedForPostgresVersion") + assert.Equal(t, recorder.Events[0].Note, tc.expectedEvent) + assert.Equal(t, recorder.Events[0].Type, corev1.EventTypeWarning) + } + }) + } +} diff --git a/internal/pgmonitor/exporter.go b/internal/pgmonitor/exporter.go index c8422fcc2c..824674349b 100644 --- a/internal/pgmonitor/exporter.go +++ b/internal/pgmonitor/exporter.go @@ -66,6 +66,12 @@ func GenerateDefaultExporterQueries(ctx context.Context, 
cluster *v1beta1.Postgr queries += string(queriesContents) + "\n" } + // pgMonitor will not be adding support for postgres_exporter for postgres + // versions past 17. If pg version is greater than 17, return an empty string. + if cluster.Spec.PostgresVersion > 17 { + return "" + } + // Add general queries for specific postgres version queriesGeneral, err := os.ReadFile(fmt.Sprintf("%s/pg%d/queries_general.yml", queriesConfigDir, cluster.Spec.PostgresVersion)) if err != nil { diff --git a/internal/pgmonitor/exporter_test.go b/internal/pgmonitor/exporter_test.go index 486b658dab..f9c8321821 100644 --- a/internal/pgmonitor/exporter_test.go +++ b/internal/pgmonitor/exporter_test.go @@ -38,6 +38,12 @@ func TestGenerateDefaultExporterQueries(t *testing.T) { assert.Assert(t, strings.Contains(queries, "ccp_pg_stat_statements_reset"), "Queries do not contain 'ccp_pg_stat_statements_reset' query when they should.") }) + + t.Run("PG>17", func(t *testing.T) { + cluster.Spec.PostgresVersion = 18 + queries := GenerateDefaultExporterQueries(ctx, cluster) + assert.Equal(t, queries, "") + }) } func TestExporterStartCommand(t *testing.T) { From af258f260ee64d3f148ee73b9e94c4c8aec88fcf Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 7 Nov 2025 21:56:31 -0800 Subject: [PATCH 64/79] Fix some envtests: Add required container name and image to statefulsets. Pin envtest k8s version to latest supported version (1.34). --- Makefile | 1 + .../controller/postgrescluster/apply_test.go | 8 ++++++ .../controller_ref_manager_test.go | 8 ++++++ .../postgrescluster/pgbackrest_test.go | 27 ++++++++++++++++--- .../standalone_pgadmin/statefulset_test.go | 2 ++ 5 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 26302d5f9b..29e5090e1a 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ GO_BUILD = $(GO) build GO_TEST ?= $(GO) test KUTTL ?= kubectl-kuttl KUTTL_TEST ?= $(KUTTL) test +ENVTEST_K8S_VERSION ?= 1.34 ##@ General diff --git a/internal/controller/postgrescluster/apply_test.go b/internal/controller/postgrescluster/apply_test.go index 85dbca995d..d2c77ceb27 100644 --- a/internal/controller/postgrescluster/apply_test.go +++ b/internal/controller/postgrescluster/apply_test.go @@ -151,6 +151,14 @@ func TestServerSideApply(t *testing.T) { MatchLabels: map[string]string{"select": name}, } sts.Spec.Template.Labels = map[string]string{"select": name} + sts.Spec.Template.Spec = corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + } return &sts } diff --git a/internal/controller/postgrescluster/controller_ref_manager_test.go b/internal/controller/postgrescluster/controller_ref_manager_test.go index fa8450c5d9..758daf2ef3 100644 --- a/internal/controller/postgrescluster/controller_ref_manager_test.go +++ b/internal/controller/postgrescluster/controller_ref_manager_test.go @@ -46,6 +46,14 @@ func TestManageControllerRefs(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"label1": "val1"}, }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, } diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index eec1b05deb..f746b14597 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -1679,7 +1679,14 @@ func TestGetPGBackRestResources(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ 
Labels: naming.PGBackRestDedicatedLabels(clusterName), }, - Spec: corev1.PodSpec{}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, }, @@ -1717,7 +1724,14 @@ func TestGetPGBackRestResources(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: naming.PGBackRestDedicatedLabels(clusterName), }, - Spec: corev1.PodSpec{}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, }, @@ -1753,7 +1767,14 @@ func TestGetPGBackRestResources(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: naming.PGBackRestDedicatedLabels(clusterName), }, - Spec: corev1.PodSpec{}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, }, diff --git a/internal/controller/standalone_pgadmin/statefulset_test.go b/internal/controller/standalone_pgadmin/statefulset_test.go index 9d6b804476..e7cccf15dc 100644 --- a/internal/controller/standalone_pgadmin/statefulset_test.go +++ b/internal/controller/standalone_pgadmin/statefulset_test.go @@ -41,6 +41,7 @@ func TestReconcilePGAdminStatefulSet(t *testing.T) { resources: { requests: { storage: 1Gi } }, }, }`) + pgadmin.Spec.Image = initialize.String("some-image") assert.NilError(t, cc.Create(ctx, pgadmin)) t.Cleanup(func() { assert.Check(t, cc.Delete(ctx, pgadmin)) }) @@ -117,6 +118,7 @@ terminationGracePeriodSeconds: 30 resources: { requests: { storage: 1Gi } }, }, }`) + custompgadmin.Spec.Image = initialize.String("some-image") // annotation and label custompgadmin.Spec.Metadata = &v1beta1.Metadata{ From 442878b8b5a4093f19073f85cfc94c96fa0f4ac0 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Sat, 8 Nov 2025 00:17:34 -0800 Subject: [PATCH 65/79] Allow users to set ssl_groups or ssl_ecdh_curve via spec.config.parameters. 
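Either parameter can now be set under spec.config.parameters, while every other ssl_-prefixed key stays rejected by the "TLS is always enabled" rule. A sketch of the allowed case, written the way the validation tests added later in this series exercise the API (the parameter value is illustrative):

cluster := u.DeepCopy() // an unstructured, otherwise-valid PostgresCluster
require.UnmarshalIntoField(t, cluster,
    require.Value(yaml.Marshal("prime256v1")), // any string value works here
    "spec", "config", "parameters", "ssl_ecdh_curve")

assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll))

The same pattern with a key such as ssl_min_protocol_version still fails validation, since only these two ssl_ parameters are exempted.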
--- .../postgres-operator.crunchydata.com_postgresclusters.yaml | 3 ++- .../v1beta1/postgres_types.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index bd3b210500..10cfd58790 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -4822,7 +4822,8 @@ spec: - message: change port using .spec.port instead rule: '!has(self.port)' - message: TLS is always enabled - rule: '!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))' + rule: '!has(self.ssl) && !self.exists(k, k.startsWith("ssl_") + && !(k == ''ssl_groups'' || k == ''ssl_ecdh_curve''))' - message: domain socket paths cannot be changed rule: '!self.exists(k, k.startsWith("unix_socket_"))' - message: wal_level must be "replica" or higher diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index 47f7382671..e45c29b8bd 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -54,7 +54,7 @@ type PostgresConfigSpec struct { // // +kubebuilder:validation:XValidation:rule=`!has(self.listen_addresses)`,message=`network connectivity is always enabled: listen_addresses` // +kubebuilder:validation:XValidation:rule=`!has(self.port)`,message=`change port using .spec.port instead` - // +kubebuilder:validation:XValidation:rule=`!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))`,message=`TLS is always enabled` + // +kubebuilder:validation:XValidation:rule=`!has(self.ssl) && !self.exists(k, k.startsWith("ssl_") && !(k == 'ssl_groups' || k == 'ssl_ecdh_curve'))`,message=`TLS is always enabled` // +kubebuilder:validation:XValidation:rule=`!self.exists(k, k.startsWith("unix_socket_"))`,message=`domain socket paths cannot be changed` // // # Write Ahead Log From d463f1233761442b24f3581f3b27aaa87cc7dcbf Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Sat, 8 Nov 2025 02:31:36 -0800 Subject: [PATCH 66/79] Add rule to prevent users from attempting to use the ssl_groups parameter with pg17 or earlier. 
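In effect, has(self.?config.parameters.ssl_groups) is false whenever config, parameters, or the key itself is absent, so the rule only constrains clusters that actually set ssl_groups. A sketch of the rejected case, in the same dry-run style as above:

cluster := u.DeepCopy()
require.UnmarshalIntoField(t, cluster,
    require.Value(yaml.Marshal(17)), "spec", "postgresVersion")
require.UnmarshalIntoField(t, cluster,
    require.Value(yaml.Marshal("anything")),
    "spec", "config", "parameters", "ssl_groups")

err := cc.Create(ctx, cluster, client.DryRunAll)
assert.Assert(t, apierrors.IsInvalid(err)) // "only available in pg18 and greater"

Bumping spec.postgresVersion to 18 makes the identical spec pass, as the tests in the next patch verify.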
--- .../postgres-operator.crunchydata.com_postgresclusters.yaml | 4 ++++ .../v1beta1/postgrescluster_types.go | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 10cfd58790..0c6558cc28 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -18248,6 +18248,10 @@ spec: - instances - postgresVersion type: object + x-kubernetes-validations: + - message: The ssl_groups parameter is only available in pg18 and greater + rule: '!has(self.?config.parameters.ssl_groups) || self.postgresVersion + > 17' status: description: PostgresClusterStatus defines the observed state of PostgresCluster properties: diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index c6ceaaa63e..6db6ce7459 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -13,6 +13,11 @@ import ( ) // PostgresClusterSpec defines the desired state of PostgresCluster +// --- +// +// # Postgres 18 +// +// +kubebuilder:validation:XValidation:rule=`!has(self.?config.parameters.ssl_groups) || self.postgresVersion > 17`,message=`The ssl_groups parameter is only available in pg18 and greater` type PostgresClusterSpec struct { // +optional Metadata *Metadata `json:"metadata,omitempty"` From 7da93cf0607afb9b29ad44e0419b48b9d8281b70 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Sat, 8 Nov 2025 03:51:04 -0800 Subject: [PATCH 67/79] Tests for ssl_groups and ssl_ecdh_curve config parameters. --- .../postgrescluster/postgres_config_test.go | 395 ++++++++++++++++++ internal/testing/require/encoding.go | 22 + internal/testing/require/errors.go | 13 +- 3 files changed, 429 insertions(+), 1 deletion(-) create mode 100644 internal/crd/validation/postgrescluster/postgres_config_test.go diff --git a/internal/crd/validation/postgrescluster/postgres_config_test.go b/internal/crd/validation/postgrescluster/postgres_config_test.go new file mode 100644 index 0000000000..fe3cf384b7 --- /dev/null +++ b/internal/crd/validation/postgrescluster/postgres_config_test.go @@ -0,0 +1,395 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package validation + +import ( + "context" + "fmt" + "testing" + + "gotest.tools/v3/assert" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestPostgresConfigParametersV1beta1(t *testing.T) { + ctx := context.Background() + cc := require.Kubernetes(t) + t.Parallel() + + namespace := require.Namespace(t, cc) + base := v1beta1.NewPostgresCluster() + + // required fields + require.UnmarshalInto(t, &base.Spec, `{ + postgresVersion: 16, + instances: [{ + dataVolumeClaimSpec: { + accessModes: [ReadWriteOnce], + resources: { requests: { storage: 1Mi } }, + }, + }], + }`) + + base.Spec.Backups = v1beta1.Backups{ + PGBackRest: v1beta1.PGBackRestArchive{ + Repos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + }, + } + base.Namespace = namespace.Name + base.Name = "postgres-config-parameters" + + assert.NilError(t, cc.Create(ctx, base.DeepCopy(), client.DryRunAll), + "expected this base cluster to be valid") + + var u unstructured.Unstructured + require.UnmarshalInto(t, &u, require.Value(yaml.Marshal(base))) + assert.Equal(t, u.GetAPIVersion(), "postgres-operator.crunchydata.com/v1beta1") + + testPostgresConfigParametersCommon(t, cc, u) + + t.Run("Logging", func(t *testing.T) { + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "log_directory", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + }) + + t.Run("ssl_groups and ssl_ecdh_curve", func(t *testing.T) { + t.Run("ssl_groups not allowed for pg17", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl_groups", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(17)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + }) + } + }) + + t.Run("ssl_groups allowed for pg18", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl_groups", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(18)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + + t.Run("ssl_ecdh_curve allowed for both", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl_ecdh_curve", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + 
require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(17)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + + cluster2 := u.DeepCopy() + require.UnmarshalIntoField(t, cluster2, + require.Value(yaml.Marshal(18)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster2, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster2, client.DryRunAll)) + }) + } + }) + + t.Run("other ssl_* parameters not allowed for any pg version", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl_anything", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(17)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + + cluster1 := u.DeepCopy() + require.UnmarshalIntoField(t, cluster1, + require.Value(yaml.Marshal(18)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster1, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err = cc.Create(ctx, cluster1, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details = require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + }) + } + }) + }) +} + +func testPostgresConfigParametersCommon(t *testing.T, cc client.Client, base unstructured.Unstructured) { + ctx := context.Background() + + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {"archive_timeout", 100}, + {"archive_timeout", "20s"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + + t.Run("Disallowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "cluster_name", value: "asdf"}, + {key: "config_file", value: "asdf"}, + {key: "data_directory", value: ""}, + {key: "external_pid_file", value: ""}, + {key: "hba_file", value: "one"}, + {key: "hot_standby", value: "off"}, + {key: "ident_file", value: "two"}, + {key: "listen_addresses", value: ""}, + {key: "port", value: 5}, + {key: "wal_log_hints", value: "off"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + + // TODO(k8s-1.30) TODO(validation): Move the parameter name from the message to the field path. 
+ assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.key)) + }) + } + }) + + t.Run("Logging", func(t *testing.T) { + for _, tt := range []struct { + valid bool + key string + value any + message string + }{ + {valid: false, key: "log_file_mode", value: "", message: "cannot be changed"}, + {valid: false, key: "log_file_mode", value: "any", message: "cannot be changed"}, + {valid: false, key: "logging_collector", value: "", message: "unsafe"}, + {valid: false, key: "logging_collector", value: "off", message: "unsafe"}, + {valid: false, key: "logging_collector", value: "on", message: "unsafe"}, + + {valid: true, key: "log_destination", value: "anything"}, + {valid: true, key: "log_filename", value: "anything"}, + {valid: true, key: "log_filename", value: "percent-%s-too"}, + {valid: true, key: "log_rotation_age", value: "7d"}, + {valid: true, key: "log_rotation_age", value: 5}, + {valid: true, key: "log_rotation_size", value: "100MB"}, + {valid: true, key: "log_rotation_size", value: 13}, + {valid: true, key: "log_timezone", value: ""}, + {valid: true, key: "log_timezone", value: "nonsense"}, + } { + t.Run(fmt.Sprint(tt), func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + + if tt.valid { + assert.NilError(t, err) + assert.Equal(t, "", tt.message, "BUG IN TEST: no message expected when valid") + } else { + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + + // TODO(k8s-1.30) TODO(validation): Move the parameter name from the message to the field path. 
+ assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.key)) + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.message)) + } + }) + } + }) + + t.Run("NoConnections", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl", value: "off"}, + {key: "ssl_ca_file", value: ""}, + {key: "unix_socket_directories", value: "one"}, + {key: "unix_socket_group", value: "two"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("NoWriteAheadLog", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "archive_mode", value: "off"}, + {key: "archive_command", value: "true"}, + {key: "restore_command", value: "true"}, + {key: "recovery_target", value: "immediate"}, + {key: "recovery_target_name", value: "doot"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("wal_level", func(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + `logical`, "spec", "config", "parameters", "wal_level") + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + + t.Run("Invalid", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + `minimal`, "spec", "config", "parameters", "wal_level") + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + assert.ErrorContains(t, err, `"replica" or higher`) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, "wal_level")) + }) + }) + + t.Run("NoReplication", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "synchronous_standby_names", value: ""}, + {key: "primary_conninfo", value: ""}, + {key: "primary_slot_name", value: ""}, + {key: "recovery_min_apply_delay", value: ""}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) +} diff --git a/internal/testing/require/encoding.go b/internal/testing/require/encoding.go index a99f7a42f1..8016c1921a 100644 --- a/internal/testing/require/encoding.go +++ b/internal/testing/require/encoding.go @@ -9,6 +9,7 @@ import ( "testing" "gotest.tools/v3/assert" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/json" "sigs.k8s.io/yaml" ) @@ -37,3 +38,24 @@ func UnmarshalInto[Data ~string | ~[]byte, Destination *T, T any]( assert.NilError(t, err) assert.NilError(t, errors.Join(strict...)) } + +// UnmarshalIntoField parses input as YAML (or JSON) the same way as the Kubernetes API Server. 
+// The result goes into a (nested) field of output. It calls t.Fatal when something fails. +func UnmarshalIntoField[Data ~string | ~[]byte]( + t testing.TB, output *unstructured.Unstructured, input Data, fields ...string, +) { + t.Helper() + + if len(fields) == 0 { + t.Fatal("BUG: called without a destination") + } + + if output.Object == nil { + output.Object = map[string]any{} + } + + var value any + UnmarshalInto(t, &value, []byte(input)) + + assert.NilError(t, unstructured.SetNestedField(output.Object, value, fields...)) +} diff --git a/internal/testing/require/errors.go b/internal/testing/require/errors.go index 128a0397b0..039f8e2879 100644 --- a/internal/testing/require/errors.go +++ b/internal/testing/require/errors.go @@ -16,14 +16,25 @@ import ( // StatusError returns the [metav1.Status] within err's tree. // It calls t.Fatal when err is nil or there is no status. func StatusError(t testing.TB, err error) metav1.Status { - status, ok := err.(apierrors.APIStatus) + t.Helper() + status, ok := err.(apierrors.APIStatus) assert.Assert(t, ok || errors.As(err, &status), "%T does not implement %T", err, status) return status.Status() } +// StatusErrorDetails returns the details of [metav1.Status] within err's tree. +// It calls t.Fatal when err is nil, there is no status, or its Details field is nil. +func StatusErrorDetails(t testing.TB, err error) metav1.StatusDetails { + t.Helper() + + status := StatusError(t, err) + assert.Assert(t, status.Details != nil) + return *status.Details +} + // Value returns v or panics when err is not nil. func Value[T any](v T, err error) T { if err != nil { From 89174c947045f70ca098517d58776aa9969263c6 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Mon, 10 Nov 2025 17:30:56 -0800 Subject: [PATCH 68/79] Small refactor of TestPostgresConfigParametersV1beta1 --- .../postgrescluster/postgres_config_test.go | 118 ++++++------------ 1 file changed, 38 insertions(+), 80 deletions(-) diff --git a/internal/crd/validation/postgrescluster/postgres_config_test.go b/internal/crd/validation/postgrescluster/postgres_config_test.go index fe3cf384b7..e9315eff2a 100644 --- a/internal/crd/validation/postgrescluster/postgres_config_test.go +++ b/internal/crd/validation/postgrescluster/postgres_config_test.go @@ -76,43 +76,24 @@ func TestPostgresConfigParametersV1beta1(t *testing.T) { }) }) - t.Run("ssl_groups and ssl_ecdh_curve", func(t *testing.T) { - t.Run("ssl_groups not allowed for pg17", func(t *testing.T) { + t.Run("SSL Settings", func(t *testing.T) { + t.Run("Allowed", func(t *testing.T) { for _, tt := range []struct { - key string - value any + key string + value any + postgresVersion int }{ - {key: "ssl_groups", value: "anything"}, - } { - t.Run(tt.key, func(t *testing.T) { - cluster := u.DeepCopy() - require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(17)), - "spec", "postgresVersion") - require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(tt.value)), - "spec", "config", "parameters", tt.key) + // ssl_ecdh_curve is allowed for all supported Postgres versions + {key: "ssl_ecdh_curve", value: "anything", postgresVersion: 17}, + {key: "ssl_ecdh_curve", value: "anything", postgresVersion: 18}, - err := cc.Create(ctx, cluster, client.DryRunAll) - assert.Assert(t, apierrors.IsInvalid(err)) - - details := require.StatusErrorDetails(t, err) - assert.Assert(t, cmp.Len(details.Causes, 1)) - }) - } - }) - - t.Run("ssl_groups allowed for pg18", func(t *testing.T) { - for _, tt := range []struct { - key string - value any - }{ - {key: 
"ssl_groups", value: "anything"}, + // ssl_groups is only supported for Postgres 18 and greater + {key: "ssl_groups", value: "anything", postgresVersion: 18}, } { t.Run(tt.key, func(t *testing.T) { cluster := u.DeepCopy() require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(18)), + require.Value(yaml.Marshal(tt.postgresVersion)), "spec", "postgresVersion") require.UnmarshalIntoField(t, cluster, require.Value(yaml.Marshal(tt.value)), @@ -123,48 +104,39 @@ func TestPostgresConfigParametersV1beta1(t *testing.T) { } }) - t.Run("ssl_ecdh_curve allowed for both", func(t *testing.T) { + t.Run("Not Allowed", func(t *testing.T) { for _, tt := range []struct { - key string - value any + key string + value any + postgresVersion int }{ - {key: "ssl_ecdh_curve", value: "anything"}, + // setting "ssl" is not allowed for any Postgres version + {key: "ssl", value: "anything", postgresVersion: 17}, + {key: "ssl", value: "anything", postgresVersion: 18}, + + // setting any parameter with an "ssl_" prefix that is not + // "ssl_ecdh_curve" or "ssl_groups" is not allowed for any version + {key: "ssl_anything", value: "anything", postgresVersion: 17}, + {key: "ssl_anything", value: "anything", postgresVersion: 18}, + + // setting "ssl_ecdh_curve" with any additional suffix is not + // allowed for any version + {key: "ssl_ecdh_curve_bad", value: "anything", postgresVersion: 17}, + {key: "ssl_ecdh_curve_bad", value: "anything", postgresVersion: 18}, + + // setting "ssl_groups" is not allowed for Postgres versions 17 + // or earlier + {key: "ssl_groups", value: "anything", postgresVersion: 17}, + + // setting "ssl_groups" with any additional suffix is not + // allowed for any version + {key: "ssl_groups_bad", value: "anything", postgresVersion: 17}, + {key: "ssl_groups_bad", value: "anything", postgresVersion: 18}, } { t.Run(tt.key, func(t *testing.T) { cluster := u.DeepCopy() require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(17)), - "spec", "postgresVersion") - require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(tt.value)), - "spec", "config", "parameters", tt.key) - - assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) - - cluster2 := u.DeepCopy() - require.UnmarshalIntoField(t, cluster2, - require.Value(yaml.Marshal(18)), - "spec", "postgresVersion") - require.UnmarshalIntoField(t, cluster2, - require.Value(yaml.Marshal(tt.value)), - "spec", "config", "parameters", tt.key) - - assert.NilError(t, cc.Create(ctx, cluster2, client.DryRunAll)) - }) - } - }) - - t.Run("other ssl_* parameters not allowed for any pg version", func(t *testing.T) { - for _, tt := range []struct { - key string - value any - }{ - {key: "ssl_anything", value: "anything"}, - } { - t.Run(tt.key, func(t *testing.T) { - cluster := u.DeepCopy() - require.UnmarshalIntoField(t, cluster, - require.Value(yaml.Marshal(17)), + require.Value(yaml.Marshal(tt.postgresVersion)), "spec", "postgresVersion") require.UnmarshalIntoField(t, cluster, require.Value(yaml.Marshal(tt.value)), @@ -175,20 +147,6 @@ func TestPostgresConfigParametersV1beta1(t *testing.T) { details := require.StatusErrorDetails(t, err) assert.Assert(t, cmp.Len(details.Causes, 1)) - - cluster1 := u.DeepCopy() - require.UnmarshalIntoField(t, cluster1, - require.Value(yaml.Marshal(18)), - "spec", "postgresVersion") - require.UnmarshalIntoField(t, cluster1, - require.Value(yaml.Marshal(tt.value)), - "spec", "config", "parameters", tt.key) - - err = cc.Create(ctx, cluster1, client.DryRunAll) - assert.Assert(t, 
apierrors.IsInvalid(err)) - - details = require.StatusErrorDetails(t, err) - assert.Assert(t, cmp.Len(details.Causes, 1)) }) } }) From 91c7fc1c6cfbc0229ba46deb6e94d3cfe9b33936 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Fri, 21 Nov 2025 16:44:51 -0500 Subject: [PATCH 69/79] We do not support postgres_exporter for pg18. Remove exporter kuttl tests when running e2e tests for postgres 18. --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 29e5090e1a..3554eb52ff 100644 --- a/Makefile +++ b/Makefile @@ -249,6 +249,12 @@ generate-kuttl: ## Generate kuttl tests mkdir -p "$${target%/*}"; render < "$${source}" > "$${target}"; \ shift; \ done' - testing/kuttl/e2e/*/*.yaml testing/kuttl/e2e/*/*/*.yaml + if [ "$$KUTTL_PG_VERSION" -ge "18" ]; then \ + [ ! -d testing/kuttl/e2e-generated/exporter-custom-queries ] || rm -rf testing/kuttl/e2e-generated/exporter-custom-queries; \ + [ ! -d testing/kuttl/e2e-generated/exporter-no-tls ] || rm -rf testing/kuttl/e2e-generated/exporter-no-tls; \ + [ ! -d testing/kuttl/e2e-generated/exporter-tls ] || rm -rf testing/kuttl/e2e-generated/exporter-tls; \ + [ ! -d testing/kuttl/e2e-generated/exporter-password-change ] || rm -rf testing/kuttl/e2e-generated/exporter-password-change; \ + fi ##@ Generate From 45ceddc897ba8f0e680172531debbb4a2ccd22a2 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Mon, 24 Nov 2025 10:43:13 -0500 Subject: [PATCH 70/79] Remove double dash from kuttl test filenames to comply with stricter filename requirements in kuttl 0.23.0. --- .../kuttl/e2e/cluster-pause/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--cluster-paused.yaml => 01-cluster-paused.yaml} | 0 .../{02--cluster-resume.yaml => 02-cluster-resume.yaml} | 0 .../kuttl/e2e/cluster-start/{00--cluster.yaml => 00-cluster.yaml} | 0 .../kuttl/e2e/cluster-start/{01--connect.yaml => 01-connect.yaml} | 0 .../{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 ...--change-custom-queries.yaml => 01-change-custom-queries.yaml} | 0 .../{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 .../{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 .../{02--change-password.yaml => 02-change-password.yaml} | 0 .../{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 .../{01--valid-upgrade.yaml => 01-valid-upgrade.yaml} | 0 .../{10--cluster.yaml => 10-cluster.yaml} | 0 .../{11--shutdown-cluster.yaml => 11-shutdown-cluster.yaml} | 0 ...t-and-update-version.yaml => 12-start-and-update-version.yaml} | 0 .../{13--shutdown-cluster.yaml => 13-shutdown-cluster.yaml} | 0 .../{14--annotate-cluster.yaml => 14-annotate-cluster.yaml} | 0 .../{15--start-cluster.yaml => 15-start-cluster.yaml} | 0 .../{17--check-version.yaml => 17-check-version.yaml} | 0 .../{02--valid-upgrade.yaml => 02-valid-upgrade.yaml} | 0 ...ready-updated-cluster.yaml => 10-already-updated-cluster.yaml} | 0 .../kuttl/e2e/major-upgrade/{30--cluster.yaml => 30-cluster.yaml} | 0 .../major-upgrade/{31--create-data.yaml => 31-create-data.yaml} | 0 .../{32--shutdown-cluster.yaml => 32-shutdown-cluster.yaml} | 0 .../{33--annotate-cluster.yaml => 33-annotate-cluster.yaml} | 0 .../{34--restart-cluster.yaml => 34-restart-cluster.yaml} | 0 ...check-data-and-version.yaml => 36-check-data-and-version.yaml} | 0 .../e2e/optional-backups/{00--cluster.yaml => 00-cluster.yaml} | 0 .../e2e/optional-backups/{04--cluster.yaml => 04-cluster.yaml} | 0 .../e2e/optional-backups/{10--cluster.yaml => 10-cluster.yaml} | 0 .../e2e/optional-backups/{20--cluster.yaml => 20-cluster.yaml} | 0 
.../e2e/optional-backups/{22--cluster.yaml => 22-cluster.yaml} | 0 .../{00--cluster.yaml => 00-cluster.yaml} | 0 ...luster.yaml => 01-add-instrumentation-to-postgrescluster.yaml} | 0 .../otel-logging-and-metrics/{03--backup.yaml => 03-backup.yaml} | 0 ...ion-to-pgadmin.yaml => 07-add-instrumentation-to-pgadmin.yaml} | 0 .../{09--add-custom-queries.yaml => 09-add-custom-queries.yaml} | 0 ...cluster.yaml => 11-add-per-db-metrics-to-postgrescluster.yaml} | 0 ....yaml => 13-add-second-per-db-metrics-to-postgrescluster.yaml} | 0 ...er.yaml => 15-remove-per-db-metrics-from-postgrescluster.yaml} | 0 ...stom-queries-per-db.yaml => 17-add-custom-queries-per-db.yaml} | 0 ...gs-metrics-exporter.yaml => 19-add-logs-metrics-exporter.yaml} | 0 .../{21--cluster-no-backups.yaml => 21-cluster-no-backups.yaml} | 0 .../{23--cluster-add-backups.yaml => 23-cluster-add-backups.yaml} | 0 .../{24--remove-backups.yaml => 24-remove-backups.yaml} | 0 .../{25--annotate-cluster.yaml => 25-annotate-cluster.yaml} | 0 .../e2e/password-change/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--psql-connect-uri.yaml => 01-psql-connect-uri.yaml} | 0 .../{01--psql-connect.yaml => 01-psql-connect.yaml} | 0 .../kuttl/e2e/password-change/{02--secret.yaml => 02-secret.yaml} | 0 .../{03--psql-connect-uri.yaml => 03-psql-connect-uri.yaml} | 0 .../{03--psql-connect.yaml => 03-psql-connect.yaml} | 0 .../kuttl/e2e/password-change/{04--secret.yaml => 04-secret.yaml} | 0 .../{05--psql-connect-uri.yaml => 05-psql-connect-uri.yaml} | 0 .../{05--psql-connect.yaml => 05-psql-connect.yaml} | 0 .../e2e/password-change/{06--cluster.yaml => 06-cluster.yaml} | 0 .../{07--psql-connect-uri.yaml => 07-psql-connect-uri.yaml} | 0 .../{07--psql-connect.yaml => 07-psql-connect.yaml} | 0 .../kuttl/e2e/password-change/{08--secret.yaml => 08-secret.yaml} | 0 .../{09--psql-connect-uri.yaml => 09-psql-connect-uri.yaml} | 0 .../{09--psql-connect.yaml => 09-psql-connect.yaml} | 0 .../kuttl/e2e/password-change/{10--secret.yaml => 10-secret.yaml} | 0 .../{11--psql-connect-uri.yaml => 11-psql-connect-uri.yaml} | 0 .../{11--psql-connect.yaml => 11-psql-connect.yaml} | 0 .../{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--check-backup-logs.yaml => 01-check-backup-logs.yaml} | 0 .../{02--cluster.yaml => 02-cluster.yaml} | 0 .../e2e/pgbackrest-init/{00--cluster.yaml => 00-cluster.yaml} | 0 .../e2e/pgbackrest-init/{02--cluster.yaml => 02-cluster.yaml} | 0 .../e2e/pgbackrest-init/{04--cluster.yaml => 04-cluster.yaml} | 0 .../{06--check-spool-path.yaml => 06-check-spool-path.yaml} | 0 .../{01--create-cluster.yaml => 01-create-cluster.yaml} | 0 .../{02--create-data.yaml => 02-create-data.yaml} | 0 .../e2e/pgbackrest-restore/{03--backup.yaml => 03-backup.yaml} | 0 .../{04--clone-cluster.yaml => 04-clone-cluster.yaml} | 0 .../{05--check-data.yaml => 05-check-data.yaml} | 0 .../{06--delete-clone.yaml => 06-delete-clone.yaml} | 0 .../pgbackrest-restore/{07--annotate.yaml => 07-annotate.yaml} | 0 .../{07--update-cluster.yaml => 07-update-cluster.yaml} | 0 .../{08--wait-restart.yaml => 08-wait-restart.yaml} | 0 .../pgbackrest-restore/{09--add-data.yaml => 09-add-data.yaml} | 0 .../{10--wait-archived.yaml => 10-wait-archived.yaml} | 0 .../{11--clone-cluster.yaml => 11-clone-cluster.yaml} | 0 .../{12--check-data.yaml => 12-check-data.yaml} | 0 .../{13--delete-clone.yaml => 13-delete-clone.yaml} | 0 .../pgbackrest-restore/{14--lose-data.yaml => 14-lose-data.yaml} | 0 .../{15--in-place-pitr.yaml => 15-in-place-pitr.yaml} | 0 .../{16--check-data.yaml => 16-check-data.yaml} | 0 
.../{17--check-replication.yaml => 17-check-replication.yaml} | 0 testing/kuttl/e2e/pgbouncer/{00--cluster.yaml => 00-cluster.yaml} | 0 .../e2e/pgbouncer/{01--psql-connect.yaml => 01-psql-connect.yaml} | 0 .../{10--read-certificate.yaml => 10-read-certificate.yaml} | 0 .../{11--open-connection.yaml => 11-open-connection.yaml} | 0 .../{12--rotate-certificate.yaml => 12-rotate-certificate.yaml} | 0 .../{13--read-certificate.yaml => 13-read-certificate.yaml} | 0 .../{14--compare-certificate.yaml => 14-compare-certificate.yaml} | 0 .../{15--check-connection.yaml => 15-check-connection.yaml} | 0 .../kuttl/e2e/pgbouncer/{16--reconnect.yaml => 16-reconnect.yaml} | 0 .../kuttl/e2e/replica-read/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--psql-replica-read.yaml => 01-psql-replica-read.yaml} | 0 .../e2e/root-cert-ownership/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--check-owners.yaml => 01-check-owners.yaml} | 0 .../{02--delete-owner1.yaml => 02-delete-owner1.yaml} | 0 .../{03--check-owners.yaml => 03-check-owners.yaml} | 0 .../{04--delete-owner2.yaml => 04-delete-owner2.yaml} | 0 .../{05--check-secret.yaml => 05-check-secret.yaml} | 0 .../scaledown/{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 .../scaledown/{01--update-cluster.yaml => 01-update-cluster.yaml} | 0 .../scaledown/{02--delete-cluster.yaml => 02-delete-cluster.yaml} | 0 .../scaledown/{10--create-cluster.yaml => 10-create-cluster.yaml} | 0 .../scaledown/{12--update-cluster.yaml => 12-update-cluster.yaml} | 0 .../scaledown/{13--delete-cluster.yaml => 13-delete-cluster.yaml} | 0 .../scaledown/{20--create-cluster.yaml => 20-create-cluster.yaml} | 0 .../scaledown/{21--update-cluster.yaml => 21-update-cluster.yaml} | 0 .../e2e/security-context/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--security-context.yaml => 01-security-context.yaml} | 0 .../e2e/security-context/{10--kyverno.yaml => 10-kyverno.yaml} | 0 .../{00--create-cluster.yaml => 00-create-cluster.yaml} | 0 .../{01--user-schema.yaml => 01-user-schema.yaml} | 0 .../{02--create-pgadmin.yaml => 02-create-pgadmin.yaml} | 0 .../{04--update-pgadmin.yaml => 04-update-pgadmin.yaml} | 0 .../{00--pgadmin.yaml => 00-pgadmin.yaml} | 0 .../{01--update-service.yaml => 01-update-service.yaml} | 0 .../{02--remove-service.yaml => 02-remove-service.yaml} | 0 .../{10--manual-service.yaml => 10-manual-service.yaml} | 0 .../{20--owned-service.yaml => 20-owned-service.yaml} | 0 ...service-takeover-fails.yaml => 21-service-takeover-fails.yaml} | 0 .../{00--create-pgadmin.yaml => 00-create-pgadmin.yaml} | 0 .../{02--edit-pgadmin-users.yaml => 02-edit-pgadmin-users.yaml} | 0 ...-user-passwords.yaml => 04-change-pgadmin-user-passwords.yaml} | 0 ...06--delete-pgadmin-users.yaml => 06-delete-pgadmin-users.yaml} | 0 .../e2e/streaming-standby/{00--secrets.yaml => 00-secrets.yaml} | 0 .../{01--primary-cluster.yaml => 01-primary-cluster.yaml} | 0 .../{02--create-data.yaml => 02-create-data.yaml} | 0 .../{03--standby-cluster.yaml => 03-standby-cluster.yaml} | 0 .../streaming-standby/{04--check-data.yaml => 04-check-data.yaml} | 0 .../kuttl/e2e/switchover/{01--cluster.yaml => 01-cluster.yaml} | 0 .../e2e/tablespace-enabled/{00--cluster.yaml => 00-cluster.yaml} | 0 .../{01--psql-connect.yaml => 01-psql-connect.yaml} | 0 .../{00--create-resources.yaml => 00-create-resources.yaml} | 0 .../{01--create-data.yaml => 01-create-data.yaml} | 0 .../{02--shutdown-cluster.yaml => 02-shutdown-cluster.yaml} | 0 .../{03--annotate-cluster.yaml => 03-annotate-cluster.yaml} | 0 .../{04--restart-cluster.yaml 
=> 04-restart-cluster.yaml} | 0 ...check-data-and-version.yaml => 06-check-data-and-version.yaml} | 0 .../{06--check-spool-path.yaml => 06-check-spool-path.yaml} | 0 146 files changed, 0 insertions(+), 0 deletions(-) rename testing/kuttl/e2e/cluster-pause/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/cluster-pause/{01--cluster-paused.yaml => 01-cluster-paused.yaml} (100%) rename testing/kuttl/e2e/cluster-pause/{02--cluster-resume.yaml => 02-cluster-resume.yaml} (100%) rename testing/kuttl/e2e/cluster-start/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/cluster-start/{01--connect.yaml => 01-connect.yaml} (100%) rename testing/kuttl/e2e/exporter-custom-queries/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/exporter-custom-queries/{01--change-custom-queries.yaml => 01-change-custom-queries.yaml} (100%) rename testing/kuttl/e2e/exporter-no-tls/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/exporter-password-change/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/exporter-password-change/{02--change-password.yaml => 02-change-password.yaml} (100%) rename testing/kuttl/e2e/exporter-tls/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{01--valid-upgrade.yaml => 01-valid-upgrade.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{10--cluster.yaml => 10-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{11--shutdown-cluster.yaml => 11-shutdown-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{12--start-and-update-version.yaml => 12-start-and-update-version.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{13--shutdown-cluster.yaml => 13-shutdown-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{14--annotate-cluster.yaml => 14-annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{15--start-cluster.yaml => 15-start-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade-missing-image/{17--check-version.yaml => 17-check-version.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{02--valid-upgrade.yaml => 02-valid-upgrade.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{10--already-updated-cluster.yaml => 10-already-updated-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{30--cluster.yaml => 30-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{31--create-data.yaml => 31-create-data.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{32--shutdown-cluster.yaml => 32-shutdown-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{33--annotate-cluster.yaml => 33-annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{34--restart-cluster.yaml => 34-restart-cluster.yaml} (100%) rename testing/kuttl/e2e/major-upgrade/{36--check-data-and-version.yaml => 36-check-data-and-version.yaml} (100%) rename testing/kuttl/e2e/optional-backups/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/optional-backups/{04--cluster.yaml => 04-cluster.yaml} (100%) rename testing/kuttl/e2e/optional-backups/{10--cluster.yaml => 10-cluster.yaml} (100%) rename testing/kuttl/e2e/optional-backups/{20--cluster.yaml => 20-cluster.yaml} (100%) rename testing/kuttl/e2e/optional-backups/{22--cluster.yaml => 22-cluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{00--cluster.yaml => 00-cluster.yaml} 
(100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{01--add-instrumentation-to-postgrescluster.yaml => 01-add-instrumentation-to-postgrescluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{03--backup.yaml => 03-backup.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{07--add-instrumentation-to-pgadmin.yaml => 07-add-instrumentation-to-pgadmin.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{09--add-custom-queries.yaml => 09-add-custom-queries.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{11--add-per-db-metrics-to-postgrescluster.yaml => 11-add-per-db-metrics-to-postgrescluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{13--add-second-per-db-metrics-to-postgrescluster.yaml => 13-add-second-per-db-metrics-to-postgrescluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{15--remove-per-db-metrics-from-postgrescluster.yaml => 15-remove-per-db-metrics-from-postgrescluster.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{17--add-custom-queries-per-db.yaml => 17-add-custom-queries-per-db.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{19--add-logs-metrics-exporter.yaml => 19-add-logs-metrics-exporter.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{21--cluster-no-backups.yaml => 21-cluster-no-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{23--cluster-add-backups.yaml => 23-cluster-add-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{24--remove-backups.yaml => 24-remove-backups.yaml} (100%) rename testing/kuttl/e2e/otel-logging-and-metrics/{25--annotate-cluster.yaml => 25-annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/password-change/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/password-change/{01--psql-connect-uri.yaml => 01-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{01--psql-connect.yaml => 01-psql-connect.yaml} (100%) rename testing/kuttl/e2e/password-change/{02--secret.yaml => 02-secret.yaml} (100%) rename testing/kuttl/e2e/password-change/{03--psql-connect-uri.yaml => 03-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{03--psql-connect.yaml => 03-psql-connect.yaml} (100%) rename testing/kuttl/e2e/password-change/{04--secret.yaml => 04-secret.yaml} (100%) rename testing/kuttl/e2e/password-change/{05--psql-connect-uri.yaml => 05-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{05--psql-connect.yaml => 05-psql-connect.yaml} (100%) rename testing/kuttl/e2e/password-change/{06--cluster.yaml => 06-cluster.yaml} (100%) rename testing/kuttl/e2e/password-change/{07--psql-connect-uri.yaml => 07-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{07--psql-connect.yaml => 07-psql-connect.yaml} (100%) rename testing/kuttl/e2e/password-change/{08--secret.yaml => 08-secret.yaml} (100%) rename testing/kuttl/e2e/password-change/{09--psql-connect-uri.yaml => 09-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{09--psql-connect.yaml => 09-psql-connect.yaml} (100%) rename testing/kuttl/e2e/password-change/{10--secret.yaml => 10-secret.yaml} (100%) rename testing/kuttl/e2e/password-change/{11--psql-connect-uri.yaml => 11-psql-connect-uri.yaml} (100%) rename testing/kuttl/e2e/password-change/{11--psql-connect.yaml => 11-psql-connect.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-backup-standby/{00--cluster.yaml => 00-cluster.yaml} (100%) rename 
testing/kuttl/e2e/pgbackrest-backup-standby/{01--check-backup-logs.yaml => 01-check-backup-logs.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-backup-standby/{02--cluster.yaml => 02-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-init/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-init/{02--cluster.yaml => 02-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-init/{04--cluster.yaml => 04-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-init/{06--check-spool-path.yaml => 06-check-spool-path.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{01--create-cluster.yaml => 01-create-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{02--create-data.yaml => 02-create-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{03--backup.yaml => 03-backup.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{04--clone-cluster.yaml => 04-clone-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{05--check-data.yaml => 05-check-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{06--delete-clone.yaml => 06-delete-clone.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{07--annotate.yaml => 07-annotate.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{07--update-cluster.yaml => 07-update-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{08--wait-restart.yaml => 08-wait-restart.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{09--add-data.yaml => 09-add-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{10--wait-archived.yaml => 10-wait-archived.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{11--clone-cluster.yaml => 11-clone-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{12--check-data.yaml => 12-check-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{13--delete-clone.yaml => 13-delete-clone.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{14--lose-data.yaml => 14-lose-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{15--in-place-pitr.yaml => 15-in-place-pitr.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{16--check-data.yaml => 16-check-data.yaml} (100%) rename testing/kuttl/e2e/pgbackrest-restore/{17--check-replication.yaml => 17-check-replication.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{01--psql-connect.yaml => 01-psql-connect.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{10--read-certificate.yaml => 10-read-certificate.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{11--open-connection.yaml => 11-open-connection.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{12--rotate-certificate.yaml => 12-rotate-certificate.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{13--read-certificate.yaml => 13-read-certificate.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{14--compare-certificate.yaml => 14-compare-certificate.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{15--check-connection.yaml => 15-check-connection.yaml} (100%) rename testing/kuttl/e2e/pgbouncer/{16--reconnect.yaml => 16-reconnect.yaml} (100%) rename testing/kuttl/e2e/replica-read/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/replica-read/{01--psql-replica-read.yaml => 01-psql-replica-read.yaml} (100%) rename testing/kuttl/e2e/root-cert-ownership/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/root-cert-ownership/{01--check-owners.yaml => 01-check-owners.yaml} (100%) 
rename testing/kuttl/e2e/root-cert-ownership/{02--delete-owner1.yaml => 02-delete-owner1.yaml} (100%) rename testing/kuttl/e2e/root-cert-ownership/{03--check-owners.yaml => 03-check-owners.yaml} (100%) rename testing/kuttl/e2e/root-cert-ownership/{04--delete-owner2.yaml => 04-delete-owner2.yaml} (100%) rename testing/kuttl/e2e/root-cert-ownership/{05--check-secret.yaml => 05-check-secret.yaml} (100%) rename testing/kuttl/e2e/scaledown/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{01--update-cluster.yaml => 01-update-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{02--delete-cluster.yaml => 02-delete-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{10--create-cluster.yaml => 10-create-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{12--update-cluster.yaml => 12-update-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{13--delete-cluster.yaml => 13-delete-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{20--create-cluster.yaml => 20-create-cluster.yaml} (100%) rename testing/kuttl/e2e/scaledown/{21--update-cluster.yaml => 21-update-cluster.yaml} (100%) rename testing/kuttl/e2e/security-context/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/security-context/{01--security-context.yaml => 01-security-context.yaml} (100%) rename testing/kuttl/e2e/security-context/{10--kyverno.yaml => 10-kyverno.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-db-uri/{00--create-cluster.yaml => 00-create-cluster.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-db-uri/{01--user-schema.yaml => 01-user-schema.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-db-uri/{02--create-pgadmin.yaml => 02-create-pgadmin.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-db-uri/{04--update-pgadmin.yaml => 04-update-pgadmin.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{00--pgadmin.yaml => 00-pgadmin.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{01--update-service.yaml => 01-update-service.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{02--remove-service.yaml => 02-remove-service.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{10--manual-service.yaml => 10-manual-service.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{20--owned-service.yaml => 20-owned-service.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-service/{21--service-takeover-fails.yaml => 21-service-takeover-fails.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-user-management/{00--create-pgadmin.yaml => 00-create-pgadmin.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-user-management/{02--edit-pgadmin-users.yaml => 02-edit-pgadmin-users.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-user-management/{04--change-pgadmin-user-passwords.yaml => 04-change-pgadmin-user-passwords.yaml} (100%) rename testing/kuttl/e2e/standalone-pgadmin-user-management/{06--delete-pgadmin-users.yaml => 06-delete-pgadmin-users.yaml} (100%) rename testing/kuttl/e2e/streaming-standby/{00--secrets.yaml => 00-secrets.yaml} (100%) rename testing/kuttl/e2e/streaming-standby/{01--primary-cluster.yaml => 01-primary-cluster.yaml} (100%) rename testing/kuttl/e2e/streaming-standby/{02--create-data.yaml => 02-create-data.yaml} (100%) rename testing/kuttl/e2e/streaming-standby/{03--standby-cluster.yaml => 03-standby-cluster.yaml} (100%) rename testing/kuttl/e2e/streaming-standby/{04--check-data.yaml => 04-check-data.yaml} (100%) rename 
testing/kuttl/e2e/switchover/{01--cluster.yaml => 01-cluster.yaml} (100%) rename testing/kuttl/e2e/tablespace-enabled/{00--cluster.yaml => 00-cluster.yaml} (100%) rename testing/kuttl/e2e/tablespace-enabled/{01--psql-connect.yaml => 01-psql-connect.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{00--create-resources.yaml => 00-create-resources.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{01--create-data.yaml => 01-create-data.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{02--shutdown-cluster.yaml => 02-shutdown-cluster.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{03--annotate-cluster.yaml => 03-annotate-cluster.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{04--restart-cluster.yaml => 04-restart-cluster.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{06--check-data-and-version.yaml => 06-check-data-and-version.yaml} (100%) rename testing/kuttl/e2e/wal-pvc-pgupgrade/{06--check-spool-path.yaml => 06-check-spool-path.yaml} (100%) diff --git a/testing/kuttl/e2e/cluster-pause/00--cluster.yaml b/testing/kuttl/e2e/cluster-pause/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/00--cluster.yaml rename to testing/kuttl/e2e/cluster-pause/00-cluster.yaml diff --git a/testing/kuttl/e2e/cluster-pause/01--cluster-paused.yaml b/testing/kuttl/e2e/cluster-pause/01-cluster-paused.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/01--cluster-paused.yaml rename to testing/kuttl/e2e/cluster-pause/01-cluster-paused.yaml diff --git a/testing/kuttl/e2e/cluster-pause/02--cluster-resume.yaml b/testing/kuttl/e2e/cluster-pause/02-cluster-resume.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/02--cluster-resume.yaml rename to testing/kuttl/e2e/cluster-pause/02-cluster-resume.yaml diff --git a/testing/kuttl/e2e/cluster-start/00--cluster.yaml b/testing/kuttl/e2e/cluster-start/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-start/00--cluster.yaml rename to testing/kuttl/e2e/cluster-start/00-cluster.yaml diff --git a/testing/kuttl/e2e/cluster-start/01--connect.yaml b/testing/kuttl/e2e/cluster-start/01-connect.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-start/01--connect.yaml rename to testing/kuttl/e2e/cluster-start/01-connect.yaml diff --git a/testing/kuttl/e2e/exporter-custom-queries/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-custom-queries/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-custom-queries/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-custom-queries/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-custom-queries/01--change-custom-queries.yaml b/testing/kuttl/e2e/exporter-custom-queries/01-change-custom-queries.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-custom-queries/01--change-custom-queries.yaml rename to testing/kuttl/e2e/exporter-custom-queries/01-change-custom-queries.yaml diff --git a/testing/kuttl/e2e/exporter-no-tls/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-no-tls/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-no-tls/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-no-tls/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-password-change/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-password-change/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-password-change/00--create-cluster.yaml rename to 
testing/kuttl/e2e/exporter-password-change/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-password-change/02--change-password.yaml b/testing/kuttl/e2e/exporter-password-change/02-change-password.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-password-change/02--change-password.yaml rename to testing/kuttl/e2e/exporter-password-change/02-change-password.yaml diff --git a/testing/kuttl/e2e/exporter-tls/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-tls/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-tls/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-tls/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/01--valid-upgrade.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/01-valid-upgrade.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/01--valid-upgrade.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/01-valid-upgrade.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/10-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/10-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/11--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/11-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/11--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/11-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/12--start-and-update-version.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/12-start-and-update-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/12--start-and-update-version.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/12-start-and-update-version.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/13--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/13-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/13--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/13-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/14--annotate-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/14-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/14--annotate-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/14-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/15--start-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/15-start-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/15--start-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/15-start-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/17--check-version.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/17-check-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/17--check-version.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/17-check-version.yaml diff --git a/testing/kuttl/e2e/major-upgrade/02--valid-upgrade.yaml b/testing/kuttl/e2e/major-upgrade/02-valid-upgrade.yaml similarity index 100% rename from 
testing/kuttl/e2e/major-upgrade/02--valid-upgrade.yaml rename to testing/kuttl/e2e/major-upgrade/02-valid-upgrade.yaml diff --git a/testing/kuttl/e2e/major-upgrade/10--already-updated-cluster.yaml b/testing/kuttl/e2e/major-upgrade/10-already-updated-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/10--already-updated-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/10-already-updated-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/30--cluster.yaml b/testing/kuttl/e2e/major-upgrade/30-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/30--cluster.yaml rename to testing/kuttl/e2e/major-upgrade/30-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/31--create-data.yaml b/testing/kuttl/e2e/major-upgrade/31-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/31--create-data.yaml rename to testing/kuttl/e2e/major-upgrade/31-create-data.yaml diff --git a/testing/kuttl/e2e/major-upgrade/32--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade/32-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/32--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/32-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/33--annotate-cluster.yaml b/testing/kuttl/e2e/major-upgrade/33-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/33--annotate-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/33-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/34--restart-cluster.yaml b/testing/kuttl/e2e/major-upgrade/34-restart-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/34--restart-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/34-restart-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/36--check-data-and-version.yaml b/testing/kuttl/e2e/major-upgrade/36-check-data-and-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/36--check-data-and-version.yaml rename to testing/kuttl/e2e/major-upgrade/36-check-data-and-version.yaml diff --git a/testing/kuttl/e2e/optional-backups/00--cluster.yaml b/testing/kuttl/e2e/optional-backups/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/00--cluster.yaml rename to testing/kuttl/e2e/optional-backups/00-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/04--cluster.yaml b/testing/kuttl/e2e/optional-backups/04-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/04--cluster.yaml rename to testing/kuttl/e2e/optional-backups/04-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/10--cluster.yaml b/testing/kuttl/e2e/optional-backups/10-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/10--cluster.yaml rename to testing/kuttl/e2e/optional-backups/10-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/20--cluster.yaml b/testing/kuttl/e2e/optional-backups/20-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/20--cluster.yaml rename to testing/kuttl/e2e/optional-backups/20-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/22--cluster.yaml b/testing/kuttl/e2e/optional-backups/22-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/22--cluster.yaml rename to testing/kuttl/e2e/optional-backups/22-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml 
b/testing/kuttl/e2e/otel-logging-and-metrics/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/00--cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/00-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/01-add-instrumentation-to-postgrescluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/01--add-instrumentation-to-postgrescluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/01-add-instrumentation-to-postgrescluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/03-backup.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/03--backup.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/03-backup.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/07-add-instrumentation-to-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/07--add-instrumentation-to-pgadmin.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/07-add-instrumentation-to-pgadmin.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/09-add-custom-queries.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/09--add-custom-queries.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/09-add-custom-queries.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11-add-per-db-metrics-to-postgrescluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/11--add-per-db-metrics-to-postgrescluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/11-add-per-db-metrics-to-postgrescluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13-add-second-per-db-metrics-to-postgrescluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/13--add-second-per-db-metrics-to-postgrescluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/13-add-second-per-db-metrics-to-postgrescluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/15-remove-per-db-metrics-from-postgrescluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/15--remove-per-db-metrics-from-postgrescluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/15-remove-per-db-metrics-from-postgrescluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/17-add-custom-queries-per-db.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/17--add-custom-queries-per-db.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/17-add-custom-queries-per-db.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19-add-logs-metrics-exporter.yaml similarity index 100% rename from 
testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/19-add-logs-metrics-exporter.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/21-cluster-no-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/21--cluster-no-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/21-cluster-no-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/23--cluster-add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/23-cluster-add-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/23--cluster-add-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/23-cluster-add-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/24--remove-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/24-remove-backups.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/24--remove-backups.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/24-remove-backups.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/25--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/25-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/25--annotate-cluster.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/25-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/password-change/00--cluster.yaml b/testing/kuttl/e2e/password-change/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/00--cluster.yaml rename to testing/kuttl/e2e/password-change/00-cluster.yaml diff --git a/testing/kuttl/e2e/password-change/01--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/01-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/01--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/01-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/01--psql-connect.yaml b/testing/kuttl/e2e/password-change/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/01--psql-connect.yaml rename to testing/kuttl/e2e/password-change/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/02--secret.yaml b/testing/kuttl/e2e/password-change/02-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/02--secret.yaml rename to testing/kuttl/e2e/password-change/02-secret.yaml diff --git a/testing/kuttl/e2e/password-change/03--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/03-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/03--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/03-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/03--psql-connect.yaml b/testing/kuttl/e2e/password-change/03-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/03--psql-connect.yaml rename to testing/kuttl/e2e/password-change/03-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/04--secret.yaml b/testing/kuttl/e2e/password-change/04-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/04--secret.yaml rename to testing/kuttl/e2e/password-change/04-secret.yaml diff --git a/testing/kuttl/e2e/password-change/05--psql-connect-uri.yaml 
b/testing/kuttl/e2e/password-change/05-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/05--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/05-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/05--psql-connect.yaml b/testing/kuttl/e2e/password-change/05-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/05--psql-connect.yaml rename to testing/kuttl/e2e/password-change/05-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/06--cluster.yaml b/testing/kuttl/e2e/password-change/06-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/06--cluster.yaml rename to testing/kuttl/e2e/password-change/06-cluster.yaml diff --git a/testing/kuttl/e2e/password-change/07--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/07-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/07--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/07-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/07--psql-connect.yaml b/testing/kuttl/e2e/password-change/07-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/07--psql-connect.yaml rename to testing/kuttl/e2e/password-change/07-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/08--secret.yaml b/testing/kuttl/e2e/password-change/08-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/08--secret.yaml rename to testing/kuttl/e2e/password-change/08-secret.yaml diff --git a/testing/kuttl/e2e/password-change/09--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/09-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/09--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/09-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/09--psql-connect.yaml b/testing/kuttl/e2e/password-change/09-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/09--psql-connect.yaml rename to testing/kuttl/e2e/password-change/09-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/10--secret.yaml b/testing/kuttl/e2e/password-change/10-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/10--secret.yaml rename to testing/kuttl/e2e/password-change/10-secret.yaml diff --git a/testing/kuttl/e2e/password-change/11--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/11-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/11--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/11-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/11--psql-connect.yaml b/testing/kuttl/e2e/password-change/11-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/11--psql-connect.yaml rename to testing/kuttl/e2e/password-change/11-psql-connect.yaml diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/00--cluster.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-backup-standby/00--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/01--check-backup-logs.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/01-check-backup-logs.yaml similarity index 100% rename from 
testing/kuttl/e2e/pgbackrest-backup-standby/01--check-backup-logs.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/01-check-backup-logs.yaml diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/02--cluster.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/02-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-backup-standby/02--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/02-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/00--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/00--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/02--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/02-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/02--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/02-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/04--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/04-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/04--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/04-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/06--check-spool-path.yaml b/testing/kuttl/e2e/pgbackrest-init/06-check-spool-path.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/06--check-spool-path.yaml rename to testing/kuttl/e2e/pgbackrest-init/06-check-spool-path.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/01-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/01-create-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/02--create-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/02-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/02--create-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/02-create-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/03--backup.yaml b/testing/kuttl/e2e/pgbackrest-restore/03-backup.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/03--backup.yaml rename to testing/kuttl/e2e/pgbackrest-restore/03-backup.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/04--clone-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/04-clone-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/04--clone-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/04-clone-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/05--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/05-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/05--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/05-check-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/06--delete-clone.yaml b/testing/kuttl/e2e/pgbackrest-restore/06-delete-clone.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/06--delete-clone.yaml rename to testing/kuttl/e2e/pgbackrest-restore/06-delete-clone.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/07--annotate.yaml b/testing/kuttl/e2e/pgbackrest-restore/07-annotate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/07--annotate.yaml rename to testing/kuttl/e2e/pgbackrest-restore/07-annotate.yaml diff --git 
a/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/07-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/07-update-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/08--wait-restart.yaml b/testing/kuttl/e2e/pgbackrest-restore/08-wait-restart.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/08--wait-restart.yaml rename to testing/kuttl/e2e/pgbackrest-restore/08-wait-restart.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/09--add-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/09-add-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/09--add-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/09-add-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/10--wait-archived.yaml b/testing/kuttl/e2e/pgbackrest-restore/10-wait-archived.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/10--wait-archived.yaml rename to testing/kuttl/e2e/pgbackrest-restore/10-wait-archived.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/11--clone-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/11-clone-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/11--clone-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/11-clone-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/12--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/12-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/12--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/12-check-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/13--delete-clone.yaml b/testing/kuttl/e2e/pgbackrest-restore/13-delete-clone.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/13--delete-clone.yaml rename to testing/kuttl/e2e/pgbackrest-restore/13-delete-clone.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/14--lose-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/14-lose-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/14--lose-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/14-lose-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/15--in-place-pitr.yaml b/testing/kuttl/e2e/pgbackrest-restore/15-in-place-pitr.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/15--in-place-pitr.yaml rename to testing/kuttl/e2e/pgbackrest-restore/15-in-place-pitr.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/16--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/16-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/16--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/16-check-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/17--check-replication.yaml b/testing/kuttl/e2e/pgbackrest-restore/17-check-replication.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/17--check-replication.yaml rename to testing/kuttl/e2e/pgbackrest-restore/17-check-replication.yaml diff --git a/testing/kuttl/e2e/pgbouncer/00--cluster.yaml b/testing/kuttl/e2e/pgbouncer/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/00--cluster.yaml rename to testing/kuttl/e2e/pgbouncer/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbouncer/01--psql-connect.yaml 
b/testing/kuttl/e2e/pgbouncer/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/01--psql-connect.yaml rename to testing/kuttl/e2e/pgbouncer/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/pgbouncer/10--read-certificate.yaml b/testing/kuttl/e2e/pgbouncer/10-read-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/10--read-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/10-read-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/11--open-connection.yaml b/testing/kuttl/e2e/pgbouncer/11-open-connection.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/11--open-connection.yaml rename to testing/kuttl/e2e/pgbouncer/11-open-connection.yaml diff --git a/testing/kuttl/e2e/pgbouncer/12--rotate-certificate.yaml b/testing/kuttl/e2e/pgbouncer/12-rotate-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/12--rotate-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/12-rotate-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/13--read-certificate.yaml b/testing/kuttl/e2e/pgbouncer/13-read-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/13--read-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/13-read-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/14--compare-certificate.yaml b/testing/kuttl/e2e/pgbouncer/14-compare-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/14--compare-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/14-compare-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/15--check-connection.yaml b/testing/kuttl/e2e/pgbouncer/15-check-connection.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/15--check-connection.yaml rename to testing/kuttl/e2e/pgbouncer/15-check-connection.yaml diff --git a/testing/kuttl/e2e/pgbouncer/16--reconnect.yaml b/testing/kuttl/e2e/pgbouncer/16-reconnect.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/16--reconnect.yaml rename to testing/kuttl/e2e/pgbouncer/16-reconnect.yaml diff --git a/testing/kuttl/e2e/replica-read/00--cluster.yaml b/testing/kuttl/e2e/replica-read/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/replica-read/00--cluster.yaml rename to testing/kuttl/e2e/replica-read/00-cluster.yaml diff --git a/testing/kuttl/e2e/replica-read/01--psql-replica-read.yaml b/testing/kuttl/e2e/replica-read/01-psql-replica-read.yaml similarity index 100% rename from testing/kuttl/e2e/replica-read/01--psql-replica-read.yaml rename to testing/kuttl/e2e/replica-read/01-psql-replica-read.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/00--cluster.yaml b/testing/kuttl/e2e/root-cert-ownership/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/00--cluster.yaml rename to testing/kuttl/e2e/root-cert-ownership/00-cluster.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/01--check-owners.yaml b/testing/kuttl/e2e/root-cert-ownership/01-check-owners.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/01--check-owners.yaml rename to testing/kuttl/e2e/root-cert-ownership/01-check-owners.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/02--delete-owner1.yaml b/testing/kuttl/e2e/root-cert-ownership/02-delete-owner1.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/02--delete-owner1.yaml rename to testing/kuttl/e2e/root-cert-ownership/02-delete-owner1.yaml diff --git 
a/testing/kuttl/e2e/root-cert-ownership/03--check-owners.yaml b/testing/kuttl/e2e/root-cert-ownership/03-check-owners.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/03--check-owners.yaml rename to testing/kuttl/e2e/root-cert-ownership/03-check-owners.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/04--delete-owner2.yaml b/testing/kuttl/e2e/root-cert-ownership/04-delete-owner2.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/04--delete-owner2.yaml rename to testing/kuttl/e2e/root-cert-ownership/04-delete-owner2.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/05--check-secret.yaml b/testing/kuttl/e2e/root-cert-ownership/05-check-secret.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/05--check-secret.yaml rename to testing/kuttl/e2e/root-cert-ownership/05-check-secret.yaml diff --git a/testing/kuttl/e2e/scaledown/00--create-cluster.yaml b/testing/kuttl/e2e/scaledown/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/00--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/01--update-cluster.yaml b/testing/kuttl/e2e/scaledown/01-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/01--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/01-update-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/02--delete-cluster.yaml b/testing/kuttl/e2e/scaledown/02-delete-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/02--delete-cluster.yaml rename to testing/kuttl/e2e/scaledown/02-delete-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/10--create-cluster.yaml b/testing/kuttl/e2e/scaledown/10-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/10--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/10-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/12--update-cluster.yaml b/testing/kuttl/e2e/scaledown/12-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/12--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/12-update-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/13--delete-cluster.yaml b/testing/kuttl/e2e/scaledown/13-delete-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/13--delete-cluster.yaml rename to testing/kuttl/e2e/scaledown/13-delete-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/20--create-cluster.yaml b/testing/kuttl/e2e/scaledown/20-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/20--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/20-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/21--update-cluster.yaml b/testing/kuttl/e2e/scaledown/21-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/21--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/21-update-cluster.yaml diff --git a/testing/kuttl/e2e/security-context/00--cluster.yaml b/testing/kuttl/e2e/security-context/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/security-context/00--cluster.yaml rename to testing/kuttl/e2e/security-context/00-cluster.yaml diff --git a/testing/kuttl/e2e/security-context/01--security-context.yaml b/testing/kuttl/e2e/security-context/01-security-context.yaml similarity index 100% rename from testing/kuttl/e2e/security-context/01--security-context.yaml rename to 
testing/kuttl/e2e/security-context/01-security-context.yaml diff --git a/testing/kuttl/e2e/security-context/10--kyverno.yaml b/testing/kuttl/e2e/security-context/10-kyverno.yaml similarity index 100% rename from testing/kuttl/e2e/security-context/10--kyverno.yaml rename to testing/kuttl/e2e/security-context/10-kyverno.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/00--create-cluster.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/00--create-cluster.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/01--user-schema.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/01-user-schema.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/01--user-schema.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/01-user-schema.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/02--create-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/02-create-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/02--create-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/02-create-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/04--update-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/04-update-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/04--update-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/04-update-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/00--pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/00-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/00--pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/00-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/01--update-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/01-update-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/01--update-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/01-update-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/02--remove-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/02-remove-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/02--remove-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/02-remove-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/10--manual-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/10-manual-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/10--manual-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/10-manual-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/20--owned-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/20-owned-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/20--owned-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/20-owned-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/21--service-takeover-fails.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/21-service-takeover-fails.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/21--service-takeover-fails.yaml rename to 
testing/kuttl/e2e/standalone-pgadmin-service/21-service-takeover-fails.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/00--create-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/00-create-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/00--create-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/00-create-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/02--edit-pgadmin-users.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/02-edit-pgadmin-users.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/02--edit-pgadmin-users.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/02-edit-pgadmin-users.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/04--change-pgadmin-user-passwords.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/04-change-pgadmin-user-passwords.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/04--change-pgadmin-user-passwords.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/04-change-pgadmin-user-passwords.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/06--delete-pgadmin-users.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/06-delete-pgadmin-users.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/06--delete-pgadmin-users.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/06-delete-pgadmin-users.yaml diff --git a/testing/kuttl/e2e/streaming-standby/00--secrets.yaml b/testing/kuttl/e2e/streaming-standby/00-secrets.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/00--secrets.yaml rename to testing/kuttl/e2e/streaming-standby/00-secrets.yaml diff --git a/testing/kuttl/e2e/streaming-standby/01--primary-cluster.yaml b/testing/kuttl/e2e/streaming-standby/01-primary-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/01--primary-cluster.yaml rename to testing/kuttl/e2e/streaming-standby/01-primary-cluster.yaml diff --git a/testing/kuttl/e2e/streaming-standby/02--create-data.yaml b/testing/kuttl/e2e/streaming-standby/02-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/02--create-data.yaml rename to testing/kuttl/e2e/streaming-standby/02-create-data.yaml diff --git a/testing/kuttl/e2e/streaming-standby/03--standby-cluster.yaml b/testing/kuttl/e2e/streaming-standby/03-standby-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/03--standby-cluster.yaml rename to testing/kuttl/e2e/streaming-standby/03-standby-cluster.yaml diff --git a/testing/kuttl/e2e/streaming-standby/04--check-data.yaml b/testing/kuttl/e2e/streaming-standby/04-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/04--check-data.yaml rename to testing/kuttl/e2e/streaming-standby/04-check-data.yaml diff --git a/testing/kuttl/e2e/switchover/01--cluster.yaml b/testing/kuttl/e2e/switchover/01-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/switchover/01--cluster.yaml rename to testing/kuttl/e2e/switchover/01-cluster.yaml diff --git a/testing/kuttl/e2e/tablespace-enabled/00--cluster.yaml b/testing/kuttl/e2e/tablespace-enabled/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/tablespace-enabled/00--cluster.yaml rename to 
testing/kuttl/e2e/tablespace-enabled/00-cluster.yaml diff --git a/testing/kuttl/e2e/tablespace-enabled/01--psql-connect.yaml b/testing/kuttl/e2e/tablespace-enabled/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/tablespace-enabled/01--psql-connect.yaml rename to testing/kuttl/e2e/tablespace-enabled/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/00--create-resources.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/00-create-resources.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/00--create-resources.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/00-create-resources.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/01--create-data.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/01-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/01--create-data.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/01-create-data.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/02--shutdown-cluster.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/02-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/02--shutdown-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/02-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/03--annotate-cluster.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/03-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/03--annotate-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/03-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/04--restart-cluster.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/04-restart-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/04--restart-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/04-restart-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-data-and-version.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-data-and-version.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-data-and-version.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-data-and-version.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-spool-path.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-spool-path.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-spool-path.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-spool-path.yaml From 4d584da5bd65eaafa2d99c8244b839b60f225392 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Mon, 24 Nov 2025 15:57:57 -0500 Subject: [PATCH 71/79] kuttl test file names have changed, update references --- testing/kuttl/README.md | 14 -------------- testing/kuttl/e2e/delete-namespace/README.md | 2 +- .../kuttl/e2e/exporter-password-change/README.md | 4 ++-- .../e2e/major-upgrade-missing-image/README.md | 14 +++++++------- testing/kuttl/e2e/scaledown/readme.MD | 16 ++++++++-------- 5 files changed, 18 insertions(+), 32 deletions(-) diff --git a/testing/kuttl/README.md b/testing/kuttl/README.md index 41fbf46e19..a67ff35808 100644 --- a/testing/kuttl/README.md +++ b/testing/kuttl/README.md @@ -44,20 +44,6 @@ There are two ways to run a single test in isolation: - using an env var with the make target: `KUTTL_TEST='kuttl test --test ' make check-kuttl` - using `kubectl kuttl --test` flag: `kubectl kuttl test testing/kuttl/e2e-generated --test ` -### Writing additional tests - -To make it easier to read tests, we want to put our `assert.yaml`/`errors.yaml` 
files after the -files that create/update the objects for a step. To achieve this, infix an extra `-` between the -step number and the object/step name. - -For example, if the `00` test step wants to create a cluster and then assert that the cluster is ready, -the files would be named - -```yaml -00--cluster.yaml # note the extra `-` to ensure that it sorts above the following file -00-assert.yaml -``` - ### Generating tests KUTTL is good at setting up K8s objects for testing, but does not have a native way to dynamically diff --git a/testing/kuttl/e2e/delete-namespace/README.md b/testing/kuttl/e2e/delete-namespace/README.md index 697e2ae915..4b0f951fef 100644 --- a/testing/kuttl/e2e/delete-namespace/README.md +++ b/testing/kuttl/e2e/delete-namespace/README.md @@ -6,6 +6,6 @@ * Check that nothing remains. Note: KUTTL provides a `$NAMESPACE` var that can be used in scripts/commands, -but which cannot be used in object definition yamls (like `01--cluster.yaml`). +but which cannot be used in object definition yamls (like `01-cluster.yaml`). Therefore, we use a given, non-random namespace that is defined in the makefile and generated with `generate-kuttl`. diff --git a/testing/kuttl/e2e/exporter-password-change/README.md b/testing/kuttl/e2e/exporter-password-change/README.md index 2a5b596309..d3d11f263c 100644 --- a/testing/kuttl/e2e/exporter-password-change/README.md +++ b/testing/kuttl/e2e/exporter-password-change/README.md @@ -1,6 +1,6 @@ # Exporter Password Change -## 00--create-cluster: +## 00-create-cluster: The TestStep will: 1) Apply the `files/inital-postgrescluster.yaml` file to create a cluster with monitoring enabled @@ -13,7 +13,7 @@ The TestStep will: This TestAssert will loop through a script until: 1) the instance pod has the `ContainersReady` condition with status `true` -2) the asserts from `00--create-cluster` are met. +2) the asserts from `00-create-cluster` are met. ## 01-assert: diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/README.md b/testing/kuttl/e2e/major-upgrade-missing-image/README.md index 1053da29ed..ec3d4493b5 100644 --- a/testing/kuttl/e2e/major-upgrade-missing-image/README.md +++ b/testing/kuttl/e2e/major-upgrade-missing-image/README.md @@ -6,31 +6,31 @@ PostgresCluster spec or via the RELATED_IMAGES environment variables. ### Basic PGUpgrade controller and CRD instance validation -* 01--valid-upgrade: create a valid PGUpgrade instance +* 01-valid-upgrade: create a valid PGUpgrade instance * 01-assert: check that the PGUpgrade instance exists and has the expected status ### Verify new statuses for missing required container images -* 10--cluster: create the cluster with an unavailable image (i.e. Postgres 11) +* 10-cluster: create the cluster with an unavailable image (i.e. 
Postgres 11) * 10-assert: check that the PGUpgrade instance has the expected reason: "PGClusterNotShutdown" * 11-shutdown-cluster: set the spec.shutdown value to 'true' as required for upgrade * 11-assert: check that the new reason is set, "PGClusterPrimaryNotIdentified" ### Update to an available Postgres version, start and upgrade PostgresCluster -* 12--start-and-update-version: update the Postgres version on both CRD instances and set 'shutdown' to false +* 12-start-and-update-version: update the Postgres version on both CRD instances and set 'shutdown' to false * 12-assert: verify that the cluster is running and the PGUpgrade instance now has the new status info with reason: "PGClusterNotShutdown" -* 13--shutdown-cluster: set spec.shutdown to 'true' +* 13-shutdown-cluster: set spec.shutdown to 'true' * 13-assert: check that the PGUpgrade instance has the expected reason: "PGClusterMissingRequiredAnnotation" -* 14--annotate-cluster: set the required annotation +* 14-annotate-cluster: set the required annotation * 14-assert: verify that the upgrade succeeded and the new Postgres version shows in the cluster's status -* 15--start-cluster: set the new Postgres version and spec.shutdown to 'false' +* 15-start-cluster: set the new Postgres version and spec.shutdown to 'false' ### Verify upgraded PostgresCluster * 15-assert: verify that the cluster is running * 16-check-pgbackrest: check that the pgbackrest setup has successfully completed -* 17--check-version: check the version reported by PostgreSQL +* 17-check-version: check the version reported by PostgreSQL * 17-assert: assert the Job from the previous step succeeded diff --git a/testing/kuttl/e2e/scaledown/readme.MD b/testing/kuttl/e2e/scaledown/readme.MD index 44fd880ed1..dd7f8fed7e 100644 --- a/testing/kuttl/e2e/scaledown/readme.MD +++ b/testing/kuttl/e2e/scaledown/readme.MD @@ -8,24 +8,24 @@ have the expected number of pods. 
### From two sets to one set -* 00--create-cluster: create the cluster with two instance sets, one replica each +* 00-create-cluster: create the cluster with two instance sets, one replica each * 00-assert: check that the cluster exists with the expected status -* 01--update-cluster: update the cluster to remove one instance set +* 01-update-cluster: update the cluster to remove one instance set * 01-assert: check that the cluster exists with the expected status -* 02--delete-cluster +* 02-delete-cluster ### From one set with multiple replicas to one set with one replica -* 10--create-cluster: create the cluster with one instance set with two replicas +* 10-create-cluster: create the cluster with one instance set with two replicas * 10-assert: check that the cluster exists with the expected status * 11-annotate: set the roles as labels on the pods -* 12--update-cluster: update the cluster to remove one replica +* 12-update-cluster: update the cluster to remove one replica * 12-assert: check that the cluster exists with the expected status; and that the `master` pod that exists was the `master` before the scaledown -* 13--delete-cluster: delete the cluster +* 13-delete-cluster: delete the cluster ### From two sets with variable replicas to two sets with one replica each -* 20--create-cluster: create the cluster with two instance sets, with two and one replica +* 20-create-cluster: create the cluster with two instance sets, with two and one replica * 20-assert: check that the cluster exists with the expected status -* 21--update-cluster: update the cluster to reduce the two-replica instance to one-replica +* 21-update-cluster: update the cluster to reduce the two-replica instance to one-replica * 21-assert: check that the cluster exists with the expected status From e38f8d70f556b0fb8915d946a532e78a8253b57b Mon Sep 17 00:00:00 2001 From: ValClarkson Date: Tue, 25 Nov 2025 15:42:35 -0500 Subject: [PATCH 72/79] November 2025 release PGO-2789 --- Makefile | 2 +- config/manager/manager.yaml | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 3554eb52ff..1ec77512ea 100644 --- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 16 generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 17 generate-kuttl: export KUTTL_PG_VERSION ?= 16 generate-kuttl: export KUTTL_POSTGIS_VERSION ?= 3.4 -generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2534 +generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547 generate-kuttl: export KUTTL_TEST_DELETE_NAMESPACE ?= kuttl-test-delete-namespace generate-kuttl: ## Generate kuttl tests [ !
-d testing/kuttl/e2e-generated ] || rm -r testing/kuttl/e2e-generated diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 24bf6caadf..81cc2b9160 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -23,27 +23,27 @@ spec: - name: CRUNCHY_DEBUG value: "true" - name: RELATED_IMAGE_POSTGRES_16 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.10-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.3 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.3-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.11-3.3-2547" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.4-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.11-3.4-2547" - name: RELATED_IMAGE_POSTGRES_17 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547" - name: RELATED_IMAGE_POSTGRES_17_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.4-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.7-3.4-2547" - name: RELATED_IMAGE_PGBACKREST - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547" - name: RELATED_IMAGE_PGBOUNCER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547" - name: RELATED_IMAGE_PGEXPORTER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2547" - name: RELATED_IMAGE_PGUPGRADE - value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.6-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.7-2547" - name: RELATED_IMAGE_STANDALONE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2534" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2547" - name: RELATED_IMAGE_COLLECTOR - value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.3-0" + value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.5-0" securityContext: allowPrivilegeEscalation: false capabilities: { drop: [ALL] } From 406b7d5fbe9822f783a1f93b567a79bbe0b4e66e Mon Sep 17 00:00:00 2001 From: ValClarkson Date: Tue, 25 Nov 2025 15:53:36 -0500 Subject: [PATCH 73/79] updated image version --- config/manager/manager.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 81cc2b9160..fc6133d899 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -37,11 +37,11 @@ spec: - name: RELATED_IMAGE_PGBOUNCER value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547" - name: RELATED_IMAGE_PGEXPORTER - value: 
"registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2547" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547" - name: RELATED_IMAGE_PGUPGRADE value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.7-2547" - name: RELATED_IMAGE_STANDALONE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.2-2547" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547" - name: RELATED_IMAGE_COLLECTOR value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.5-0" securityContext: From 133d3da161833ab52fa9e8fa1529658d1bb9e571 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Tue, 25 Nov 2025 12:29:57 -0500 Subject: [PATCH 74/79] Enable github actions for REL_5_8. Use go to install kuttl. --- .github/ISSUE_TEMPLATE/bug_report.md | 60 ++++ .github/ISSUE_TEMPLATE/feature_request.md | 42 +++ .../support---question-and-answer.md | 35 +++ .github/actions/k3d/action.yaml | 94 ++++++ .github/actions/trivy/action.yaml | 138 ++++++++ .github/dependabot.yml | 63 ++++ .github/pull_request_template.md | 30 ++ .github/workflows/codeql-analysis.yaml | 37 +++ .github/workflows/govulncheck.yaml | 50 +++ .github/workflows/lint.yaml | 32 ++ .github/workflows/test.yaml | 199 ++++++++++++ .github/workflows/trivy.yaml | 127 ++++++++ .golangci.next.yaml | 114 +++++-- .golangci.yaml | 296 +++++++++++------- Makefile | 2 +- 15 files changed, 1181 insertions(+), 138 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/support---question-and-answer.md create mode 100644 .github/actions/k3d/action.yaml create mode 100644 .github/actions/trivy/action.yaml create mode 100644 .github/dependabot.yml create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/codeql-analysis.yaml create mode 100644 .github/workflows/govulncheck.yaml create mode 100644 .github/workflows/lint.yaml create mode 100644 .github/workflows/test.yaml create mode 100644 .github/workflows/trivy.yaml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..30e551a122 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,60 @@ +--- +name: Report a Bug +about: Found an issue? Let us fix it. +--- + +Please ensure you do the following when reporting a bug: + +- [ ] Provide a concise description of what the bug is. +- [ ] Provide information about your environment. +- [ ] Provide clear steps to reproduce the bug. +- [ ] Attach applicable logs. Please do not attach screenshots showing logs unless you are unable to copy and paste the log data. +- [ ] Ensure any code / output examples are [properly formatted](https://docs.github.com/en/github/writing-on-github/basic-writing-and-formatting-syntax#quoting-code) for legibility. + +Note that some logs needed to troubleshoot may be found in the `/pgdata//pg_log` directory on your Postgres instance. + +An incomplete bug report can lead to delays in resolving the issue or the closing of a ticket, so please be as detailed as possible. + +If you are looking for [general support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/), please view the [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) page for where you can ask questions. 
+ +Thanks for reporting the issue, we're looking forward to helping you! + +## Overview + +Add a concise description of what the bug is. + +## Environment + +Please provide the following details: + +- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) +- Platform Version: (e.g. `1.20.3`, `4.7.0`) +- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) +- Postgres Version (e.g. `15`) +- Storage: (e.g. `hostpath`, `nfs`, or the name of your storage class) + +## Steps to Reproduce + +### REPRO + +Provide steps to get to the error condition: + +1. Run `...` +1. Do `...` +1. Try `...` + +### EXPECTED + +1. Provide the behavior that you expected. + +### ACTUAL + +1. Describe what actually happens + +## Logs + +Please provided appropriate log output or any configuration files that may help troubleshoot the issue. **DO NOT** include sensitive information, such as passwords. + +## Additional Information + +Please provide any additional information that may be helpful. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000..4de2077c77 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,42 @@ +--- +name: Feature Request +about: Help us improve PGO! +--- + +Have an idea to improve PGO? We'd love to hear it! We're going to need some information from you to learn more about your feature requests. + +Please be sure you've done the following: + +- [ ] Provide a concise description of your feature request. +- [ ] Describe your use case. Detail the problem you are trying to solve. +- [ ] Describe how you envision that the feature would work. +- [ ] Provide general information about your current PGO environment. + +## Overview + +Provide a concise description of your feature request. + +## Use Case + +Describe your use case. Why do you want this feature? What problem will it solve? Why will it help you? Why will it make it easier to use PGO? + +## Desired Behavior + +Describe how the feature would work. How do you envision interfacing with it? + +## Environment + +Tell us about your environment: + +Please provide the following details: + +- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) +- Platform Version: (e.g. `1.20.3`, `4.7.0`) +- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) +- Postgres Version (e.g. `15`) +- Storage: (e.g. `hostpath`, `nfs`, or the name of your storage class) +- Number of Postgres clusters: (`XYZ`) + +## Additional Information + +Please provide any additional information that may be helpful. 
diff --git a/.github/ISSUE_TEMPLATE/support---question-and-answer.md b/.github/ISSUE_TEMPLATE/support---question-and-answer.md new file mode 100644 index 0000000000..271caa9029 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/support---question-and-answer.md @@ -0,0 +1,35 @@ +--- +name: Support +about: "Learn how to interact with the PGO community" +--- + +If you believe you have found a bug, please open up a [Bug Report](https://github.com/CrunchyData/postgres-operator/issues/new?template=bug_report.md) + +If you have a feature request, please open up a [Feature Request](https://github.com/CrunchyData/postgres-operator/issues/new?template=feature_request.md) + +You can find information about general PGO [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) at: + +[https://access.crunchydata.com/documentation/postgres-operator/latest/support/](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) + +## Questions + +For questions that are neither bugs nor feature requests, please be sure to + +- [ ] Provide information about your environment (see below for more information). +- [ ] Provide any steps or other relevant details related to your question. +- [ ] Attach logs, where applicable. Please do not attach screenshots showing logs unless you are unable to copy and paste the log data. +- [ ] Ensure any code / output examples are [properly formatted](https://docs.github.com/en/github/writing-on-github/basic-writing-and-formatting-syntax#quoting-code) for legibility. + +Besides Pod logs, logs may also be found in the `/pgdata/pg/log` directory on your Postgres instance. + +If you are looking for [general support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/), please view the [support](https://access.crunchydata.com/documentation/postgres-operator/latest/support/) page for where you can ask questions. + +### Environment + +Please provide the following details: + +- Platform: (`Kubernetes`, `OpenShift`, `Rancher`, `GKE`, `EKS`, `AKS` etc.) +- Platform Version: (e.g. `1.20.3`, `4.7.0`) +- PGO Image Tag: (e.g. `ubi8-5.x.y-0`) +- Postgres Version (e.g. `15`) +- Storage: (e.g.
`hostpath`, `nfs`, or the name of your storage class) diff --git a/.github/actions/k3d/action.yaml b/.github/actions/k3d/action.yaml new file mode 100644 index 0000000000..b6e6ed5c2b --- /dev/null +++ b/.github/actions/k3d/action.yaml @@ -0,0 +1,94 @@ +name: k3d +description: Start k3s using k3d +inputs: + k3d-tag: + default: latest + required: true + description: > + Git tag from https://github.com/k3d-io/k3d/releases or "latest" + k3s-channel: + default: latest + required: true + description: > + https://docs.k3s.io/upgrades/manual#release-channels + prefetch-images: + required: true + description: > + Each line is the name of an image to fetch onto all Kubernetes nodes + prefetch-timeout: + default: 3m + required: true + description: > + Amount of time to wait for images to be fetched + +outputs: + k3d-version: + value: ${{ steps.k3d.outputs.k3d }} + description: > + K3d version + kubernetes-version: + value: ${{ steps.k3s.outputs.server }} + description: > + Kubernetes server version, as reported by the Kubernetes API + pause-image: + value: ${{ steps.k3s.outputs.pause-image }} + description: > + Pause image for prefetch images DaemonSet + +runs: + using: composite + steps: + - id: k3d + name: Install k3d + shell: bash + env: + K3D_TAG: ${{ inputs.k3d-tag }} + run: | + curl --fail --silent https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | + TAG="${K3D_TAG#latest}" bash + k3d version | awk '{ print "${tolower($1)}=${$3}" >> $GITHUB_OUTPUT }' + + - id: k3s + name: Start k3s + shell: bash + run: | + k3d cluster create --image '+${{ inputs.k3s-channel }}' --no-lb --timeout=2m --wait + kubectl version | awk '{ print "${tolower($1)}=${$3}" >> $GITHUB_OUTPUT }' + + PAUSE_IMAGE=$(docker exec $(k3d node list --output json | jq --raw-output 'first.name') \ + k3s agent --help | awk '$1 == "--pause-image" { + match($0, /default: "[^"]*"/); + print substr($0, RSTART+10, RLENGTH-11) + }') + echo "pause-image=${PAUSE_IMAGE}" >> $GITHUB_OUTPUT + + - name: Prefetch container images + shell: bash + env: + INPUT_IMAGES: ${{ inputs.prefetch-images }} + INPUT_TIMEOUT: ${{ inputs.prefetch-timeout }} + run: | + jq <<< "$INPUT_IMAGES" --raw-input 'select(. != "")' | + jq --slurp \ + --arg pause '${{ steps.k3s.outputs.pause-image }}' \ + --argjson labels '{"name":"image-prefetch"}' \ + --argjson name '"image-prefetch"' \ + '{ + apiVersion: "apps/v1", kind: "DaemonSet", + metadata: { name: $name, labels: $labels }, + spec: { + selector: { matchLabels: $labels }, + template: { + metadata: { labels: $labels }, + spec: { + initContainers: to_entries | map({ + name: "c\(.key)", image: .value, command: ["true"], + }), + containers: [{ name: "pause", image: $pause }] + } + } + } + }' | + kubectl create --filename=- + kubectl rollout status daemonset.apps/image-prefetch --timeout "$INPUT_TIMEOUT" || + kubectl describe daemonset.apps/image-prefetch diff --git a/.github/actions/trivy/action.yaml b/.github/actions/trivy/action.yaml new file mode 100644 index 0000000000..bcc67421cb --- /dev/null +++ b/.github/actions/trivy/action.yaml @@ -0,0 +1,138 @@ +# Copyright 2024 - 2025 Crunchy Data Solutions, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# schema documentation: https://docs.github.com/actions/sharing-automations/creating-actions/metadata-syntax-for-github-actions +# yaml-language-server: $schema=https://json.schemastore.org/github-action.json + +name: Trivy +description: Scan this project using Trivy + +# The Trivy team maintains an action, but it has trouble caching its vulnerability data: +# https://github.com/aquasecurity/trivy-action/issues/389 +# +# 1. It caches vulnerability data once per calendar day, despite Trivy wanting +# to download more frequently than that. +# 2. When it fails to download the data, it fails the workflow *and* caches +# the incomplete data. +# 3. When (1) and (2) coincide, every following run that day *must* update the data, +# producing more opportunities for (2) and more failed workflows. +# +# The action below uses any recent cache matching `cache-prefix` and calculates a cache key +# derived from the data Trivy downloads. An older database is better than no scans at all. +# When a run successfully updates the data, that data is cached and available to other runs. + +inputs: + cache: + default: restore,success,use + description: >- + What Trivy data to cache; one or more of restore, save, success, or use. + The value "use" instructs Trivy to read and write to its cache. + The value "restore" loads the Trivy cache from GitHub. + The value "success" saves the Trivy cache to GitHub when Trivy succeeds. + The value "save" saves the Trivy cache to GitHub regardless of Trivy. + + database: + default: update + description: >- + How Trivy should handle its data; one of update or skip. + The value "skip" fetches no Trivy data at all. + + setup: + default: v0.65.0,cache + description: >- + How to install Trivy; one or more of version, none, or cache. + The value "none" does not install Trivy at all. + + cache-directory: + default: ${{ github.workspace }}/.cache/trivy + description: >- + Directory where Trivy should store its data + + cache-prefix: + default: cache-trivy + description: >- + Name (key) where Trivy data should be stored in the GitHub cache + + scan-target: + default: . + description: >- + What Trivy should scan + + scan-type: + default: repository + description: >- + How Trivy should interpret scan-target; one of filesystem, image, repository, or sbom. + +runs: + using: composite + steps: + # Parse list inputs as separated by commas and spaces. + # Select the maximum version-looking string from `inputs.setup`. + - id: parsed + shell: bash + run: | + # Validate inputs + ( + <<< '${{ inputs.cache }}' jq -rRsS '"cache=\(split("[,\\s]+"; "") - [""])"' + <<< '${{ inputs.setup }}' jq -rRsS ' + "setup=\(split("[,\\s]+"; "") - [""])", + "version=\(split("[,\\s]+"; "") | max_by(split("[v.]"; "") | map(tonumber?)))" + ' + ) | tee --append "${GITHUB_OUTPUT}" + + # Install Trivy as requested. + # NOTE: `setup-trivy` can download a "latest" version but cannot cache it. + - if: ${{ ! contains(fromJSON(steps.parsed.outputs.setup), 'none') }} + uses: aquasecurity/setup-trivy@v0.2.4 + with: + cache: ${{ contains(fromJSON(steps.parsed.outputs.setup), 'cache') }} + version: ${{ steps.parsed.outputs.version }} + + # Restore a recent cache beginning with the prefix. 
+ - id: restore + if: ${{ contains(fromJSON(steps.parsed.outputs.cache), 'restore') }} + uses: actions/cache/restore@v4 + with: + path: ${{ inputs.cache-directory }} + key: ${{ inputs.cache-prefix }}- + + - id: trivy + shell: bash + env: + TRIVY_CACHE_DIR: >- + ${{ contains(fromJSON(steps.parsed.outputs.cache), 'use') && inputs.cache-directory || '' }} + TRIVY_SKIP_CHECK_UPDATE: ${{ inputs.database == 'skip' }} + TRIVY_SKIP_DB_UPDATE: ${{ inputs.database == 'skip' }} + TRIVY_SKIP_JAVA_DB_UPDATE: ${{ inputs.database == 'skip' }} + TRIVY_SKIP_VEX_REPO_UPDATE: ${{ inputs.database == 'skip' }} + run: | + # Run Trivy + trivy '${{ inputs.scan-type }}' '${{ inputs.scan-target }}' || result=$? + + checksum=$([[ -z "${TRIVY_CACHE_DIR}" ]] || cat "${TRIVY_CACHE_DIR}/"*/metadata.json | sha256sum) + echo 'cache-key=${{ inputs.cache-prefix }}-'"${checksum%% *}" >> "${GITHUB_OUTPUT}" + + exit "${result-0}" + + # Save updated data to the cache when requested. + - if: >- + ${{ + steps.restore.outcome == 'success' && + steps.restore.outputs.cache-matched-key == steps.trivy.outputs.cache-key + }} + shell: bash + run: | + # Cache hit on ${{ steps.restore.outputs.cache-matched-key }} + - if: >- + ${{ + steps.restore.outputs.cache-matched-key != steps.trivy.outputs.cache-key && + ( + (contains(fromJSON(steps.parsed.outputs.cache), 'save') && !cancelled()) || + (contains(fromJSON(steps.parsed.outputs.cache), 'success') && success()) + ) + }} + uses: actions/cache/save@v4 + with: + key: ${{ steps.trivy.outputs.cache-key }} + path: ${{ inputs.cache-directory }} diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..8a16fc8d6f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,63 @@ +# Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# documentation: https://docs.github.com/code-security/dependabot/dependabot-version-updates +# schema documentation: https://docs.github.com/code-security/dependabot/working-with-dependabot/dependabot-options-reference +# yaml-language-server: $schema=https://json.schemastore.org/dependabot-2.0.json +# +# Dependabot allows only one schedule per package-ecosystem, directory, and target-branch. +# Configurations that lack a "target-branch" field also affect security updates. 
+# +# There is a hack to have *two* schedules: https://github.com/dependabot/dependabot-core/issues/1778#issuecomment-1988140219 +--- +version: 2 + +updates: + - package-ecosystem: github-actions + directories: + # "/" is a special case that includes ".github/workflows/*" + - '/' + - '.github/actions/*' + registries: '*' + schedule: + interval: weekly + day: tuesday + labels: + - dependencies + groups: + # Group security updates into one pull request + action-vulnerabilities: + applies-to: security-updates + patterns: ['*'] + + # Group version updates into one pull request + github-actions: + applies-to: version-updates + patterns: ['*'] + + - package-ecosystem: gomod + directory: '/' + registries: '*' + schedule: + interval: weekly + day: wednesday + labels: + - dependencies + groups: + # Group security updates into one pull request + go-vulnerabilities: + applies-to: security-updates + patterns: ['*'] + + # Group Kubernetes and OpenTelemetry version updates into separate pull requests + kubernetes: + patterns: ['k8s.io/*', 'sigs.k8s.io/*'] + opentelemetry: + patterns: ['go.opentelemetry.io/*'] + go-dependencies: + patterns: ['*'] + exclude-patterns: + - 'k8s.io/*' + - 'sigs.k8s.io/*' + - 'go.opentelemetry.io/*' diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..b03369bf09 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,30 @@ +**Checklist:** + + + - [ ] Have you added an explanation of what your changes do and why you'd like them to be included? + - [ ] Have you updated or added documentation for the change, as applicable? + - [ ] Have you tested your changes on all related environments with successful results, as applicable? + - [ ] Have you added automated tests? + + + +**Type of Changes:** + + + - [ ] New feature + - [ ] Bug fix + - [ ] Documentation + - [ ] Testing enhancement + - [ ] Other + + +**What is the current behavior (link to any open issues here)?** + + + +**What is the new behavior (if this is a feature change)?** +- [ ] Breaking change (fix or feature that would cause existing functionality to change) + + + +**Other Information**: diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml new file mode 100644 index 0000000000..16a6d85e12 --- /dev/null +++ b/.github/workflows/codeql-analysis.yaml @@ -0,0 +1,37 @@ +# https://codeql.github.com +name: CodeQL + +on: + pull_request: + branches: + - REL_5_8 + push: + branches: + - REL_5_8 + schedule: + - cron: '10 18 * * 2' + +jobs: + analyze: + if: ${{ github.repository == 'CrunchyData/postgres-operator' }} + permissions: + actions: read + contents: read + security-events: write + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: { languages: go } + + - name: Autobuild + # This action calls `make` which runs our "help" target. 
+ uses: github/codeql-action/autobuild@v4 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/govulncheck.yaml b/.github/workflows/govulncheck.yaml new file mode 100644 index 0000000000..6721104401 --- /dev/null +++ b/.github/workflows/govulncheck.yaml @@ -0,0 +1,50 @@ +# https://go.dev/security/vuln +name: govulncheck + +on: + pull_request: + branches: + - REL_5_8 + push: + branches: + - REL_5_8 + +env: + # Use the Go toolchain installed by setup-go + # https://github.com/actions/setup-go/issues/457 + # + # TODO(govulncheck): Remove when "golang/govulncheck-action" uses "actions/setup-go" v6 or newer + GOTOOLCHAIN: local + +jobs: + vulnerabilities: + if: ${{ github.repository == 'CrunchyData/postgres-operator' }} + permissions: + security-events: write + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + # Install Go and produce a SARIF report. This fails only when the tool is + # unable to scan. + - name: Prepare report + uses: golang/govulncheck-action@v1 + with: + output-file: 'govulncheck-results.sarif' + output-format: 'sarif' + repo-checkout: false + + # Submit the SARIF report to GitHub code scanning. Pull request checks + # succeed or fail according to branch protection rules. + # - https://docs.github.com/en/code-security/code-scanning + - name: Upload results to GitHub + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: 'govulncheck-results.sarif' + + # Print any detected vulnerabilities to the workflow log. This step fails + # when the tool detects a vulnerability in code that is called. + # - https://go.dev/blog/govulncheck + - name: Log results + run: govulncheck --format text --show verbose ./... diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000000..230e2a7fae --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,32 @@ +name: Linters + +on: + pull_request: + branches: + - REL_5_8 + +jobs: + golangci-lint: + runs-on: ubuntu-24.04 + permissions: + contents: read + checks: write + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - uses: golangci/golangci-lint-action@v9 + with: + version: latest + args: --timeout=5m + + # Count issues reported by disabled linters. The command always + # exits zero to ensure it does not fail the pull request check. 
+ - name: Count non-blocking issues + run: | + golangci-lint run --config .golangci.next.yaml --show-stats >> "${GITHUB_STEP_SUMMARY}" \ + --max-issues-per-linter=0 \ + --max-same-issues=0 \ + --uniq-by-line=0 \ + --output.text.path=/dev/null ||: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000000..3e0629dd5f --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,199 @@ +name: Tests + +on: + pull_request: + branches: + - REL_5_8 + push: + branches: + - REL_5_8 + +jobs: + go-test: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - name: Ensure go.mod is tidy + run: go mod tidy --diff + - name: Ensure generated files are committed + run: make check-generate + - run: make check + + kubernetes-api: + runs-on: ubuntu-24.04 + needs: [go-test] + strategy: + fail-fast: false + matrix: + kubernetes: ['default'] + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - run: go mod download + - run: ENVTEST_K8S_VERSION="${KUBERNETES#default}" make check-envtest + env: + KUBERNETES: "${{ matrix.kubernetes }}" + GO_TEST: go test --coverprofile 'envtest.coverage' --coverpkg ./internal/... + + # Upload coverage to GitHub + - run: gzip envtest.coverage + - uses: actions/upload-artifact@v5 + with: + name: "~coverage~kubernetes-api=${{ matrix.kubernetes }}" + path: envtest.coverage.gz + retention-days: 1 + + kubernetes-k3d: + if: "${{ github.repository == 'CrunchyData/postgres-operator' }}" + runs-on: ubuntu-24.04 + needs: [go-test] + strategy: + fail-fast: false + matrix: + kubernetes: [v1.30, v1.34] + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - name: Start k3s + uses: ./.github/actions/k3d + with: + k3s-channel: "${{ matrix.kubernetes }}" + prefetch-images: | + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547 + + - run: make createnamespaces check-envtest-existing + env: + PGO_TEST_TIMEOUT_SCALE: 1.2 + GO_TEST: go test --coverprofile 'envtest-existing.coverage' --coverpkg ./internal/... 
+ + # Upload coverage to GitHub + - run: gzip envtest-existing.coverage + - uses: actions/upload-artifact@v5 + with: + name: "~coverage~kubernetes-k3d=${{ matrix.kubernetes }}" + path: envtest-existing.coverage.gz + retention-days: 1 + + e2e-k3d-kuttl: + runs-on: ubuntu-24.04 + needs: [go-test] + strategy: + fail-fast: false + matrix: + kubernetes: [v1.30, v1.34] + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + + - name: Start k3s + uses: ./.github/actions/k3d + with: + k3s-channel: "${{ matrix.kubernetes }}" + prefetch-timeout: 5m + prefetch-images: | + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.1-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547 + + - name: Get pgMonitor files. + run: make get-pgmonitor + env: + PGMONITOR_DIR: "${{ github.workspace }}/hack/tools/pgmonitor" + QUERIES_CONFIG_DIR: "${{ github.workspace }}/hack/tools/queries" + + - run: go mod download + - name: Build executable + run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator + + # Start a Docker container with the working directory mounted. + - name: Start PGO + run: | + kubectl apply --server-side -k ./config/namespace + kubectl apply --server-side -k ./config/dev + hack/create-kubeconfig.sh postgres-operator pgo + docker run --detach --network host --read-only \ + --volume "$(pwd):/mnt" --workdir '/mnt' --env 'PATH=/mnt/bin' \ + --env 'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \ + --env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \ + --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547' \ + --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547' \ + --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547' \ + --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.1-2547' \ + --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547' \ + --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547' \ + --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547' \ + --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.5-0' \ + --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ + --name 'postgres-operator' ubuntu \ + postgres-operator + + - run: make generate-kuttl + env: + KUTTL_PG_UPGRADE_FROM_VERSION: '16' + KUTTL_PG_UPGRADE_TO_VERSION: '17' + KUTTL_PG_VERSION: '16' + KUTTL_POSTGIS_VERSION: '3.4' + KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547' + - run: | + make check-kuttl && exit + failed=$? 
+ echo '::group::PGO logs'; docker logs 'postgres-operator'; echo '::endgroup::' + exit $failed + + - name: Stop PGO + run: docker stop 'postgres-operator' || true + + coverage-report: + if: ${{ success() || contains(needs.*.result, 'success') }} + runs-on: ubuntu-24.04 + needs: + - kubernetes-api + - kubernetes-k3d + - e2e-k3d-kuttl + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: { go-version: stable } + - uses: actions/download-artifact@v6 + with: { path: download } + + # Combine the coverage profiles by taking the mode line from any one file + # and the data from all files. Write a list of functions with less than + # 100% coverage to the job summary, and upload a complete HTML report. + - name: Generate report + run: | + gunzip --keep download/*/*.gz + ( sed -e '1q' download/*/*.coverage + tail -qn +2 download/*/*.coverage ) > total.coverage + go tool cover --func total.coverage -o total-coverage.txt + go tool cover --html total.coverage -o total-coverage.html + + awk < total-coverage.txt ' + END { print "
<details><summary>Total Coverage: <code>" $3 " " $2 "</code></summary>" } + ' >> "${GITHUB_STEP_SUMMARY}" + + sed < total-coverage.txt -e '/100.0%/d' -e "s,$(go list -m)/,," | column -t | awk ' + NR == 1 { print "\n\n```" } { print } END { if (NR > 0) print "```\n\n"; print "</details>
" } + ' >> "${GITHUB_STEP_SUMMARY}" + + # Upload coverage to GitHub + - run: gzip total-coverage.html + - uses: actions/upload-artifact@v5 + with: + name: coverage-report=html + path: total-coverage.html.gz + retention-days: 15 diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml new file mode 100644 index 0000000000..e3fd63b2ee --- /dev/null +++ b/.github/workflows/trivy.yaml @@ -0,0 +1,127 @@ +# https://aquasecurity.github.io/trivy +name: Trivy + +on: + pull_request: + branches: + - REL_5_8 + push: + branches: + - REL_5_8 + +env: + # Use the committed Trivy configuration files. + TRIVY_IGNOREFILE: .trivyignore.yaml + TRIVY_SECRET_CONFIG: trivy-secret.yaml + +jobs: + cache: + # Run only one of these jobs at a time across the entire project. + concurrency: { group: trivy-cache } + # Do not fail this workflow when this job fails. + continue-on-error: true + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - name: Download Trivy + uses: ./.github/actions/trivy + env: + TRIVY_DEBUG: true + TRIVY_DOWNLOAD_DB_ONLY: true + TRIVY_NO_PROGRESS: true + TRIVY_SCANNERS: license,secret,vuln + with: + cache: restore,success,use + database: update + + licenses: + # Run this job after the cache job regardless of its success or failure. + needs: [cache] + if: >- + ${{ !cancelled() }} + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + # Trivy needs a populated Go module cache to detect Go module licenses. + - uses: actions/setup-go@v6 + with: { go-version: stable } + - run: go mod download + + # Report success only when detected licenses are listed in [.trivyignore.yaml]. + - name: Scan licenses + uses: ./.github/actions/trivy + env: + TRIVY_DEBUG: true + TRIVY_EXIT_CODE: 1 + TRIVY_SCANNERS: license + with: + cache: restore,use + database: skip + + secrets: + # Run this job after the cache job regardless of its success or failure. + needs: [cache] + if: >- + ${{ !cancelled() }} + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + # Report success only when detected secrets are listed in [.trivyignore.yaml]. + - name: Scan secrets + uses: ./.github/actions/trivy + env: + TRIVY_EXIT_CODE: 1 + TRIVY_SCANNERS: secret + with: + cache: restore,use + database: skip + + vulnerabilities: + # Run this job after the cache job regardless of its success or failure. + needs: [cache] + if: >- + ${{ github.repository == 'CrunchyData/postgres-operator' && !cancelled() }} + permissions: + security-events: write + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + # Print any detected secrets or vulnerabilities to the workflow log for + # human consumption. This step fails only when Trivy is unable to scan. + # A later step uploads results to GitHub as a pull request check. + - name: Log detected vulnerabilities + uses: ./.github/actions/trivy + env: + TRIVY_SCANNERS: secret,vuln + with: + cache: restore,use + database: skip + + # Produce a SARIF report of actionable results. This step fails only when + # Trivy is unable to scan. + - name: Report actionable vulnerabilities + uses: ./.github/actions/trivy + env: + TRIVY_IGNORE_UNFIXED: true + TRIVY_FORMAT: 'sarif' + TRIVY_OUTPUT: 'trivy-results.sarif' + TRIVY_SCANNERS: secret,vuln + with: + cache: use + database: skip + setup: none + + # Submit the SARIF report to GitHub code scanning. Pull requests checks + # succeed or fail according to branch protection rules. 
+ # - https://docs.github.com/en/code-security/code-scanning + - name: Upload results to GitHub + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: 'trivy-results.sarif' diff --git a/.golangci.next.yaml b/.golangci.next.yaml index 6b76d7b1d2..2aa389e841 100644 --- a/.golangci.next.yaml +++ b/.golangci.next.yaml @@ -4,39 +4,95 @@ # Rules that should be enforced immediately belong in [.golangci.yaml]. # # Both files are used by [.github/workflows/lint.yaml]. +version: "2" +# https://golangci-lint.run/usage/linters linters: - disable-all: true - enable: - - contextcheck - - err113 - - gocritic - - godot - - godox - - gofumpt - - gosec # exclude-use-default - - nilnil + default: all + disable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - copyloopvar + - depguard + - dupword + - durationcheck + - errchkjson + - errname + - errorlint + - exhaustive + - exptostd + - fatcontext + - forbidigo + - ginkgolinter + - gocheckcompilerdirectives + - gochecksumtype + - goheader + - gomoddirectives + - gomodguard + - goprintffuncname + - gosmopolitan + - grouper + - iface + - importas + - interfacebloat + - intrange + - loggercheck + - makezero + - mirror + - misspell + - musttag + - nilerr + - nilnesserr + - noctx - nolintlint - - predeclared - - revive - - staticcheck # exclude-use-default - - tenv - - thelper - - tparallel + - nosprintfhostport + - prealloc + - promlinter + - protogetter + - reassign + - recvcheck + - rowserrcheck + - sloglint + - spancheck + - sqlclosecheck + - tagalign + - testifylint + - unconvert + - unparam + - usestdlibvars + - usetesting - wastedassign + - wsl + - zerologlint -issues: - exclude-rules: - # We call external linters when they are installed: Flake8, ShellCheck, etc. - - linters: [gosec] - path: '_test[.]go$' - text: 'G204: Subprocess launched with variable' + settings: + thelper: + # https://github.com/kulti/thelper/issues/27 + tb: { begin: true, first: true } + test: { begin: true, first: true, name: true } + + exclusions: + warn-unused: true + # Ignore built-in exclusions + presets: [] + rules: + # We call external linters when they are installed: Flake8, ShellCheck, etc. 
+ - linters: [gosec] + path: '_test[.]go$' + text: 'G204: Subprocess launched with variable' - # https://github.com/golangci/golangci-lint/issues/2239 - exclude-use-default: false +# https://golangci-lint.run/usage/formatters +formatters: + enable: + - gofumpt + +issues: + # Fix only when requested + fix: false -linters-settings: - thelper: - # https://github.com/kulti/thelper/issues/27 - tb: { begin: true, first: true } - test: { begin: true, first: true, name: true } + # Show all issues at once + max-issues-per-linter: 0 + max-same-issues: 0 + uniq-by-line: false diff --git a/.golangci.yaml b/.golangci.yaml index da19e26976..55a54549f6 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,121 +1,201 @@ # https://golangci-lint.run/usage/configuration/ +version: "2" +# https://golangci-lint.run/usage/linters linters: - disable: - - contextcheck - - gci - - gofumpt + default: standard enable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - copyloopvar - depguard + - dupword + - durationcheck + - errchkjson + - errname + - errorlint + - exhaustive + - exptostd + - fatcontext + - forbidigo + - ginkgolinter + - gocheckcompilerdirectives + - gochecksumtype - goheader + - gomoddirectives - gomodguard - - gosimple + - goprintffuncname + - gosec + - gosmopolitan + - grouper + - iface - importas + - interfacebloat + - intrange + - loggercheck + - makezero + - mirror - misspell + - musttag + - nilerr + - nilnesserr + - noctx + - nolintlint + - nosprintfhostport + - prealloc + - promlinter + - protogetter + - reassign + - recvcheck + - rowserrcheck + - sloglint + - spancheck + - sqlclosecheck + - tagalign + - testifylint - unconvert - presets: - - bugs - - format - - unused - -linters-settings: - depguard: + - unparam + - usestdlibvars + - usetesting + - wastedassign + - zerologlint + + settings: + dupword: + ignore: + # We might see duplicate instances of 'fi' if we end two bash 'if' statements + - fi + + depguard: + rules: + everything: + files: ['$all'] + list-mode: lax + allow: + - go.opentelemetry.io/otel/semconv/v1.27.0 + deny: + - pkg: go.opentelemetry.io/otel/semconv + desc: Use "go.opentelemetry.io/otel/semconv/v1.27.0" instead. + - pkg: io/ioutil + desc: Use the "io" and "os" packages instead. See https://go.dev/doc/go1.16#ioutil + - pkg: math/rand$ + desc: Use the "math/rand/v2" package instead. See https://go.dev/doc/go1.22#math_rand_v2 + not-tests: + files: ['!$test','!**/internal/testing/**'] + list-mode: lax + deny: + - pkg: net/http/httptest + desc: Should be used only in tests. + - pkg: testing/* + desc: The "testing" packages should be used only in tests. + - pkg: github.com/crunchydata/postgres-operator/internal/crd/* + desc: The "internal/crd" packages should be used only in tests. + - pkg: github.com/crunchydata/postgres-operator/internal/testing/* + desc: The "internal/testing" packages should be used only in tests. + - pkg: k8s.io/client-go/discovery + desc: Use the "internal/kubernetes" package instead. + tests: + files: ['$test'] + list-mode: lax + deny: + - pkg: github.com/pkg/errors + desc: Use the "errors" package unless you are interacting with stack traces. + + errchkjson: + check-error-free-encoding: true + + goheader: + template: |- + Copyright {{ DATES }} Crunchy Data Solutions, Inc. 
+ + SPDX-License-Identifier: Apache-2.0 + values: + regexp: + DATES: ((201[7-9]|202[0-4]) - 2025|2025) + + gomodguard: + blocked: + modules: + - go.yaml.in/yaml/v2: { recommendations: [sigs.k8s.io/yaml] } + - go.yaml.in/yaml/v3: { recommendations: [sigs.k8s.io/yaml] } + - gopkg.in/yaml.v2: { recommendations: [sigs.k8s.io/yaml] } + - gopkg.in/yaml.v3: { recommendations: [sigs.k8s.io/yaml] } + - gotest.tools: { recommendations: [gotest.tools/v3] } + - k8s.io/kubernetes: + reason: k8s.io/kubernetes is for building kubelet, kubeadm, etc. + + importas: + no-unaliased: true + alias: + - pkg: k8s.io/api/(\w+)/(v[\w\w]+) + alias: $1$2 + - pkg: k8s.io/apimachinery/pkg/apis/(\w+)/(v[\w\d]+) + alias: $1$2 + - pkg: k8s.io/apimachinery/pkg/api/errors + alias: apierrors + + spancheck: + checks: [end, record-error] + extra-start-span-signatures: + - github.com/crunchydata/postgres-operator/internal/tracing.Start:opentelemetry + ignore-check-signatures: + - tracing.Escape + + exclusions: + warn-unused: true + presets: + - common-false-positives + - legacy + - std-error-handling rules: - everything: - list-mode: lax - allow: - - go.opentelemetry.io/otel/semconv/v1.27.0 - deny: - - pkg: go.opentelemetry.io/otel/semconv - desc: Use "go.opentelemetry.io/otel/semconv/v1.27.0" instead. - - - pkg: io/ioutil - desc: > - Use the "io" and "os" packages instead. - See https://go.dev/doc/go1.16#ioutil - - not-tests: - files: ['!$test'] - deny: - - pkg: net/http/httptest - desc: Should be used only in tests. - - - pkg: testing/* - desc: The "testing" packages should be used only in tests. - - - pkg: github.com/crunchydata/postgres-operator/internal/testing/* - desc: The "internal/testing" packages should be used only in tests. - - - pkg: k8s.io/client-go/discovery - desc: Use the "internal/kubernetes" package instead. - - tests: - files: ['$test'] - deny: - - pkg: github.com/pkg/errors - desc: Use the "errors" package unless you are interacting with stack traces. - - errchkjson: - check-error-free-encoding: true - - exhaustive: - default-signifies-exhaustive: true - - goheader: - template: |- - Copyright {{ DATES }} Crunchy Data Solutions, Inc. - - SPDX-License-Identifier: Apache-2.0 - values: - regexp: - DATES: '((201[7-9]|202[0-4]) - 2025|2025)' - - goimports: - local-prefixes: github.com/crunchydata/postgres-operator - - gomodguard: - blocked: - modules: - - gopkg.in/yaml.v2: { recommendations: [sigs.k8s.io/yaml] } - - gopkg.in/yaml.v3: { recommendations: [sigs.k8s.io/yaml] } - - gotest.tools: { recommendations: [gotest.tools/v3] } - - k8s.io/kubernetes: - reason: > - k8s.io/kubernetes is for managing dependencies of the Kubernetes - project, i.e. building kubelet and kubeadm. - - gosec: - excludes: - # Flags for potentially-unsafe casting of ints, similar problem to globally-disabled G103 - - G115 - - importas: - alias: - - pkg: k8s.io/api/(\w+)/(v[\w\w]+) - alias: $1$2 - - pkg: k8s.io/apimachinery/pkg/apis/(\w+)/(v[\w\d]+) - alias: $1$2 - - pkg: k8s.io/apimachinery/pkg/api/errors - alias: apierrors - no-unaliased: true - - spancheck: - checks: [end, record-error] - extra-start-span-signatures: - - 'github.com/crunchydata/postgres-operator/internal/tracing.Start:opentelemetry' - ignore-check-signatures: - - 'tracing.Escape' + # It is fine for tests to use "math/rand" packages. + - linters: [gosec] + path: '(.+)_test[.]go' + text: weak random number generator + + # This internal package is the one place we want to do API discovery. 
+ - linters: [depguard] + path: internal/kubernetes/discovery.go + text: k8s.io/client-go/discovery + + # Postgres HBA rules often include "all all all" + - linters: [dupword] + path: /(hba|postgres)[^/]+$ + text: words \(all\) found + + # These value types have unmarshal methods. + # https://github.com/raeperd/recvcheck/issues/7 + - linters: [recvcheck] + path: internal/pki/pki.go + text: methods of "(Certificate|PrivateKey)" + + - linters: [staticcheck] + text: corev1.(Endpoints|EndpointSubset) is deprecated + + - linters: [staticcheck] + path: internal/controller/ + text: >- + deprecated: Use `RequeueAfter` instead + +# https://golangci-lint.run/usage/formatters +formatters: + enable: + - gci + - gofmt + settings: + gci: + sections: + - standard + - default + - localmodule issues: - exclude-generated: strict - exclude-rules: - # This internal package is the one place we want to do API discovery. - - linters: [depguard] - path: internal/kubernetes/discovery.go - text: k8s.io/client-go/discovery - - # These value types have unmarshal methods. - # https://github.com/raeperd/recvcheck/issues/7 - - linters: [recvcheck] - path: internal/pki/pki.go - text: 'methods of "(Certificate|PrivateKey)"' + # Fix only when requested + fix: false + + # Show all issues at once + max-issues-per-linter: 0 + max-same-issues: 0 + uniq-by-line: false diff --git a/Makefile b/Makefile index 1ec77512ea..ad32ad2f86 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ BUILDAH_BUILD ?= buildah bud GO ?= go GO_BUILD = $(GO) build GO_TEST ?= $(GO) test -KUTTL ?= kubectl-kuttl +KUTTL ?= $(GO) run github.com/kudobuilder/kuttl/cmd/kubectl-kuttl@latest KUTTL_TEST ?= $(KUTTL) test ENVTEST_K8S_VERSION ?= 1.34 From cfe60ab8c69d8047e6204336e4a964e1a56a6b5f Mon Sep 17 00:00:00 2001 From: andrewlecuyer Date: Fri, 9 May 2025 21:24:37 +0000 Subject: [PATCH 75/79] Skip Tests that Write to Job Status During EnvTest Existing Runs --- .../postgrescluster/postgres_test.go | 5 +++++ .../postgrescluster/snapshots_test.go | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/internal/controller/postgrescluster/postgres_test.go b/internal/controller/postgrescluster/postgres_test.go index db33e7f074..e1a1a5da0f 100644 --- a/internal/controller/postgrescluster/postgres_test.go +++ b/internal/controller/postgrescluster/postgres_test.go @@ -9,6 +9,8 @@ import ( "errors" "fmt" "io" + "os" + "strings" "testing" "github.com/go-logr/logr/funcr" @@ -526,6 +528,9 @@ volumeMode: Filesystem }) t.Run("DataVolumeSourceClusterWithGoodSnapshot", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } cluster := testCluster() ns := setupNamespace(t, tClient) cluster.Namespace = ns.Name diff --git a/internal/controller/postgrescluster/snapshots_test.go b/internal/controller/postgrescluster/snapshots_test.go index 4c0ea36761..caa681e3e1 100644 --- a/internal/controller/postgrescluster/snapshots_test.go +++ b/internal/controller/postgrescluster/snapshots_test.go @@ -6,6 +6,8 @@ package postgrescluster import ( "context" + "os" + "strings" "testing" "time" @@ -181,6 +183,9 @@ func TestReconcileVolumeSnapshots(t *testing.T) { }) t.Run("SnapshotsEnabledReadySnapshotsExist", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create a volume snapshot class volumeSnapshotClassName := "my-snapshotclass" volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{ @@ 
-455,6 +460,9 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledBackupExistsCreateRestore", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() @@ -500,6 +508,9 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledSuccessfulRestoreExists", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() @@ -562,6 +573,9 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledFailedRestoreExists", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() @@ -838,6 +852,9 @@ func TestGetLatestCompleteBackupJob(t *testing.T) { }) t.Run("OneCompleteBackupJob", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } currentTime := metav1.Now() currentStartTime := metav1.NewTime(currentTime.AddDate(0, 0, -1)) @@ -865,6 +882,9 @@ func TestGetLatestCompleteBackupJob(t *testing.T) { }) t.Run("TwoCompleteBackupJobs", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } currentTime := metav1.Now() currentStartTime := metav1.NewTime(currentTime.AddDate(0, 0, -1)) earlierTime := metav1.NewTime(currentTime.AddDate(-1, 0, 0)) From 8d1b3d431d6b67cd292063ad5b68d5559a61ef63 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 3 Dec 2025 11:47:06 -0800 Subject: [PATCH 76/79] bump crypto package to 0.45 and go to 1.24 --- go.mod | 18 +++++++++--------- go.sum | 32 ++++++++++++++++---------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/go.mod b/go.mod index 1216780747..8ac52ebaf2 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,7 @@ module github.com/crunchydata/postgres-operator // If this is changing when you don't want it to, see hack/go-get.sh -go 1.23.0 +go 1.24.0 require ( github.com/go-logr/logr v1.4.2 @@ -19,8 +19,8 @@ require ( go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/sdk v1.32.0 go.opentelemetry.io/otel/trace v1.32.0 - golang.org/x/crypto v0.36.0 - golang.org/x/tools v0.28.0 + golang.org/x/crypto v0.45.0 + golang.org/x/tools v0.38.0 gotest.tools/v3 v3.5.1 k8s.io/api v0.31.0 k8s.io/apimachinery v0.31.0 @@ -102,13 +102,13 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect - golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.38.0 // indirect + golang.org/x/mod v0.29.0 // indirect + golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.12.0 // indirect - golang.org/x/sys v0.31.0 // indirect - golang.org/x/term v0.30.0 // indirect - golang.org/x/text v0.23.0 // indirect + golang.org/x/sync v0.18.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/term v0.37.0 // indirect + golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect diff --git a/go.sum b/go.sum index 4886398d01..abaef7095e 100644 --- a/go.sum +++ b/go.sum @@ -208,48 +208,48 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= -golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 h1:LoYXNGAShUG3m/ehNk4iFctuhGX/+R1ZpfJ4/ia80JM= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= -golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= -golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= -golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= -golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 7cf192f21b14b1e2ce12334aa9099d8fa25d44a0 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 3 Dec 2025 23:42:30 -0800 Subject: [PATCH 77/79] Adjustments to appease golangci linter --- internal/bridge/client.go | 36 +++++++++--------- internal/bridge/client_test.go | 18 ++++----- internal/bridge/crunchybridgecluster/apply.go | 2 +- .../crunchybridgecluster_controller.go | 6 +-- .../bridge/crunchybridgecluster/delete.go | 2 +- .../crunchybridgecluster/delete_test.go | 4 +- .../crunchybridgecluster/mock_bridge_api.go | 1 - .../bridge/crunchybridgecluster/postgres.go | 5 +-- .../crunchybridgecluster/postgres_test.go | 3 +- internal/bridge/installation_test.go | 10 ++--- internal/config/config.go | 2 +- internal/controller/pgupgrade/jobs.go | 2 +- .../postgrescluster/cluster_test.go | 36 +++++++++--------- .../postgrescluster/controller_ref_manager.go | 2 +- .../postgrescluster/controller_test.go | 1 - .../controller/postgrescluster/instance.go | 4 +- .../postgrescluster/instance_test.go | 1 - .../postgrescluster/patroni_test.go | 14 +++---- 
.../postgrescluster/pgadmin_test.go | 12 +++--- .../controller/postgrescluster/pgbackrest.go | 18 ++++----- .../postgrescluster/pgbackrest_test.go | 29 ++++++++------- .../postgrescluster/pgbouncer_test.go | 12 +++--- .../controller/postgrescluster/pgmonitor.go | 2 +- internal/controller/postgrescluster/pki.go | 4 +- .../controller/postgrescluster/pki_test.go | 14 +++---- .../controller/postgrescluster/postgres.go | 4 +- .../controller/postgrescluster/snapshots.go | 3 +- .../postgrescluster/snapshots_test.go | 37 +++++++++---------- .../controller/postgrescluster/suite_test.go | 4 +- .../controller/postgrescluster/util_test.go | 11 +++--- .../controller/postgrescluster/volumes.go | 18 ++++----- internal/controller/runtime/runtime.go | 3 +- .../controller/standalone_pgadmin/apply.go | 2 +- .../standalone_pgadmin/configmap.go | 3 +- .../standalone_pgadmin/configmap_test.go | 4 +- .../standalone_pgadmin/controller.go | 4 +- internal/controller/standalone_pgadmin/pod.go | 2 +- .../controller/standalone_pgadmin/related.go | 14 +++---- .../controller/standalone_pgadmin/service.go | 10 ++--- .../standalone_pgadmin/statefulset.go | 9 ++--- .../controller/standalone_pgadmin/users.go | 8 ++-- .../standalone_pgadmin/users_test.go | 20 +++++----- .../controller/standalone_pgadmin/volume.go | 3 +- internal/kubernetes/discovery.go | 2 +- internal/patroni/config.go | 2 +- internal/patroni/config_test.go | 5 +-- internal/pgadmin/config_test.go | 4 +- internal/pgadmin/users_test.go | 2 +- internal/pgbackrest/config.go | 2 +- internal/pgbackrest/config_test.go | 6 +-- internal/pgbackrest/pgbackrest_test.go | 6 +-- internal/pgbackrest/reconcile_test.go | 2 +- internal/pgbackrest/util_test.go | 8 ++-- internal/pgbouncer/config_test.go | 2 +- internal/pgbouncer/postgres.go | 2 +- internal/pki/encoding_test.go | 8 ++-- internal/pki/pki_test.go | 10 ++--- internal/postgres/config_test.go | 24 ++++++------ internal/postgres/exec_test.go | 2 +- internal/postgres/password/md5.go | 1 - internal/postgres/password/scram.go | 2 +- internal/shell/paths_test.go | 2 +- internal/testing/events/recorder.go | 2 +- internal/testing/require/exec.go | 2 +- internal/util/secrets_test.go | 4 +- 65 files changed, 240 insertions(+), 259 deletions(-) diff --git a/internal/bridge/client.go b/internal/bridge/client.go index 9ec13ec2bb..3e3c4c3b4c 100644 --- a/internal/bridge/client.go +++ b/internal/bridge/client.go @@ -280,7 +280,7 @@ func (c *Client) doWithBackoff( request.Header = headers.Clone() //nolint:bodyclose // This response is returned to the caller. - response, err = c.Client.Do(request) + response, err = c.Do(request) } // An error indicates there was no response from the server, and the @@ -327,7 +327,7 @@ func (c *Client) doWithRetry( // Retry the request when the server responds with "Too many requests". // - https://docs.crunchybridge.com/api-concepts/getting-started/#status-codes // - https://docs.crunchybridge.com/api-concepts/getting-started/#rate-limiting - for err == nil && response.StatusCode == 429 { + for err == nil && response.StatusCode == http.StatusTooManyRequests { seconds, _ := strconv.Atoi(response.Header.Get("Retry-After")) // Only retry when the response indicates how long to wait. 
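The hunks above show the two recurring shapes of this cleanup: calling the promoted c.Do instead of spelling out the embedded c.Client.Do, and comparing against the named http.StatusTooManyRequests constant instead of the literal 429. For reference, here is a minimal standalone sketch of the retry-on-429 pattern that honors the Retry-After header; the package, retryAfter429, and doOnce names are hypothetical illustrations, not this repository's exact doWithRetry:

```go
package bridgeexample

import (
	"net/http"
	"strconv"
	"time"
)

// retryAfter429 repeats a request while the server answers
// 429 Too Many Requests and says how long to wait.
func retryAfter429(doOnce func() (*http.Response, error)) (*http.Response, error) {
	response, err := doOnce()

	// Compare against the named constant, as the hunk above now does,
	// rather than the bare literal 429.
	for err == nil && response.StatusCode == http.StatusTooManyRequests {
		seconds, _ := strconv.Atoi(response.Header.Get("Retry-After"))

		// Only retry when the response indicates how long to wait.
		if seconds <= 0 {
			break
		}

		_ = response.Body.Close()
		time.Sleep(time.Duration(seconds) * time.Second)
		response, err = doOnce()
	}
	return response, err
}
```

A caller would pass a closure that rebuilds and sends the request each time, since a request body generally cannot be re-read after the first attempt.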
@@ -378,11 +378,11 @@ func (c *Client) CreateAuthObject(ctx context.Context, authn AuthObject) (AuthOb } // 401, Unauthorized - case response.StatusCode == 401: + case response.StatusCode == http.StatusUnauthorized: err = fmt.Errorf("%w: %s", errAuthentication, body) default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -409,7 +409,7 @@ func (c *Client) CreateInstallation(ctx context.Context) (Installation, error) { } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -445,7 +445,7 @@ func (c *Client) ListClusters(ctx context.Context, apiKey, teamId string) ([]*Cl } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -486,7 +486,7 @@ func (c *Client) CreateCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -524,14 +524,14 @@ func (c *Client) DeleteCluster(ctx context.Context, apiKey, id string) (*Cluster // --https://docs.crunchybridge.com/api-concepts/idempotency#delete-semantics // But also, if we can't find it... // Maybe if no ID we return already deleted? - case response.StatusCode == 410: + case response.StatusCode == http.StatusGone: fallthrough - case response.StatusCode == 404: + case response.StatusCode == http.StatusNotFound: deletedAlready = true err = nil default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -565,7 +565,7 @@ func (c *Client) GetCluster(ctx context.Context, apiKey, id string) (*ClusterApi } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -599,7 +599,7 @@ func (c *Client) GetClusterStatus(ctx context.Context, apiKey, id string) (*Clus } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -633,7 +633,7 @@ func (c *Client) GetClusterUpgrade(ctx context.Context, apiKey, id string) (*Clu } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -674,7 +674,7 @@ func (c *Client) UpgradeCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -709,7 +709,7 @@ func (c *Client) UpgradeClusterHA(ctx context.Context, apiKey, id, action string } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -747,7 +747,7 @@ func (c *Client) UpdateCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -777,7 +777,7 @@ func (c *Client) GetClusterRole(ctx context.Context, apiKey, clusterId, roleName } default: - //nolint:goerr113 // This is intentionally dynamic. 
+ //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -807,7 +807,7 @@ func (c *Client) ListClusterRoles(ctx context.Context, apiKey, id string) ([]*Cl } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } diff --git a/internal/bridge/client_test.go b/internal/bridge/client_test.go index 6b464c05b3..f1aa1c8ddd 100644 --- a/internal/bridge/client_test.go +++ b/internal/bridge/client_test.go @@ -31,8 +31,8 @@ func TestClientBackoff(t *testing.T) { client := NewClient("", "") var total time.Duration - for i := 1; i <= 50 && client.Backoff.Steps > 0; i++ { - step := client.Backoff.Step() + for i := 1; i <= 50 && client.Steps > 0; i++ { + step := client.Step() total += step t.Logf("%02d:%20v%20v", i, step, total) @@ -68,7 +68,7 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with one attempt, i.e. no backoff. client := NewClient(server.URL, "xyz") - client.Backoff.Steps = 1 + client.Steps = 1 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -113,8 +113,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 5 + client.Duration = time.Millisecond + client.Steps = 5 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -170,8 +170,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 5 + client.Duration = time.Millisecond + client.Steps = 5 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -190,8 +190,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with lots of brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 100 + client.Duration = time.Millisecond + client.Steps = 100 assert.Equal(t, client.BaseURL.String(), server.URL) ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) diff --git a/internal/bridge/crunchybridgecluster/apply.go b/internal/bridge/crunchybridgecluster/apply.go index baffd16516..6edd870790 100644 --- a/internal/bridge/crunchybridgecluster/apply.go +++ b/internal/bridge/crunchybridgecluster/apply.go @@ -22,7 +22,7 @@ func (r *CrunchyBridgeClusterReconciler) patch( patch client.Patch, options ...client.PatchOption, ) error { options = append([]client.PatchOption{r.Owner}, options...) - return r.Client.Patch(ctx, object, patch, options...) + return r.Patch(ctx, object, patch, options...) 
} // apply sends an apply patch to object's endpoint in the Kubernetes API and diff --git a/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go b/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go index 2e81e7f113..ec9973ade1 100644 --- a/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go +++ b/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go @@ -91,7 +91,7 @@ func (r *CrunchyBridgeClusterReconciler) SetupWithManager( func (r *CrunchyBridgeClusterReconciler) setControllerReference( owner *v1beta1.CrunchyBridgeCluster, controlled client.Object, ) error { - return controllerutil.SetControllerReference(owner, controlled, r.Client.Scheme()) + return controllerutil.SetControllerReference(owner, controlled, r.Scheme()) } //+kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="crunchybridgeclusters",verbs={get,patch,update} @@ -684,7 +684,7 @@ func (r *CrunchyBridgeClusterReconciler) GetSecretKeys( }} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) + r.Get(ctx, client.ObjectKeyFromObject(existing), existing)) if err == nil { if existing.Data["key"] != nil && existing.Data["team"] != nil { @@ -707,7 +707,7 @@ func (r *CrunchyBridgeClusterReconciler) deleteControlled( version := object.GetResourceVersion() exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} - return r.Client.Delete(ctx, object, exactly) + return r.Delete(ctx, object, exactly) } return nil diff --git a/internal/bridge/crunchybridgecluster/delete.go b/internal/bridge/crunchybridgecluster/delete.go index b0a957a0ec..ae44c8036b 100644 --- a/internal/bridge/crunchybridgecluster/delete.go +++ b/internal/bridge/crunchybridgecluster/delete.go @@ -28,7 +28,7 @@ func (r *CrunchyBridgeClusterReconciler) handleDelete( log := ctrl.LoggerFrom(ctx) // If the CrunchyBridgeCluster isn't being deleted, add the finalizer - if crunchybridgecluster.ObjectMeta.DeletionTimestamp.IsZero() { + if crunchybridgecluster.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(crunchybridgecluster, finalizer) { controllerutil.AddFinalizer(crunchybridgecluster, finalizer) if err := r.Update(ctx, crunchybridgecluster); err != nil { diff --git a/internal/bridge/crunchybridgecluster/delete_test.go b/internal/bridge/crunchybridgecluster/delete_test.go index c04daaa131..c86746ef1b 100644 --- a/internal/bridge/crunchybridgecluster/delete_test.go +++ b/internal/bridge/crunchybridgecluster/delete_test.go @@ -65,7 +65,7 @@ func TestHandleDeleteCluster(t *testing.T) { // Get cluster from kubernetes and assert that the deletion timestamp was added assert.NilError(t, tClient.Get(ctx, client.ObjectKeyFromObject(cluster), cluster)) - assert.Check(t, !cluster.ObjectMeta.DeletionTimestamp.IsZero()) + assert.Check(t, !cluster.DeletionTimestamp.IsZero()) // Note: We must run handleDelete multiple times because we don't want to remove the // finalizer until we're sure that the cluster has been deleted from Bridge, so we @@ -107,7 +107,7 @@ func TestHandleDeleteCluster(t *testing.T) { // Get cluster from kubernetes and assert that the deletion timestamp was added assert.NilError(t, tClient.Get(ctx, client.ObjectKeyFromObject(cluster), cluster)) - assert.Check(t, !cluster.ObjectMeta.DeletionTimestamp.IsZero()) + assert.Check(t, !cluster.DeletionTimestamp.IsZero()) // Run handleDelete again to attempt to delete from Bridge, but provide bad api key cluster.Status.ID = "2345" diff --git 
a/internal/bridge/crunchybridgecluster/mock_bridge_api.go b/internal/bridge/crunchybridgecluster/mock_bridge_api.go index f0841dee44..f0439531d1 100644 --- a/internal/bridge/crunchybridgecluster/mock_bridge_api.go +++ b/internal/bridge/crunchybridgecluster/mock_bridge_api.go @@ -13,7 +13,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/bridge" "github.com/crunchydata/postgres-operator/internal/initialize" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) diff --git a/internal/bridge/crunchybridgecluster/postgres.go b/internal/bridge/crunchybridgecluster/postgres.go index a1431ca93f..80096de91b 100644 --- a/internal/bridge/crunchybridgecluster/postgres.go +++ b/internal/bridge/crunchybridgecluster/postgres.go @@ -11,7 +11,6 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -93,7 +92,7 @@ func (r *CrunchyBridgeClusterReconciler) reconcilePostgresRoleSecrets( // Make sure that this cluster's role secret names are not being used by any other // secrets in the namespace allSecretsInNamespace := &corev1.SecretList{} - err := errors.WithStack(r.Client.List(ctx, allSecretsInNamespace, client.InNamespace(cluster.Namespace))) + err := errors.WithStack(r.List(ctx, allSecretsInNamespace, client.InNamespace(cluster.Namespace))) if err != nil { return nil, nil, err } @@ -116,7 +115,7 @@ func (r *CrunchyBridgeClusterReconciler) reconcilePostgresRoleSecrets( selector, err := naming.AsSelector(naming.CrunchyBridgeClusterPostgresRoles(cluster.Name)) if err == nil { err = errors.WithStack( - r.Client.List(ctx, secrets, + r.List(ctx, secrets, client.InNamespace(cluster.Namespace), client.MatchingLabelsSelector{Selector: selector}, )) diff --git a/internal/bridge/crunchybridgecluster/postgres_test.go b/internal/bridge/crunchybridgecluster/postgres_test.go index e9454bd4ee..6fae4fe26a 100644 --- a/internal/bridge/crunchybridgecluster/postgres_test.go +++ b/internal/bridge/crunchybridgecluster/postgres_test.go @@ -8,12 +8,11 @@ import ( "context" "testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "gotest.tools/v3/assert" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/crunchydata/postgres-operator/internal/bridge" "github.com/crunchydata/postgres-operator/internal/testing/require" diff --git a/internal/bridge/installation_test.go b/internal/bridge/installation_test.go index 766233b8bb..f7a86e2d3a 100644 --- a/internal/bridge/installation_test.go +++ b/internal/bridge/installation_test.go @@ -99,7 +99,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -155,7 +155,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -289,7 +289,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -343,7 +343,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() 
*Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -426,7 +426,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } diff --git a/internal/config/config.go b/internal/config/config.go index cc72b921ed..ed8d87c5d0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -173,7 +173,7 @@ func VerifyImageValues(cluster *v1beta1.PostgresCluster) error { } if len(images) > 0 { - return fmt.Errorf("Missing image(s): %s", images) + return fmt.Errorf("missing image(s): %s", images) } return nil diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index 4879209734..53420cb8fe 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -354,7 +354,7 @@ func pgUpgradeContainerImage(upgrade *v1beta1.PGUpgrade) string { // spec is defined. If it is undefined, an error is returned. func verifyUpgradeImageValue(upgrade *v1beta1.PGUpgrade) error { if pgUpgradeContainerImage(upgrade) == "" { - return fmt.Errorf("Missing crunchy-upgrade image") + return fmt.Errorf("missing crunchy-upgrade image") } return nil } diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index a38a128086..5fa92d32cf 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -137,8 +137,8 @@ func TestCustomLabels(t *testing.T) { t.Run("Cluster", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "global-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "global-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "daisy-instance1", Replicas: initialize.Int32(1), @@ -185,8 +185,8 @@ func TestCustomLabels(t *testing.T) { t.Run("Instance", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "instance-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "instance-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "max-instance", Replicas: initialize.Int32(1), @@ -236,8 +236,8 @@ func TestCustomLabels(t *testing.T) { t.Run("PGBackRest", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbackrest-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbackrest-cluster" + cluster.Namespace = ns.Name cluster.Spec.Backups.PGBackRest.Metadata = &v1beta1.Metadata{ Labels: map[string]string{"my.pgbackrest.label": "lucy"}, } @@ -280,8 +280,8 @@ func TestCustomLabels(t *testing.T) { t.Run("PGBouncer", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbouncer-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbouncer-cluster" + cluster.Namespace = ns.Name cluster.Spec.Proxy.PGBouncer.Metadata = &v1beta1.Metadata{ Labels: map[string]string{"my.pgbouncer.label": "lucy"}, } @@ -375,8 +375,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("Cluster", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "global-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "global-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ 
Name: "daisy-instance1", Replicas: initialize.Int32(1), @@ -424,8 +424,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("Instance", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "instance-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "instance-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "max-instance", Replicas: initialize.Int32(1), @@ -475,8 +475,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("PGBackRest", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbackrest-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbackrest-cluster" + cluster.Namespace = ns.Name cluster.Spec.Backups.PGBackRest.Metadata = &v1beta1.Metadata{ Annotations: map[string]string{"my.pgbackrest.annotation": "lucy"}, } @@ -519,8 +519,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("PGBouncer", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbouncer-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbouncer-cluster" + cluster.Namespace = ns.Name cluster.Spec.Proxy.PGBouncer.Metadata = &v1beta1.Metadata{ Annotations: map[string]string{"my.pgbouncer.annotation": "lucy"}, } @@ -768,12 +768,12 @@ type: ClusterIP assert.NilError(t, err) // Annotations present in the metadata. - assert.Assert(t, cmp.MarshalMatches(service.ObjectMeta.Annotations, ` + assert.Assert(t, cmp.MarshalMatches(service.Annotations, ` some: note `)) // Labels present in the metadata. - assert.Assert(t, cmp.MarshalMatches(service.ObjectMeta.Labels, ` + assert.Assert(t, cmp.MarshalMatches(service.Labels, ` happy: label postgres-operator.crunchydata.com/cluster: pg2 postgres-operator.crunchydata.com/role: replica diff --git a/internal/controller/postgrescluster/controller_ref_manager.go b/internal/controller/postgrescluster/controller_ref_manager.go index 36f3b67d6d..d229728b12 100644 --- a/internal/controller/postgrescluster/controller_ref_manager.go +++ b/internal/controller/postgrescluster/controller_ref_manager.go @@ -88,7 +88,7 @@ func (r *Reconciler) claimObject(ctx context.Context, postgresCluster *v1beta1.P // At this point the resource has no controller ref and is therefore an orphan. Ignore if // either the PostgresCluster resource or the orphaned resource is being deleted, or if the selector - // for the orphaned resource doesn't doesn't include the proper PostgresCluster label + // for the orphaned resource doesn't include the proper PostgresCluster label _, hasPGClusterLabel := obj.GetLabels()[naming.LabelCluster] if postgresCluster.GetDeletionTimestamp() != nil || !hasPGClusterLabel { return nil diff --git a/internal/controller/postgrescluster/controller_test.go b/internal/controller/postgrescluster/controller_test.go index 243baef94c..4d561d4c7f 100644 --- a/internal/controller/postgrescluster/controller_test.go +++ b/internal/controller/postgrescluster/controller_test.go @@ -14,7 +14,6 @@ import ( . "github.com/onsi/gomega" . "github.com/onsi/gomega/gstruct" "github.com/pkg/errors" //nolint:depguard // This legacy test covers so much code, it logs the origin of unexpected errors. 
- "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 0c4f506c35..d2ac4e3bb5 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -335,7 +335,7 @@ func (r *Reconciler) observeInstances( status.DesiredPGDataVolume = make(map[string]string) for _, instance := range observed.bySet[name] { - status.Replicas += int32(len(instance.Pods)) //nolint:gosec + status.Replicas += int32(len(instance.Pods)) if ready, known := instance.IsReady(); known && ready { status.ReadyReplicas++ @@ -752,7 +752,7 @@ func findAvailableInstanceNames(set v1beta1.PostgresInstanceSetSpec, } // Determine whether or not the PVC is associated with an existing instance within the same - // instance set. If not, then the instance name associated with that PVC can be be reused. + // instance set. If not, then the instance name associated with that PVC can be reused. for _, pvc := range setVolumes { pvcInstanceName := pvc.GetLabels()[naming.LabelInstance] instance := observedInstances.byName[pvcInstanceName] diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index 5eb2e479dd..83afc6d20f 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -1581,7 +1581,6 @@ func TestGenerateInstanceStatefulSetIntent(t *testing.T) { `)) }, }} { - test := test t.Run(test.name, func(t *testing.T) { cluster := test.ip.cluster diff --git a/internal/controller/postgrescluster/patroni_test.go b/internal/controller/postgrescluster/patroni_test.go index 85cd2dddb7..728b75aee3 100644 --- a/internal/controller/postgrescluster/patroni_test.go +++ b/internal/controller/postgrescluster/patroni_test.go @@ -97,12 +97,12 @@ ownerReferences: assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "pg2", "postgres-operator.crunchydata.com/patroni": "pg2-ha", @@ -125,13 +125,13 @@ ownerReferences: assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. 
- assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "pg2", @@ -472,15 +472,15 @@ func TestReconcilePatroniStatus(t *testing.T) { ObjectMeta: naming.PatroniDistributedConfiguration(postgresCluster), } if writeAnnotation { - endpoints.ObjectMeta.Annotations = make(map[string]string) - endpoints.ObjectMeta.Annotations["initialize"] = systemIdentifier + endpoints.Annotations = make(map[string]string) + endpoints.Annotations["initialize"] = systemIdentifier } assert.NilError(t, tClient.Create(ctx, endpoints, &client.CreateOptions{})) instance := &Instance{ Name: instanceName, Runner: runner, } - for i := 0; i < readyReplicas; i++ { + for range readyReplicas { instance.Pods = append(instance.Pods, &corev1.Pod{ Status: corev1.PodStatus{ Conditions: []corev1.PodCondition{{ diff --git a/internal/controller/postgrescluster/pgadmin_test.go b/internal/controller/postgrescluster/pgadmin_test.go index f4be61a8bb..1d0a305b2a 100644 --- a/internal/controller/postgrescluster/pgadmin_test.go +++ b/internal/controller/postgrescluster/pgadmin_test.go @@ -104,12 +104,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, configmap.Annotations, map[string]string{ "a": "v5", "b": "v2", "e": "v6", }) // Labels present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, configmap.Labels, map[string]string{ "c": "v7", "d": "v4", "f": "v8", "postgres-operator.crunchydata.com/cluster": "pg1", "postgres-operator.crunchydata.com/role": "pgadmin", @@ -194,12 +194,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "my-cluster", "postgres-operator.crunchydata.com/role": "pgadmin", @@ -225,13 +225,13 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. 
- assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "my-cluster", diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index e77456af63..5c84f2a22a 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -1215,7 +1215,7 @@ func (r *Reconciler) reconcileRestoreJob(ctx context.Context, var deltaOptFound, foundTarget bool for _, opt := range opts { switch { - case targetRegex.Match([]byte(opt)): + case targetRegex.MatchString(opt): foundTarget = true case strings.Contains(opt, "--delta"): deltaOptFound = true @@ -2303,7 +2303,7 @@ func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context, if isCreate { r.Recorder.Eventf(postgresCluster, corev1.EventTypeNormal, EventRepoHostCreated, - "created pgBackRest repository host %s/%s", repoHost.TypeMeta.Kind, repoHostName) + "created pgBackRest repository host %s/%s", repoHost.Kind, repoHostName) } return repoHost, nil @@ -2489,7 +2489,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, backupJob := &batchv1.Job{} backupJob.ObjectMeta = naming.PGBackRestBackupJob(postgresCluster) if currentBackupJob != nil { - backupJob.ObjectMeta.Name = currentBackupJob.ObjectMeta.Name + backupJob.Name = currentBackupJob.Name } var labels, annotations map[string]string @@ -2502,8 +2502,8 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, map[string]string{ naming.PGBackRestBackup: manualAnnotation, }) - backupJob.ObjectMeta.Labels = labels - backupJob.ObjectMeta.Annotations = annotations + backupJob.Labels = labels + backupJob.Annotations = annotations spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) @@ -2665,7 +2665,7 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, backupJob := &batchv1.Job{} backupJob.ObjectMeta = naming.PGBackRestBackupJob(postgresCluster) if job != nil { - backupJob.ObjectMeta.Name = job.ObjectMeta.Name + backupJob.Name = job.Name } var labels, annotations map[string]string @@ -2679,8 +2679,8 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, naming.PGBackRestCurrentConfig: containerName, naming.PGBackRestConfigHash: configHash, }) - backupJob.ObjectMeta.Labels = labels - backupJob.ObjectMeta.Annotations = annotations + backupJob.Labels = labels + backupJob.Annotations = annotations spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, serviceAccount.GetName(), labels, annotations) @@ -2847,7 +2847,7 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, } // Don't record event or return an error if configHashMismatch is true, since this just means // configuration changes in ConfigMaps/Secrets have not yet propagated to the container. - // Therefore, just log an an info message and return an error to requeue and try again. + // Therefore, just log an info message and return an error to requeue and try again. 
if configHashMismatch { return true, nil diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index f746b14597..5e5e43737c 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -464,9 +464,10 @@ topologySpreadConstraints: var instanceConfFound, dedicatedRepoConfFound bool for k, v := range config.Data { if v != "" { - if k == pgbackrest.CMInstanceKey { + switch k { + case pgbackrest.CMInstanceKey: instanceConfFound = true - } else if k == pgbackrest.CMRepoKey { + case pgbackrest.CMRepoKey: dedicatedRepoConfFound = true } } @@ -963,7 +964,7 @@ func TestReconcileReplicaCreateBackup(t *testing.T) { var foundOwnershipRef bool // verify ownership refs - for _, ref := range backupJob.ObjectMeta.GetOwnerReferences() { + for _, ref := range backupJob.GetOwnerReferences() { if ref.Name == clusterName { foundOwnershipRef = true break @@ -3227,11 +3228,11 @@ func TestGenerateRestoreJobIntent(t *testing.T) { t.Run(fmt.Sprintf("openshift-%v", openshift), func(t *testing.T) { t.Run("ObjectMeta", func(t *testing.T) { t.Run("Name", func(t *testing.T) { - assert.Equal(t, job.ObjectMeta.Name, + assert.Equal(t, job.Name, naming.PGBackRestRestoreJob(cluster).Name) }) t.Run("Namespace", func(t *testing.T) { - assert.Equal(t, job.ObjectMeta.Namespace, + assert.Equal(t, job.Namespace, naming.PGBackRestRestoreJob(cluster).Namespace) }) t.Run("Annotations", func(t *testing.T) { @@ -3454,15 +3455,15 @@ func TestObserveRestoreEnv(t *testing.T) { createResources: func(t *testing.T, cluster *v1beta1.PostgresCluster) { fakeLeaderEP := &corev1.Endpoints{} fakeLeaderEP.ObjectMeta = naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeLeaderEP)) fakeDCSEP := &corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeDCSEP)) fakeFailoverEP := &corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeFailoverEP)) job := generateJob(cluster.Name, initialize.Bool(false), initialize.Bool(false)) @@ -3478,15 +3479,15 @@ func TestObserveRestoreEnv(t *testing.T) { createResources: func(t *testing.T, cluster *v1beta1.PostgresCluster) { fakeLeaderEP := &corev1.Endpoints{} fakeLeaderEP.ObjectMeta = naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeLeaderEP)) fakeDCSEP := &corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeDCSEP)) fakeFailoverEP := &corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeFailoverEP)) }, result: testResult{ @@ -3656,15 +3657,15 @@ func TestPrepareForRestore(t *testing.T) { cluster *v1beta1.PostgresCluster) (*batchv1.Job, []corev1.Endpoints) { fakeLeaderEP := corev1.Endpoints{} fakeLeaderEP.ObjectMeta = 
naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeLeaderEP)) fakeDCSEP := corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeDCSEP)) fakeFailoverEP := corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeFailoverEP)) return nil, []corev1.Endpoints{fakeLeaderEP, fakeDCSEP, fakeFailoverEP} }, diff --git a/internal/controller/postgrescluster/pgbouncer_test.go b/internal/controller/postgrescluster/pgbouncer_test.go index 3785a50695..6d389c3bad 100644 --- a/internal/controller/postgrescluster/pgbouncer_test.go +++ b/internal/controller/postgrescluster/pgbouncer_test.go @@ -105,12 +105,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "pg7", "postgres-operator.crunchydata.com/role": "pgbouncer", @@ -136,13 +136,13 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "pg7", @@ -420,12 +420,12 @@ namespace: ns3 assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, deploy.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, deploy.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. 
- assert.DeepEqual(t, deploy.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, deploy.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "test-cluster", "postgres-operator.crunchydata.com/role": "pgbouncer", diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index cac1bd2057..37ded3ff66 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -360,7 +360,7 @@ func addPGMonitorExporterToInstancePodSpec( }, }, } - configVolume.VolumeSource.Projected.Sources = append(configVolume.VolumeSource.Projected.Sources, + configVolume.Projected.Sources = append(configVolume.Projected.Sources, defaultConfigVolumeProjection) } diff --git a/internal/controller/postgrescluster/pki.go b/internal/controller/postgrescluster/pki.go index 787daef212..d52d6a75da 100644 --- a/internal/controller/postgrescluster/pki.go +++ b/internal/controller/postgrescluster/pki.go @@ -63,7 +63,7 @@ func (r *Reconciler) reconcileRootCertificate( intent.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret")) intent.Namespace, intent.Name = cluster.Namespace, naming.RootCertSecret intent.Data = make(map[string][]byte) - intent.ObjectMeta.OwnerReferences = existing.ObjectMeta.OwnerReferences + intent.OwnerReferences = existing.OwnerReferences // A root secret is scoped to the namespace where postgrescluster(s) // are deployed. For operator deployments with postgresclusters in more than @@ -140,7 +140,7 @@ func (r *Reconciler) reconcileClusterCertificate( intent := &corev1.Secret{ObjectMeta: naming.PostgresTLSSecret(cluster)} intent.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret")) intent.Data = make(map[string][]byte) - intent.ObjectMeta.OwnerReferences = existing.ObjectMeta.OwnerReferences + intent.OwnerReferences = existing.OwnerReferences intent.Annotations = naming.Merge(cluster.Spec.Metadata.GetAnnotationsOrNil()) intent.Labels = naming.Merge( diff --git a/internal/controller/postgrescluster/pki_test.go b/internal/controller/postgrescluster/pki_test.go index a234292eb8..0cb5f15a99 100644 --- a/internal/controller/postgrescluster/pki_test.go +++ b/internal/controller/postgrescluster/pki_test.go @@ -89,7 +89,7 @@ func TestReconcileCerts(t *testing.T) { err := tClient.Get(ctx, client.ObjectKeyFromObject(rootSecret), rootSecret) assert.NilError(t, err) - assert.Check(t, len(rootSecret.ObjectMeta.OwnerReferences) == 1, "first owner reference not set") + assert.Check(t, len(rootSecret.OwnerReferences) == 1, "first owner reference not set") expectedOR := metav1.OwnerReference{ APIVersion: "postgres-operator.crunchydata.com/v1beta1", @@ -98,8 +98,8 @@ func TestReconcileCerts(t *testing.T) { UID: cluster1.UID, } - if len(rootSecret.ObjectMeta.OwnerReferences) > 0 { - assert.Equal(t, rootSecret.ObjectMeta.OwnerReferences[0], expectedOR) + if len(rootSecret.OwnerReferences) > 0 { + assert.Equal(t, rootSecret.OwnerReferences[0], expectedOR) } }) @@ -114,7 +114,7 @@ func TestReconcileCerts(t *testing.T) { clist := &v1beta1.PostgresClusterList{} assert.NilError(t, tClient.List(ctx, clist)) - assert.Check(t, len(rootSecret.ObjectMeta.OwnerReferences) == 2, "second owner reference not set") + assert.Check(t, len(rootSecret.OwnerReferences) == 2, "second owner reference not set") expectedOR := metav1.OwnerReference{ APIVersion: "postgres-operator.crunchydata.com/v1beta1", @@ -123,8 +123,8 @@ func TestReconcileCerts(t *testing.T) { UID: cluster2.UID, } - 
if len(rootSecret.ObjectMeta.OwnerReferences) > 1 { - assert.Equal(t, rootSecret.ObjectMeta.OwnerReferences[1], expectedOR) + if len(rootSecret.OwnerReferences) > 1 { + assert.Equal(t, rootSecret.OwnerReferences[1], expectedOR) } }) @@ -301,7 +301,7 @@ func TestReconcileCerts(t *testing.T) { testSecret := &corev1.Secret{} testSecret.Namespace, testSecret.Name = namespace, "newcustomsecret" // simulate cluster spec update - cluster2.Spec.CustomTLSSecret.LocalObjectReference.Name = "newcustomsecret" + cluster2.Spec.CustomTLSSecret.Name = "newcustomsecret" // get the expected secret projection testSecretProjection := clusterCertSecretProjection(testSecret) diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index 6351e18f84..10901e10dd 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -571,7 +571,7 @@ func (r *Reconciler) reconcilePostgresUserSecrets( // If both secrets have "pguser" or neither have "pguser", // sort by creation timestamp - return secrets.Items[i].CreationTimestamp.Time.After(secrets.Items[j].CreationTimestamp.Time) + return secrets.Items[i].CreationTimestamp.After(secrets.Items[j].CreationTimestamp.Time) }) // Index secrets by PostgreSQL user name and delete any that are not in the @@ -644,11 +644,11 @@ func (r *Reconciler) reconcilePostgresUsersInPostgreSQL( running, known := instance.IsRunning(container) if running && known && len(instance.Pods) > 0 { pod := instance.Pods[0] - ctx = logging.NewContext(ctx, logging.FromContext(ctx).WithValues("pod", pod.Name)) podExecutor = func( ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error { + ctx = logging.NewContext(ctx, logging.FromContext(ctx).WithValues("pod", pod.Name)) return r.PodExec(ctx, pod.Namespace, pod.Name, container, stdin, stdout, stderr, command...) 
} break diff --git a/internal/controller/postgrescluster/snapshots.go b/internal/controller/postgrescluster/snapshots.go index 8f36cefdfc..ff00928d6b 100644 --- a/internal/controller/postgrescluster/snapshots.go +++ b/internal/controller/postgrescluster/snapshots.go @@ -10,6 +10,7 @@ import ( "strings" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -17,8 +18,6 @@ import ( "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/controller-runtime/pkg/client" - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" - "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" diff --git a/internal/controller/postgrescluster/snapshots_test.go b/internal/controller/postgrescluster/snapshots_test.go index caa681e3e1..8c9dd7ece4 100644 --- a/internal/controller/postgrescluster/snapshots_test.go +++ b/internal/controller/postgrescluster/snapshots_test.go @@ -11,6 +11,7 @@ import ( "testing" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" @@ -29,8 +30,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" ) func TestReconcileVolumeSnapshots(t *testing.T) { @@ -60,7 +59,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create cluster (without snapshots spec) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) t.Cleanup(func() { assert.Check(t, r.Client.Delete(ctx, cluster)) }) @@ -200,7 +199,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create a cluster with snapshots enabled cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: volumeSnapshotClassName, } @@ -315,7 +314,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create a cluster with snapshots enabled cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: volumeSnapshotClassName, } @@ -374,7 +373,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) t.Cleanup(func() { assert.Check(t, r.Client.Delete(ctx, cluster)) }) @@ -432,7 +431,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -467,7 +466,7 
@@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -515,7 +514,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -580,7 +579,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -647,7 +646,7 @@ func TestCreateDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" labelMap := map[string]string{ naming.LabelCluster: cluster.Name, @@ -675,7 +674,7 @@ func TestDedicatedSnapshotVolumeRestore(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" pvc := &corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ @@ -761,7 +760,7 @@ func TestGenerateVolumeSnapshot(t *testing.T) { assert.Equal(t, *snapshot.Spec.VolumeSnapshotClassName, "my-snapshot") assert.Equal(t, *snapshot.Spec.Source.PersistentVolumeClaimName, "dedicated-snapshot-volume") assert.Equal(t, snapshot.Labels[naming.LabelCluster], "hippo") - assert.Equal(t, snapshot.ObjectMeta.OwnerReferences[0].Name, "hippo") + assert.Equal(t, snapshot.OwnerReferences[0].Name, "hippo") } func TestGetDedicatedSnapshotVolumeRestoreJob(t *testing.T) { @@ -982,7 +981,7 @@ func TestGetSnapshotWithLatestError(t *testing.T) { }, } snapshotWithLatestError := getSnapshotWithLatestError(snapshots) - assert.Equal(t, snapshotWithLatestError.ObjectMeta.Name, "bad-snapshot") + assert.Equal(t, snapshotWithLatestError.Name, "bad-snapshot") }) t.Run("TwoSnapshotsWithErrors", func(t *testing.T) { @@ -1015,7 +1014,7 @@ func TestGetSnapshotWithLatestError(t *testing.T) { }, } snapshotWithLatestError := getSnapshotWithLatestError(snapshots) - assert.Equal(t, snapshotWithLatestError.ObjectMeta.Name, "second-bad-snapshot") + assert.Equal(t, snapshotWithLatestError.Name, "second-bad-snapshot") }) } @@ -1205,7 +1204,7 @@ func TestGetLatestReadySnapshot(t *testing.T) { }, } latestReadySnapshot := getLatestReadySnapshot(snapshots) - assert.Equal(t, latestReadySnapshot.ObjectMeta.Name, "good-snapshot") + assert.Equal(t, latestReadySnapshot.Name, "good-snapshot") }) t.Run("TwoReadySnapshots", func(t *testing.T) { @@ -1234,7 +1233,7 @@ func TestGetLatestReadySnapshot(t *testing.T) { }, } latestReadySnapshot := getLatestReadySnapshot(snapshots) - assert.Equal(t, latestReadySnapshot.ObjectMeta.Name, "second-good-snapshot") + assert.Equal(t, latestReadySnapshot.Name, "second-good-snapshot") }) } @@ -1250,13 +1249,13 @@ func TestDeleteSnapshots(t *testing.T) { cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) 
rhinoCluster := testCluster() rhinoCluster.Name = "rhino" rhinoCluster.Namespace = ns.Name - rhinoCluster.ObjectMeta.UID = "the-uid-456" + rhinoCluster.UID = "the-uid-456" assert.NilError(t, r.Client.Create(ctx, rhinoCluster)) t.Cleanup(func() { diff --git a/internal/controller/postgrescluster/suite_test.go b/internal/controller/postgrescluster/suite_test.go index b9f80df2f9..7918523d0a 100644 --- a/internal/controller/postgrescluster/suite_test.go +++ b/internal/controller/postgrescluster/suite_test.go @@ -14,9 +14,7 @@ import ( . "github.com/onsi/gomega" "k8s.io/apimachinery/pkg/util/version" "k8s.io/client-go/discovery" - - // Google Kubernetes Engine / Google Cloud Platform authentication provider - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" + _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" // Google Kubernetes Engine / Google Cloud Platform authentication provider "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" diff --git a/internal/controller/postgrescluster/util_test.go b/internal/controller/postgrescluster/util_test.go index c7332eea4e..8e7d5c434f 100644 --- a/internal/controller/postgrescluster/util_test.go +++ b/internal/controller/postgrescluster/util_test.go @@ -79,7 +79,7 @@ func TestAddDevSHM(t *testing.T) { // check there is an empty dir mounted under the dshm volume for _, v := range template.Spec.Volumes { - if v.Name == "dshm" && v.VolumeSource.EmptyDir != nil && v.VolumeSource.EmptyDir.Medium == corev1.StorageMediumMemory { + if v.Name == "dshm" && v.EmptyDir != nil && v.EmptyDir.Medium == corev1.StorageMediumMemory { found = true break } @@ -221,15 +221,14 @@ func TestAddNSSWrapper(t *testing.T) { // Each container that requires the nss_wrapper envs should be updated var actualUpdatedContainerCount int for i, c := range template.Spec.Containers { - if c.Name == naming.ContainerDatabase || - c.Name == naming.PGBackRestRepoContainerName || - c.Name == naming.PGBackRestRestoreContainerName { + switch c.Name { + case naming.ContainerDatabase, naming.PGBackRestRepoContainerName, naming.PGBackRestRestoreContainerName: assert.DeepEqual(t, expectedEnv, c.Env) actualUpdatedContainerCount++ - } else if c.Name == "pgadmin" { + case "pgadmin": assert.DeepEqual(t, expectedPGAdminEnv, c.Env) actualUpdatedContainerCount++ - } else { + default: assert.DeepEqual(t, beforeAddNSS[i], c) } } diff --git a/internal/controller/postgrescluster/volumes.go b/internal/controller/postgrescluster/volumes.go index 809b2fe8e1..a26fa05e78 100644 --- a/internal/controller/postgrescluster/volumes.go +++ b/internal/controller/postgrescluster/volumes.go @@ -257,7 +257,7 @@ func (r *Reconciler) configureExistingPGVolumes( Spec: cluster.Spec.InstanceSets[0].DataVolumeClaimSpec.AsPersistentVolumeClaimSpec(), } - volume.ObjectMeta.Labels = map[string]string{ + volume.Labels = map[string]string{ naming.LabelCluster: cluster.Name, naming.LabelInstanceSet: cluster.Spec.InstanceSets[0].Name, naming.LabelInstance: instanceName, @@ -310,7 +310,7 @@ func (r *Reconciler) configureExistingPGWALVolume( Spec: cluster.Spec.InstanceSets[0].DataVolumeClaimSpec.AsPersistentVolumeClaimSpec(), } - volume.ObjectMeta.Labels = map[string]string{ + volume.Labels = map[string]string{ naming.LabelCluster: cluster.Name, naming.LabelInstanceSet: cluster.Spec.InstanceSets[0].Name, naming.LabelInstance: instanceName, @@ -465,14 +465,14 @@ func (r *Reconciler) reconcileMovePGDataDir(ctx context.Context, // at this point, the Job either wasn't found or it has failed, so 
the it // should be created - moveDirJob.ObjectMeta.Annotations = naming.Merge(cluster.Spec.Metadata. + moveDirJob.Annotations = naming.Merge(cluster.Spec.Metadata. GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGDataDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels // `patroni.dynamic.json` holds the previous state of the DCS. Since we are // migrating the volumes, we want to clear out any obsolete configuration info. @@ -588,14 +588,14 @@ func (r *Reconciler) reconcileMoveWALDir(ctx context.Context, } } - moveDirJob.ObjectMeta.Annotations = naming.Merge(cluster.Spec.Metadata. + moveDirJob.Annotations = naming.Merge(cluster.Spec.Metadata. GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGWalDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels script := fmt.Sprintf(`echo "Preparing cluster %s volumes for PGO v5.x" echo "pg_wal_pvc=%s" @@ -610,7 +610,7 @@ func (r *Reconciler) reconcileMoveWALDir(ctx context.Context, cluster.Spec.DataSource.Volumes.PGWALVolume.PVCName, cluster.Spec.DataSource.Volumes.PGWALVolume.Directory, cluster.Spec.DataSource.Volumes.PGWALVolume.Directory, - cluster.ObjectMeta.Name) + cluster.Name) container := corev1.Container{ Command: []string{"bash", "-ceu", script}, @@ -707,14 +707,14 @@ func (r *Reconciler) reconcileMoveRepoDir(ctx context.Context, } } - moveDirJob.ObjectMeta.Annotations = naming.Merge( + moveDirJob.Annotations = naming.Merge( cluster.Spec.Metadata.GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGBackRestRepoDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels script := fmt.Sprintf(`echo "Preparing cluster %s pgBackRest repo volume for PGO v5.x" echo "repo_pvc=%s" diff --git a/internal/controller/runtime/runtime.go b/internal/controller/runtime/runtime.go index 152f490035..e3b0aca230 100644 --- a/internal/controller/runtime/runtime.go +++ b/internal/controller/runtime/runtime.go @@ -5,6 +5,7 @@ package runtime import ( + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" @@ -15,8 +16,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" ) type ( diff --git a/internal/controller/standalone_pgadmin/apply.go b/internal/controller/standalone_pgadmin/apply.go index 1108853e7f..0cc3191967 100644 --- a/internal/controller/standalone_pgadmin/apply.go +++ b/internal/controller/standalone_pgadmin/apply.go @@ -22,7 +22,7 @@ func (r *PGAdminReconciler) patch( patch client.Patch, options ...client.PatchOption, ) error { options = append([]client.PatchOption{r.Owner}, options...) - return r.Client.Patch(ctx, object, patch, options...) + return r.Patch(ctx, object, patch, options...) 
} // apply sends an apply patch to object's endpoint in the Kubernetes API and diff --git a/internal/controller/standalone_pgadmin/configmap.go b/internal/controller/standalone_pgadmin/configmap.go index 5078e0e9fa..ad0da80dfa 100644 --- a/internal/controller/standalone_pgadmin/configmap.go +++ b/internal/controller/standalone_pgadmin/configmap.go @@ -14,9 +14,8 @@ import ( "strconv" "strings" - corev1 "k8s.io/api/core/v1" - "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/initialize" diff --git a/internal/controller/standalone_pgadmin/configmap_test.go b/internal/controller/standalone_pgadmin/configmap_test.go index 267dd77325..3a9bab7b28 100644 --- a/internal/controller/standalone_pgadmin/configmap_test.go +++ b/internal/controller/standalone_pgadmin/configmap_test.go @@ -235,12 +235,12 @@ namespace: some-ns assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, configmap.Annotations, map[string]string{ "a": "v1", "b": "v2", }) // Labels present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, configmap.Labels, map[string]string{ "c": "v3", "d": "v4", "postgres-operator.crunchydata.com/pgadmin": "pg1", "postgres-operator.crunchydata.com/role": "pgadmin", diff --git a/internal/controller/standalone_pgadmin/controller.go b/internal/controller/standalone_pgadmin/controller.go index 23ba7b6793..a8b95b0053 100644 --- a/internal/controller/standalone_pgadmin/controller.go +++ b/internal/controller/standalone_pgadmin/controller.go @@ -166,7 +166,7 @@ func (r *PGAdminReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct func (r *PGAdminReconciler) setControllerReference( owner *v1beta1.PGAdmin, controlled client.Object, ) error { - return controllerutil.SetControllerReference(owner, controlled, r.Client.Scheme()) + return controllerutil.SetControllerReference(owner, controlled, r.Scheme()) } // deleteControlled safely deletes object when it is controlled by pgAdmin. 
@@ -178,7 +178,7 @@ func (r *PGAdminReconciler) deleteControlled( version := object.GetResourceVersion() exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} - return r.Client.Delete(ctx, object, exactly) + return r.Delete(ctx, object, exactly) } return nil diff --git a/internal/controller/standalone_pgadmin/pod.go b/internal/controller/standalone_pgadmin/pod.go index 734789bd3c..71f785c15e 100644 --- a/internal/controller/standalone_pgadmin/pod.go +++ b/internal/controller/standalone_pgadmin/pod.go @@ -150,7 +150,7 @@ func pod( // Check the configmap to see if we think TLS is enabled // If so, update the readiness check scheme to HTTPS if strings.Contains(gunicornData, "certfile") && strings.Contains(gunicornData, "keyfile") { - readinessProbe.ProbeHandler.HTTPGet.Scheme = corev1.URISchemeHTTPS + readinessProbe.HTTPGet.Scheme = corev1.URISchemeHTTPS } container.ReadinessProbe = readinessProbe diff --git a/internal/controller/standalone_pgadmin/related.go b/internal/controller/standalone_pgadmin/related.go index 50d5a68b09..c7fcb119bc 100644 --- a/internal/controller/standalone_pgadmin/related.go +++ b/internal/controller/standalone_pgadmin/related.go @@ -7,12 +7,12 @@ package standalone_pgadmin import ( "context" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - "k8s.io/apimachinery/pkg/labels" - "sigs.k8s.io/controller-runtime/pkg/client" ) //+kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="pgadmins",verbs={list} @@ -30,7 +30,7 @@ func (r *PGAdminReconciler) findPGAdminsForPostgresCluster( // namespace, we can configure the [manager.Manager] field indexer and pass a // [fields.Selector] here. // - https://book.kubebuilder.io/reference/watching-resources/externally-managed.html - if r.Client.List(ctx, &pgadmins, &client.ListOptions{ + if r.List(ctx, &pgadmins, &client.ListOptions{ Namespace: cluster.GetNamespace(), }) == nil { for i := range pgadmins.Items { @@ -64,7 +64,7 @@ func (r *PGAdminReconciler) findPGAdminsForSecret( // namespace, we can configure the [manager.Manager] field indexer and pass a // [fields.Selector] here. 
// - https://book.kubebuilder.io/reference/watching-resources/externally-managed.html - if err := r.Client.List(ctx, &pgadmins, &client.ListOptions{ + if err := r.List(ctx, &pgadmins, &client.ListOptions{ Namespace: secret.Namespace, }); err == nil { for i := range pgadmins.Items { @@ -93,7 +93,7 @@ func (r *PGAdminReconciler) getClustersForPGAdmin( for _, serverGroup := range pgAdmin.Spec.ServerGroups { var cluster v1beta1.PostgresCluster if serverGroup.PostgresClusterName != "" { - err = r.Client.Get(ctx, client.ObjectKey{ + err = r.Get(ctx, client.ObjectKey{ Name: serverGroup.PostgresClusterName, Namespace: pgAdmin.GetNamespace(), }, &cluster) @@ -104,7 +104,7 @@ func (r *PGAdminReconciler) getClustersForPGAdmin( } if selector, err = naming.AsSelector(serverGroup.PostgresClusterSelector); err == nil { var list v1beta1.PostgresClusterList - err = r.Client.List(ctx, &list, + err = r.List(ctx, &list, client.InNamespace(pgAdmin.Namespace), client.MatchingLabelsSelector{Selector: selector}, ) diff --git a/internal/controller/standalone_pgadmin/service.go b/internal/controller/standalone_pgadmin/service.go index b465dadb97..bfdc04c6ec 100644 --- a/internal/controller/standalone_pgadmin/service.go +++ b/internal/controller/standalone_pgadmin/service.go @@ -7,16 +7,14 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" - apierrors "k8s.io/apimachinery/pkg/api/errors" - - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -38,7 +36,7 @@ func (r *PGAdminReconciler) reconcilePGAdminService( // need to delete any existing service(s). At the start of every reconcile // get all services that match the current pgAdmin labels. 
services := corev1.ServiceList{} - if err := r.Client.List(ctx, &services, + if err := r.List(ctx, &services, client.InNamespace(pgadmin.Namespace), client.MatchingLabels{ naming.LabelStandalonePGAdmin: pgadmin.Name, @@ -64,7 +62,7 @@ func (r *PGAdminReconciler) reconcilePGAdminService( if pgadmin.Spec.ServiceName != "" { // Look for an existing service with name ServiceName in the namespace existingService := &corev1.Service{} - err := r.Client.Get(ctx, types.NamespacedName{ + err := r.Get(ctx, types.NamespacedName{ Name: pgadmin.Spec.ServiceName, Namespace: pgadmin.GetNamespace(), }, existingService) diff --git a/internal/controller/standalone_pgadmin/statefulset.go b/internal/controller/standalone_pgadmin/statefulset.go index 6783780eae..b8730b7112 100644 --- a/internal/controller/standalone_pgadmin/statefulset.go +++ b/internal/controller/standalone_pgadmin/statefulset.go @@ -7,14 +7,13 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/controller/postgrescluster" "github.com/crunchydata/postgres-operator/internal/initialize" @@ -35,7 +34,7 @@ func (r *PGAdminReconciler) reconcilePGAdminStatefulSet( // When we delete the StatefulSet, we will leave its Pods in place. They will be claimed by // the StatefulSet that gets created in the next reconcile. existing := &appsv1.StatefulSet{} - if err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(sts), existing)); err != nil { + if err := errors.WithStack(r.Get(ctx, client.ObjectKeyFromObject(sts), existing)); err != nil { if !apierrors.IsNotFound(err) { return err } @@ -48,7 +47,7 @@ func (r *PGAdminReconciler) reconcilePGAdminStatefulSet( exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} propagate := client.PropagationPolicy(metav1.DeletePropagationOrphan) - return errors.WithStack(client.IgnoreNotFound(r.Client.Delete(ctx, existing, exactly, propagate))) + return errors.WithStack(client.IgnoreNotFound(r.Delete(ctx, existing, exactly, propagate))) } } @@ -123,7 +122,7 @@ func statefulset( if collector.OpenTelemetryLogsEnabled(ctx, pgadmin) { // Logs for gunicorn and pgadmin write to /var/lib/pgadmin/logs - // so the collector needs access to that that path. + // so the collector needs access to that path. 
dataVolumeMount := corev1.VolumeMount{ Name: "pgadmin-data", MountPath: "/var/lib/pgadmin", diff --git a/internal/controller/standalone_pgadmin/users.go b/internal/controller/standalone_pgadmin/users.go index bfea0f444c..027960e90c 100644 --- a/internal/controller/standalone_pgadmin/users.go +++ b/internal/controller/standalone_pgadmin/users.go @@ -53,7 +53,7 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * pod := &corev1.Pod{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} pod.Name += "-0" - err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(pod), pod)) + err := errors.WithStack(r.Get(ctx, client.ObjectKeyFromObject(pod), pod)) if err != nil { return client.IgnoreNotFound(err) } @@ -142,7 +142,7 @@ func (r *PGAdminReconciler) writePGAdminUsers(ctx context.Context, pgadmin *v1be existingUserSecret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(existingUserSecret), existingUserSecret)) + r.Get(ctx, client.ObjectKeyFromObject(existingUserSecret), existingUserSecret)) if client.IgnoreNotFound(err) != nil { return err } @@ -204,10 +204,10 @@ cd $PGADMIN_DIR // Get password from secret userPasswordSecret := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{ Namespace: pgadmin.Namespace, - Name: user.PasswordRef.LocalObjectReference.Name, + Name: user.PasswordRef.Name, }} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(userPasswordSecret), userPasswordSecret)) + r.Get(ctx, client.ObjectKeyFromObject(userPasswordSecret), userPasswordSecret)) if err != nil { log.Error(err, "Could not get user password secret") continue diff --git a/internal/controller/standalone_pgadmin/users_test.go b/internal/controller/standalone_pgadmin/users_test.go index bcb97a538b..35c599930d 100644 --- a/internal/controller/standalone_pgadmin/users_test.go +++ b/internal/controller/standalone_pgadmin/users_test.go @@ -337,7 +337,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -397,7 +397,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -469,7 +469,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -514,7 +514,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, 
client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -556,7 +556,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -583,7 +583,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -636,7 +636,7 @@ func TestWritePGAdminUsers(t *testing.T) { // have succeeded secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -664,7 +664,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -692,7 +692,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -721,7 +721,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) diff --git a/internal/controller/standalone_pgadmin/volume.go b/internal/controller/standalone_pgadmin/volume.go index dbdfaee649..a3e26682ef 100644 --- a/internal/controller/standalone_pgadmin/volume.go +++ b/internal/controller/standalone_pgadmin/volume.go @@ -7,14 +7,13 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" 
"k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/validation/field" - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) diff --git a/internal/kubernetes/discovery.go b/internal/kubernetes/discovery.go index 0a96398e90..62e14fe496 100644 --- a/internal/kubernetes/discovery.go +++ b/internal/kubernetes/discovery.go @@ -165,7 +165,7 @@ func (r *DiscoveryRunner) readAPIs(ctx context.Context) error { r.have.RLock() defer r.have.RUnlock() - logging.FromContext(ctx).V(1).Info("Found APIs", "index_size", r.have.APISet.Len()) + logging.FromContext(ctx).V(1).Info("Found APIs", "index_size", r.have.Len()) return nil } diff --git a/internal/patroni/config.go b/internal/patroni/config.go index 72202fbd78..3e6f7b6c83 100644 --- a/internal/patroni/config.go +++ b/internal/patroni/config.go @@ -465,7 +465,7 @@ func instanceYAML( // created. That value should be injected using the downward API and the // PATRONI_KUBERNETES_POD_IP environment variable. - // Missing here is "ports" which is is connascent with "postgresql.connect_address". + // Missing here is "ports" which is connascent with "postgresql.connect_address". // See the PATRONI_KUBERNETES_PORTS env variable. }, diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index 222c174f40..f1d2a4c5d9 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -794,7 +794,7 @@ func TestPGBackRestCreateReplicaCommand(t *testing.T) { file := filepath.Join(dir, "command.sh") assert.NilError(t, os.WriteFile(file, []byte(command), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", "--shell=sh", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -816,7 +816,7 @@ func TestPGBackRestCreateReplicaCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -895,7 +895,6 @@ func TestProbeTiming(t *testing.T) { FailureThreshold: 1, }}, } { - tt := tt actual := probeTiming(&v1beta1.PatroniSpec{ LeaderLeaseDurationSeconds: &tt.lease, SyncPeriodSeconds: &tt.sync, diff --git a/internal/pgadmin/config_test.go b/internal/pgadmin/config_test.go index e634aee361..0e659c7070 100644 --- a/internal/pgadmin/config_test.go +++ b/internal/pgadmin/config_test.go @@ -77,7 +77,7 @@ func TestStartupCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) @@ -94,7 +94,7 @@ func TestStartupCommand(t *testing.T) { // Expect flake8 to be happy. Ignore "E401 multiple imports on one line" // in addition to the defaults. The file contents appear in PodSpec, so // allow lines longer than the default to save some vertical space. 
- cmd := exec.Command(flake8, "--extend-ignore=E401", "--max-line-length=99", file) + cmd := exec.CommandContext(t.Context(), flake8, "--extend-ignore=E401", "--max-line-length=99", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) diff --git a/internal/pgadmin/users_test.go b/internal/pgadmin/users_test.go index 4dba70f81a..673a2c4b02 100644 --- a/internal/pgadmin/users_test.go +++ b/internal/pgadmin/users_test.go @@ -180,7 +180,7 @@ with create_app().app_context(): // Expect flake8 to be happy. Ignore "E402 module level import not // at top of file" in addition to the defaults. - cmd := exec.Command(flake8, "--extend-ignore=E402", file) + cmd := exec.CommandContext(ctx, flake8, "--extend-ignore=E402", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 744537fb20..3899c33339 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -251,7 +251,7 @@ func RestoreCommand(pgdata, hugePagesSetting, fetchKeyCommand string, _ []*corev `read -r max_ptxn <<< "${control##*max_prepared_xacts setting:}"`, `read -r max_work <<< "${control##*max_worker_processes setting:}"`, - // During recovery, only allow connections over the the domain socket. + // During recovery, only allow connections over the domain socket. `echo > /tmp/pg_hba.restore.conf 'local all "postgres" peer'`, // Combine parameters from Go with those detected in Bash. diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index 0f518ab7bc..c1b4e0b155 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -621,7 +621,7 @@ func TestReloadCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. 
- cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -647,7 +647,7 @@ func TestRestoreCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -686,7 +686,7 @@ func TestDedicatedSnapshotVolumeRestoreCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbackrest/pgbackrest_test.go b/internal/pgbackrest/pgbackrest_test.go index 07ff3d127a..0930b72f45 100644 --- a/internal/pgbackrest/pgbackrest_test.go +++ b/internal/pgbackrest/pgbackrest_test.go @@ -13,12 +13,10 @@ import ( "testing" "gotest.tools/v3/assert" - "k8s.io/apimachinery/pkg/api/resource" - corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "github.com/crunchydata/postgres-operator/internal/testing/require" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -94,7 +92,7 @@ fi assert.NilError(t, os.WriteFile(file, []byte(shellCheckScript), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(ctx, shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index 18bbfc455d..fbd146475c 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -128,7 +128,7 @@ func TestAddRepoVolumesToPod(t *testing.T) { for _, r := range tc.repos { var foundVolume bool for _, v := range template.Spec.Volumes { - if v.Name == r.Name && v.VolumeSource.PersistentVolumeClaim.ClaimName == + if v.Name == r.Name && v.PersistentVolumeClaim.ClaimName == naming.PGBackRestRepoVolume(postgresCluster, r.Name).Name { foundVolume = true break diff --git a/internal/pgbackrest/util_test.go b/internal/pgbackrest/util_test.go index e3c98e0dd7..d2fd93455c 100644 --- a/internal/pgbackrest/util_test.go +++ b/internal/pgbackrest/util_test.go @@ -6,7 +6,7 @@ package pgbackrest import ( "io" - "math/rand" + "math/rand/v2" "strconv" "testing" @@ -80,7 +80,7 @@ func TestCalculateConfigHashes(t *testing.T) { assert.Equal(t, preCalculatedRepo3S3Hash, configHashMap["repo3"]) // call CalculateConfigHashes multiple times to ensure consistent results - for i := 0; i < 10; i++ { + for range 10 { hashMap, hash, err := CalculateConfigHashes(postgresCluster) assert.NilError(t, err) assert.Equal(t, configHash, hash) @@ -92,7 +92,7 @@ func TestCalculateConfigHashes(t *testing.T) { // shuffle the repo slice in order to ensure the same result is returned regardless of the // order of the repos slice shuffleCluster := postgresCluster.DeepCopy() - for i := 0; i < 10; i++ { + for range 10 { repos := shuffleCluster.Spec.Backups.PGBackRest.Repos rand.Shuffle(len(repos), func(i, j int) { 
repos[i], repos[j] = repos[j], repos[i] @@ -103,7 +103,7 @@ func TestCalculateConfigHashes(t *testing.T) { } // now modify some values in each repo and confirm we see a different result - for i := 0; i < 3; i++ { + for i := range 3 { modCluster := postgresCluster.DeepCopy() switch i { case 0: diff --git a/internal/pgbouncer/config_test.go b/internal/pgbouncer/config_test.go index 43c6b77a92..97ba017ef4 100644 --- a/internal/pgbouncer/config_test.go +++ b/internal/pgbouncer/config_test.go @@ -216,7 +216,7 @@ func TestReloadCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbouncer/postgres.go b/internal/pgbouncer/postgres.go index 202c6bd9be..2d0b675067 100644 --- a/internal/pgbouncer/postgres.go +++ b/internal/pgbouncer/postgres.go @@ -181,7 +181,7 @@ REVOKE ALL PRIVILEGES // - https://www.postgresql.org/docs/current/perm-functions.html `ALTER ROLE :"username" SET search_path TO :'namespace';`, - // Allow the PgBouncer user to to login. + // Allow the PgBouncer user to login. `ALTER ROLE :"username" LOGIN PASSWORD :'verifier';`, // Commit (finish) the transaction. diff --git a/internal/pki/encoding_test.go b/internal/pki/encoding_test.go index 2c63099ca4..eb2b1365b3 100644 --- a/internal/pki/encoding_test.go +++ b/internal/pki/encoding_test.go @@ -81,7 +81,7 @@ func TestCertificateTextMarshaling(t *testing.T) { assert.NilError(t, os.WriteFile(certFile, certBytes, 0o600)) // The "openssl x509" command parses X.509 certificates. - cmd := exec.Command(openssl, "x509", + cmd := exec.CommandContext(t.Context(), openssl, "x509", "-in", certFile, "-inform", "PEM", "-noout", "-text") output, err := cmd.CombinedOutput() @@ -153,7 +153,7 @@ func TestPrivateKeyTextMarshaling(t *testing.T) { assert.NilError(t, os.WriteFile(keyFile, keyBytes, 0o600)) // The "openssl pkey" command processes public and private keys. 
- cmd := exec.Command(openssl, "pkey", + cmd := exec.CommandContext(t.Context(), openssl, "pkey", "-in", keyFile, "-inform", "PEM", "-noout", "-text") output, err := cmd.CombinedOutput() @@ -164,12 +164,12 @@ func TestPrivateKeyTextMarshaling(t *testing.T) { "expected valid private key, got:\n%s", output) t.Run("Check", func(t *testing.T) { - output, _ := exec.Command(openssl, "pkey", "-help").CombinedOutput() + output, _ := exec.CommandContext(t.Context(), openssl, "pkey", "-help").CombinedOutput() if !strings.Contains(string(output), "-check") { t.Skip(`requires "-check" flag`) } - cmd := exec.Command(openssl, "pkey", + cmd := exec.CommandContext(t.Context(), openssl, "pkey", "-check", "-in", keyFile, "-inform", "PEM", "-noout", "-text") output, err := cmd.CombinedOutput() diff --git a/internal/pki/pki_test.go b/internal/pki/pki_test.go index 000f1a5042..fa8f290475 100644 --- a/internal/pki/pki_test.go +++ b/internal/pki/pki_test.go @@ -194,7 +194,7 @@ func TestRootIsInvalid(t *testing.T) { t.Cleanup(func() { currentTime = original }) currentTime = func() time.Time { - return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.Local) + return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.UTC) } root, err := NewRootCertificateAuthority() @@ -395,7 +395,7 @@ func TestLeafIsInvalid(t *testing.T) { t.Cleanup(func() { currentTime = original }) currentTime = func() time.Time { - return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.Local) + return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.UTC) } leaf, err := root.GenerateLeafCertificate("", nil) @@ -439,7 +439,7 @@ func basicOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { verify := func(t testing.TB, args ...string) { t.Helper() // #nosec G204 -- args from this test - cmd := exec.Command(openssl, append([]string{"verify"}, args...)...) + cmd := exec.CommandContext(t.Context(), openssl, append([]string{"verify"}, args...)...) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) @@ -476,7 +476,7 @@ func basicOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { } func strictOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { - output, _ := exec.Command(openssl, "verify", "-help").CombinedOutput() + output, _ := exec.CommandContext(t.Context(), openssl, "verify", "-help").CombinedOutput() if !strings.Contains(string(output), "-x509_strict") { t.Skip(`requires "-x509_strict" flag`) } @@ -487,7 +487,7 @@ func strictOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { verify := func(t testing.TB, args ...string) { t.Helper() // #nosec G204 -- args from this test - cmd := exec.Command(openssl, append([]string{"verify", + cmd := exec.CommandContext(t.Context(), openssl, append([]string{"verify", // Do not use the default trusted CAs. "-no-CAfile", "-no-CApath", // Disable "non-compliant workarounds for broken certificates". 
diff --git a/internal/postgres/config_test.go b/internal/postgres/config_test.go index 1a7378a50c..59aca4b21a 100644 --- a/internal/postgres/config_test.go +++ b/internal/postgres/config_test.go @@ -52,7 +52,7 @@ func TestWALDirectory(t *testing.T) { func TestBashHalt(t *testing.T) { t.Run("NoPipeline", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; halt ab cd e`) var exit *exec.ExitError @@ -64,7 +64,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("PipelineZeroStatus", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; true && halt message`) var exit *exec.ExitError @@ -76,7 +76,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("PipelineNonZeroStatus", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; (exit 99) || halt $'multi\nline'`) var exit *exec.ExitError @@ -88,7 +88,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("Subshell", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; (halt 'err') || echo 'after'`) stderr := new(bytes.Buffer) @@ -104,7 +104,7 @@ func TestBashHalt(t *testing.T) { func TestBashPermissions(t *testing.T) { // macOS `stat` takes different arguments than BusyBox and GNU coreutils. - if output, err := exec.Command("stat", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "stat", "--help").CombinedOutput(); err != nil { t.Skip(`requires "stat" executable`) } else if !strings.Contains(string(output), "%A") { t.Skip(`requires "stat" with access format sequence`) @@ -116,7 +116,7 @@ func TestBashPermissions(t *testing.T) { assert.NilError(t, os.WriteFile(filepath.Join(dir, "sub", "fn"), nil, 0o624)) // #nosec G306 OK permissions for a temp dir in a test assert.NilError(t, os.Chmod(filepath.Join(dir, "sub", "fn"), 0o624)) - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashPermissions+`; permissions "$@"`, "-", filepath.Join(dir, "sub", "fn")) @@ -131,7 +131,7 @@ func TestBashPermissions(t *testing.T) { func TestBashRecreateDirectory(t *testing.T) { // macOS `stat` takes different arguments than BusyBox and GNU coreutils. - if output, err := exec.Command("stat", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "stat", "--help").CombinedOutput(); err != nil { t.Skip(`requires "stat" executable`) } else if !strings.Contains(string(output), "%a") { t.Skip(`requires "stat" with access format sequence`) @@ -143,7 +143,7 @@ func TestBashRecreateDirectory(t *testing.T) { assert.NilError(t, os.WriteFile(filepath.Join(dir, "d", "file"), nil, 0o644)) // #nosec G306 OK permissions for a temp dir in a test stat := func(args ...string) string { - cmd := exec.Command("stat", "-c", "%i %#a %N") + cmd := exec.CommandContext(t.Context(), "stat", "-c", "%i %#a %N") cmd.Args = append(cmd.Args, args...) 
out, err := cmd.CombinedOutput() @@ -160,7 +160,7 @@ func TestBashRecreateDirectory(t *testing.T) { filepath.Join(dir, "d", "file"), ) - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-ceu", "--", bashRecreateDirectory+` recreate "$@"`, "-", filepath.Join(dir, "d"), "0740") @@ -199,7 +199,7 @@ func TestBashRecreateDirectory(t *testing.T) { func TestBashSafeLink(t *testing.T) { // macOS `mv` takes different arguments than GNU coreutils. - if output, err := exec.Command("mv", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "mv", "--help").CombinedOutput(); err != nil { t.Skip(`requires "mv" executable`) } else if !strings.Contains(string(output), "no-target-directory") { t.Skip(`requires "mv" that overwrites a directory symlink`) @@ -207,7 +207,7 @@ func TestBashSafeLink(t *testing.T) { // execute calls the bash function with args. execute := func(args ...string) (string, error) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-ceu", "--", bashSafeLink+`safelink "$@"`, "-") cmd.Args = append(cmd.Args, args...) output, err := cmd.CombinedOutput() @@ -474,7 +474,7 @@ func TestStartupCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(ctx, shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/postgres/exec_test.go b/internal/postgres/exec_test.go index b8f5693bef..3ec94717d5 100644 --- a/internal/postgres/exec_test.go +++ b/internal/postgres/exec_test.go @@ -184,7 +184,7 @@ done <<< "${databases}" assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/postgres/password/md5.go b/internal/postgres/password/md5.go index c99b2c0e30..55cc43f5cb 100644 --- a/internal/postgres/password/md5.go +++ b/internal/postgres/password/md5.go @@ -5,7 +5,6 @@ package password import ( - // #nosec G501 "crypto/md5" "errors" diff --git a/internal/postgres/password/scram.go b/internal/postgres/password/scram.go index bbf8dbcbe6..90eb2a54ad 100644 --- a/internal/postgres/password/scram.go +++ b/internal/postgres/password/scram.go @@ -138,7 +138,7 @@ func (s *SCRAMPassword) isASCII() bool { // iterate through each character of the plaintext password and determine if // it is ASCII. if it is not ASCII, exit early // per research, this loop is optimized to be fast for searching - for i := 0; i < len(s.password); i++ { + for i := range len(s.password) { if s.password[i] > unicode.MaxASCII { return false } diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index 33e68c2332..e723e40064 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -76,7 +76,7 @@ func TestMakeDirectories(t *testing.T) { // Expect ShellCheck for "sh" to be happy. 
// - https://www.shellcheck.net/wiki/SC2148 - cmd := exec.Command(shellcheck, "--enable=all", "--shell=sh", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) diff --git a/internal/testing/events/recorder.go b/internal/testing/events/recorder.go index e76ef21eb3..dad5dccf83 100644 --- a/internal/testing/events/recorder.go +++ b/internal/testing/events/recorder.go @@ -89,7 +89,7 @@ func (*Recorder) AnnotatedEventf(object runtime.Object, annotations map[string]s } func (r *Recorder) Event(object runtime.Object, eventtype, reason, message string) { if r.eventf != nil { - r.eventf(object, nil, eventtype, reason, "", message) + r.eventf(object, nil, eventtype, reason, "", "%v", message) } } func (r *Recorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...any) { diff --git a/internal/testing/require/exec.go b/internal/testing/require/exec.go index 338abef584..a9e028c55e 100644 --- a/internal/testing/require/exec.go +++ b/internal/testing/require/exec.go @@ -38,7 +38,7 @@ func executable(name string, args ...string) func(testing.TB) string { t.Helper() once.Do(func() { path, err := exec.LookPath(name) - cmd := exec.Command(path, args...) // #nosec G204 -- args from init() + cmd := exec.CommandContext(t.Context(), path, args...) // #nosec G204 -- args from init() if err != nil { result = func(t testing.TB) string { diff --git a/internal/util/secrets_test.go b/internal/util/secrets_test.go index e07a430718..ae5f7f5b05 100644 --- a/internal/util/secrets_test.go +++ b/internal/util/secrets_test.go @@ -55,7 +55,7 @@ func TestGenerateAlphaNumericPassword(t *testing.T) { } previous := sets.Set[string]{} - for i := 0; i < 10; i++ { + for range 10 { password, err := GenerateAlphaNumericPassword(5) assert.NilError(t, err) @@ -80,7 +80,7 @@ func TestGenerateASCIIPassword(t *testing.T) { } previous := sets.Set[string]{} - for i := 0; i < 10; i++ { + for range 10 { password, err := GenerateASCIIPassword(5) assert.NilError(t, err) From df8aafb75b6156a2f579a27536eb0b927a868d52 Mon Sep 17 00:00:00 2001 From: Drew Sessler Date: Wed, 10 Dec 2025 16:42:08 -0800 Subject: [PATCH 78/79] Remove optional types from ssl_groups validation rule since they are not supported in k8s versions earlier than 1.29 --- .../postgres-operator.crunchydata.com_postgresclusters.yaml | 4 ++-- .../v1beta1/postgrescluster_types.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 0c6558cc28..eb71aba33b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -18250,8 +18250,8 @@ spec: type: object x-kubernetes-validations: - message: The ssl_groups parameter is only available in pg18 and greater - rule: '!has(self.?config.parameters.ssl_groups) || self.postgresVersion - > 17' + rule: '!has(self.config) || !has(self.config.parameters) || !has(self.config.parameters.ssl_groups) + || self.postgresVersion > 17' status: description: PostgresClusterStatus defines the observed state of PostgresCluster properties: diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 6db6ce7459..46b7d0be43 
100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -17,7 +17,7 @@ import ( // // # Postgres 18 // -// +kubebuilder:validation:XValidation:rule=`!has(self.?config.parameters.ssl_groups) || self.postgresVersion > 17`,message=`The ssl_groups parameter is only available in pg18 and greater` +// +kubebuilder:validation:XValidation:rule=`!has(self.config) || !has(self.config.parameters) || !has(self.config.parameters.ssl_groups) || self.postgresVersion > 17`,message=`The ssl_groups parameter is only available in pg18 and greater` type PostgresClusterSpec struct { // +optional Metadata *Metadata `json:"metadata,omitempty"` From 8fbb418c9abfd2368d46d5c519f8218445b05d31 Mon Sep 17 00:00:00 2001 From: TJ Moore Date: Fri, 5 Dec 2025 22:03:03 -0500 Subject: [PATCH 79/79] Add back erroneously removed linter handling --- internal/controller/pgupgrade/jobs.go | 1 + internal/controller/postgrescluster/instance.go | 1 + 2 files changed, 2 insertions(+) diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index 53420cb8fe..c7b6e4e010 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -192,6 +192,7 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( settings := upgrade.Spec.PGUpgradeSettings.DeepCopy() // When jobs is undefined, use one less than the number of CPUs. + //nolint:gosec // The CPU count is clamped to MaxInt32. if settings.Jobs == 0 && feature.Enabled(ctx, feature.PGUpgradeCPUConcurrency) { wholeCPUs := int32(min(math.MaxInt32, largestWholeCPU(upgrade.Spec.Resources))) settings.Jobs = wholeCPUs - 1 diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index d2ac4e3bb5..b3bf0b6f75 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -335,6 +335,7 @@ func (r *Reconciler) observeInstances( status.DesiredPGDataVolume = make(map[string]string) for _, instance := range observed.bySet[name] { + //nolint:gosec // This slice is always small. status.Replicas += int32(len(instance.Pods)) if ready, known := instance.IsReady(); known && ready {
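
The two `//nolint:gosec` directives restored in this final patch suppress gosec check G115, which flags integer conversions that can overflow: the `int32(...)` conversion of the CPU count in jobs.go and `int32(len(instance.Pods))` in instance.go. Each comment records why the narrowing is safe. As a minimal standalone sketch of the clamping pattern the first comment refers to — `clampToInt32` is a hypothetical helper for illustration, not part of the operator code:

package main

import (
	"fmt"
	"math"
)

// clampToInt32 narrows an int64 to int32 by clamping to the int32 range
// first. gosec's G115 check flags a bare int32(n) conversion because it
// can silently overflow; clamping beforehand makes the conversion
// provably safe, which is what "The CPU count is clamped to MaxInt32"
// documents at the call site above.
func clampToInt32(n int64) int32 {
	if n > math.MaxInt32 {
		return math.MaxInt32
	}
	if n < math.MinInt32 {
		return math.MinInt32
	}
	return int32(n) // safe: n is within the int32 range here
}

func main() {
	fmt.Println(clampToInt32(42))      // 42
	fmt.Println(clampToInt32(1 << 40)) // 2147483647, clamped to MaxInt32
}

The second directive covers `int32(len(instance.Pods))`: `len` returns an `int`, and the linter cannot see that a per-instance pod slice stays far below `math.MaxInt32`, so the comment "This slice is always small" carries that justification for reviewers instead.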