diff --git a/.github/workflows/build-pr.yml b/.github/workflows/build-pr.yml
new file mode 100644
index 0000000..4ff9afc
--- /dev/null
+++ b/.github/workflows/build-pr.yml
@@ -0,0 +1,65 @@
+name: Build PR Image
+
+on:
+  pull_request:
+    branches:
+      - main
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}/mpg-operator-postgres
+
+jobs:
+  build-pr:
+    # Pull requests from forks get a read-only GITHUB_TOKEN, so the push to
+    # GHCR would fail; only build PRs opened from this repository's branches.
+    if: github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push - Standard (PG16)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          build-args: |
+            BASE_IMAGE_TAG=2.6.0-ppg16.8-postgres
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }}
+          # Per-variant scope so the PG17 build does not overwrite this cache.
+          cache-from: type=gha,scope=pg16
+          cache-to: type=gha,mode=max,scope=pg16
+
+      - name: Build and push - PG17
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          build-args: |
+            BASE_IMAGE_TAG=2.6.0-ppg17.4-postgres
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }}-pg17
+          # Per-variant scope so the PG16 build's cache is not overwritten.
+          cache-from: type=gha,scope=pg17
+          cache-to: type=gha,mode=max,scope=pg17
diff --git a/Dockerfile b/Dockerfile
index 42a4cb1..9be726b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,11 +1,20 @@
 ARG BASE_IMAGE_TAG=2.6.0-ppg16.8-postgres
 FROM percona/percona-postgresql-operator:${BASE_IMAGE_TAG}
 
-# Switch to root user temporarily to gain necessary privileges for setup
-USER 0
-
 COPY postgres-oom-adjuster.sh /usr/local/bin/
 COPY entrypoint-wrapper.sh /usr/local/bin/
 
-# We don't change the original ENTRYPOINT - we wrap around it
+# Switch user for the run command.
+USER 0
+# NOTE: We use this file to check if we can execute the entrypoint wrapper with arguments.
+# The previous version didn't allow arguments, so if we'd try, we'd break the replica.
+# We didn't catch this earlier because postgres-operator is overriding the entrypoint by
+# setting a pod command param to execute patroni directly. This change will be accompanied
+# by a postgres-operator patch to conditionally call the wrapper when this file exists,
+# allowing us to use the wrapper and the oom adjuster.
+RUN echo "2" > /usr/local/bin/.entrypoint-wrapper-version
+
+# Switch back to postgres user
+USER 26
+
 ENTRYPOINT ["/usr/local/bin/entrypoint-wrapper.sh"]
diff --git a/entrypoint-wrapper.sh b/entrypoint-wrapper.sh
index 99c6cc9..7b28234 100755
--- a/entrypoint-wrapper.sh
+++ b/entrypoint-wrapper.sh
@@ -3,13 +3,23 @@ set -e
 # For Percona PostgreSQL Operator, we need to focus on protecting the postmaster process
 
 # Original user in the Percona PostgreSQL Operator image
-ORIGINAL_USER=26
-ORIGINAL_ENTRYPOINT="/opt/crunchy/bin/postgres-ha/bootstrap-postgres-ha.sh"
+ORIGINAL_USER=26 # (postgres user)
+
+# Ensure a command was provided; diagnostics go to stderr.
+if [ $# -eq 0 ]; then
+    echo "ERROR: No command provided to entrypoint-wrapper.sh" >&2
+    echo "Usage: entrypoint-wrapper.sh COMMAND [args...]" >&2
+    exit 1
+fi
 
 # Set OOM score adjustment for our own process (will be inherited)
 if [ -f "/proc/self/oom_score_adj" ]; then
-    echo -900 > /proc/self/oom_score_adj
-    echo "Set OOM score adjustment to -900 for pid 1"
+    if echo -900 > /proc/self/oom_score_adj 2>/dev/null; then
+        echo "Set OOM score adjustment to -900 for pid 1"
+    else
+        # If we fail, let the postgres oom adjuster handle it.
+        echo "WARNING: Cannot set OOM score adjustment (will retry via background adjuster)"
+    fi
 else
     echo "WARNING: Cannot set OOM score adjustment (file not found)"
 fi
@@ -22,14 +32,23 @@ echo "Started postmaster OOM adjuster in background"
 export PG_OOM_ADJUST_FILE=/proc/self/oom_score_adj
 export PG_OOM_ADJUST_VALUE=0
 
-# Switching to the original user and executing original entrypoint
-echo "Switching to user $ORIGINAL_USER and executing original entrypoint: $ORIGINAL_ENTRYPOINT $@"
+CURRENT_UID=$(id -u)
+if [ "$CURRENT_UID" = "$ORIGINAL_USER" ]; then
+    echo "Already running as user $ORIGINAL_USER (uid=$CURRENT_UID), executing: $*"
+    exec "$@"
+fi
+
+echo "Running as uid=$CURRENT_UID, switching to user $ORIGINAL_USER and executing: $*"
 
 # Check which user-switching command is available
 if command -v runuser >/dev/null 2>&1; then
     # Use runuser (available on RHEL/CentOS/Fedora)
-    exec runuser -u "#$ORIGINAL_USER" -- "$ORIGINAL_ENTRYPOINT" "$@"
+    exec runuser -u "#$ORIGINAL_USER" -- "$@"
 else
     # Fall back to su
-    exec su -s /bin/bash $ORIGINAL_USER -c "$ORIGINAL_ENTRYPOINT $*"
+    # su takes a login name, not a numeric UID, so resolve it from passwd first.
+    ORIGINAL_USER_NAME=$(getent passwd "$ORIGINAL_USER" | cut -d: -f1)
+    # Re-quote every argument so spaces/metacharacters survive `bash -c`.
+    # NOTE(review): printf %q needs bash's printf; confirm this script's shebang is bash.
+    exec su -s /bin/bash "${ORIGINAL_USER_NAME:-$ORIGINAL_USER}" -c "$(printf '%q ' "$@")"
 fi
diff --git a/postgres-oom-adjuster.sh b/postgres-oom-adjuster.sh
index 63294c9..9f1cdf4 100755
--- a/postgres-oom-adjuster.sh
+++ b/postgres-oom-adjuster.sh
@@ -43,11 +43,40 @@ while true; do
         if [ "$CURRENT_SCORE" != "-900" ]; then
             echo -900 > /proc/$POSTMASTER_PID/oom_score_adj
             echo "$(date): Adjusted OOM score for postmaster PID $POSTMASTER_PID from $CURRENT_SCORE to -900" >> $LOG_FILE
-        else
-            echo "$(date): Postmaster PID $POSTMASTER_PID already has OOM score -900" >> $LOG_FILE
         fi
     else
-        echo "$(date): Postmaster not found yet or OOM score file not accessible" >> $LOG_FILE
+        # Band-aid for the Postmaster PID problem. Actual fix is in Patroni >= 4.0.6.
+        # Ref.: https://github.com/patroni/patroni/pull/3372
+        echo "$(date): Postmaster not found, checking for orphaned shared memory" >> $LOG_FILE
+
+        # Clean up orphaned POSIX shared memory segments
+        # These block PostgreSQL startup if left behind after a crash
+        if [ -d "/dev/shm" ]; then
+            for shm_file in /dev/shm/PostgreSQL.*; do
+                # Skip the unexpanded glob and anything that is not a regular file.
+                [ -f "$shm_file" ] || continue
+                # The message this branch used to log mentioned an inaccessible OOM
+                # score file, so a postmaster may still be alive here: never delete
+                # a segment that some process still has mapped.
+                if grep -qsF "$shm_file" /proc/[0-9]*/maps; then
+                    continue
+                fi
+                echo "$(date): Removing orphaned POSIX shared memory: $shm_file" >> $LOG_FILE
+                rm -f "$shm_file" 2>/dev/null \
+                    || echo "$(date): Failed to remove $shm_file" >> $LOG_FILE
+            done
+        fi
+
+        # Clean up orphaned System V shared memory segments (nattch=0 means no attached processes)
+        if command -v ipcs >/dev/null 2>&1 && command -v ipcrm >/dev/null 2>&1; then
+            for shmid in $(ipcs -m 2>/dev/null | awk 'NR>3 && $6==0 {print $2}'); do
+                if [ -n "$shmid" ]; then
+                    echo "$(date): Removing orphaned SysV shared memory segment: $shmid" >> $LOG_FILE
+                    ipcrm -m "$shmid" 2>/dev/null \
+                        || echo "$(date): Failed to remove SysV segment $shmid" >> $LOG_FILE
+                fi
+            done
+        fi
     fi
 
     # Sleep for a while before checking again