diff --git a/.gitignore b/.gitignore index 3051ef7a..8d2fdc30 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .scratch docker/control-plane-dev/data clustertest/data +lima/data .vagrant vagrant-ssh.cfg .terraform @@ -21,5 +22,6 @@ docs/plans *-results.xml dist control-plane +pgedge-control-plane !docker/control-plane e2e/debug diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 8ed317b2..76c421ea 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -1,8 +1,8 @@ version: 2 -project_name: control-plane +project_name: pgedge-control-plane builds: - main: ./server - binary: control-plane + binary: pgedge-control-plane env: - CGO_ENABLED=0 goos: diff --git a/Makefile b/Makefile index 22d9fe90..4338e89b 100644 --- a/Makefile +++ b/Makefile @@ -258,12 +258,12 @@ control-plane-images: goreleaser-build: GORELEASER_CURRENT_TAG=$(CONTROL_PLANE_VERSION) \ $(goreleaser) build --snapshot --clean - tar -C dist/control-plane_linux_amd64_v1 -c -z \ - -f dist/control-plane_$(CONTROL_PLANE_VERSION:v%=%)_linux_amd64.tar.gz \ - control-plane - tar -C dist/control-plane_linux_arm64_v8.0 -c -z \ - -f dist/control-plane_$(CONTROL_PLANE_VERSION:v%=%)_linux_arm64.tar.gz \ - control-plane + tar -C dist/pgedge-control-plane_linux_amd64_v1 -c -z \ + -f dist/pgedge-control-plane_$(CONTROL_PLANE_VERSION:v%=%)_linux_amd64.tar.gz \ + pgedge-control-plane + tar -C dist/pgedge-control-plane_linux_arm64_v8.0 -c -z \ + -f dist/pgedge-control-plane_$(CONTROL_PLANE_VERSION:v%=%)_linux_arm64.tar.gz \ + pgedge-control-plane goreleaser-test-release: GORELEASER_CURRENT_TAG=$(CONTROL_PLANE_VERSION) \ @@ -332,7 +332,7 @@ build: dev-build dev-build: GOOS=linux go build \ -gcflags "all=-N -l" \ - -o docker/control-plane-dev/control-plane \ + -o docker/control-plane-dev/pgedge-control-plane \ $(shell pwd)/server .PHONY: docker-swarm-init @@ -389,6 +389,30 @@ dev-teardown: dev-down api-docs: WORKSPACE_DIR=$(shell pwd) DEBUG=0 docker compose -f ./docker/control-plane-dev/docker-compose.yaml up 
api-docs +######################## +# lima dev environment # +######################## + +.PHONY: dev-lima-deploy +dev-lima-deploy: + $(MAKE) -C lima deploy + +.PHONY: dev-lima-build +dev-lima-build: + $(MAKE) -C lima build + +.PHONY: dev-lima-run +dev-lima-run: + $(MAKE) -C lima run + +.PHONY: dev-lima-reset +dev-lima-reset: + $(MAKE) -C lima reset + +.PHONY: dev-lima-teardown +dev-lima-teardown: + $(MAKE) -C lima teardown + ################################# # docker compose ci environment # ################################# @@ -397,7 +421,7 @@ api-docs: ci-compose-build: GOOS=linux go build \ -gcflags "all=-N -l" \ - -o docker/control-plane-ci/control-plane \ + -o docker/control-plane-ci/pgedge-control-plane \ $(shell pwd)/server .PHONY: ci-compose-detached diff --git a/NOTICE.txt b/NOTICE.txt index e3ba85a1..ac882e4e 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1248,9 +1248,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` -## github.com/coreos/go-systemd/v22/journal +## github.com/coreos/go-systemd/v22 -* Name: github.com/coreos/go-systemd/v22/journal +* Name: github.com/coreos/go-systemd/v22 * Version: v22.5.0 * License: [Apache-2.0](https://github.com/coreos/go-systemd/blob/v22.5.0/LICENSE) @@ -2414,6 +2414,217 @@ SOFTWARE. ``` +## github.com/elastic/gosigar + +* Name: github.com/elastic/gosigar +* Version: v0.14.3 +* License: [Apache-2.0](https://github.com/elastic/gosigar/blob/v0.14.3/LICENSE) + +``` + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +``` + ## github.com/fatih/structs * Name: github.com/fatih/structs @@ -3070,6 +3281,41 @@ SOFTWARE. ``` +## github.com/godbus/dbus/v5 + +* Name: github.com/godbus/dbus/v5 +* Version: v5.1.0 +* License: [BSD-2-Clause](https://github.com/godbus/dbus/blob/v5.1.0/LICENSE) + +``` +Copyright (c) 2013, Georg Reinke (), Google +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +``` + ## github.com/gogo/protobuf * Name: github.com/gogo/protobuf diff --git a/api/apiv1/design/database.go b/api/apiv1/design/database.go index 69493e3e..c2d26e1d 100644 --- a/api/apiv1/design/database.go +++ b/api/apiv1/design/database.go @@ -575,7 +575,7 @@ var DatabaseSpec = g.Type("DatabaseSpec", func() { g.Meta("struct:tag:json", "port,omitempty") }) g.Attribute("patroni_port", g.Int, func() { - g.Description("The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.") + g.Description("The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.") g.Minimum(0) g.Maximum(65535) g.Example(8888) diff --git a/api/apiv1/gen/control_plane/service.go b/api/apiv1/gen/control_plane/service.go index 6da4a0a7..631aed4d 100644 --- a/api/apiv1/gen/control_plane/service.go +++ b/api/apiv1/gen/control_plane/service.go @@ -389,8 +389,9 @@ type DatabaseSpec struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. 
Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include diff --git a/api/apiv1/gen/http/control_plane/client/types.go b/api/apiv1/gen/http/control_plane/client/types.go index ffa31778..8d6add7e 100644 --- a/api/apiv1/gen/http/control_plane/client/types.go +++ b/api/apiv1/gen/http/control_plane/client/types.go @@ -1814,8 +1814,9 @@ type DatabaseSpecRequestBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include @@ -2213,8 +2214,9 @@ type DatabaseSpecResponseBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. 
If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include @@ -2534,8 +2536,9 @@ type DatabaseSpecRequestBodyRequestBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include diff --git a/api/apiv1/gen/http/control_plane/server/types.go b/api/apiv1/gen/http/control_plane/server/types.go index 1f3fd0b6..9090c1f2 100644 --- a/api/apiv1/gen/http/control_plane/server/types.go +++ b/api/apiv1/gen/http/control_plane/server/types.go @@ -1896,8 +1896,9 @@ type DatabaseSpecResponseBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. 
PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include @@ -2226,8 +2227,9 @@ type DatabaseSpecRequestBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. Can include @@ -2544,8 +2546,9 @@ type DatabaseSpecRequestBodyRequestBody struct { // be assigned a random port. If the port is unspecified, the database will not // be exposed on any port, dependent on orchestrator support for that feature. Port *int `json:"port,omitempty"` - // The port used by Patroni for this database. NOTE: This field is not - // currently supported for Docker Swarm. + // The port used by Patroni for this database. If the port is 0, each instance + // will be assigned a random port. NOTE: This field is not currently supported + // for Docker Swarm. PatroniPort *int `json:"patroni_port,omitempty"` // The number of CPUs to allocate for the database and to use for tuning // Postgres. Defaults to the number of available CPUs on the host. 
Can include diff --git a/api/apiv1/gen/http/openapi.json b/api/apiv1/gen/http/openapi.json index 03b837ba..dd0d8892 100644 --- a/api/apiv1/gen/http/openapi.json +++ b/api/apiv1/gen/http/openapi.json @@ -4622,7 +4622,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, diff --git a/api/apiv1/gen/http/openapi.yaml b/api/apiv1/gen/http/openapi.yaml index 2f12e29d..069f5a2c 100644 --- a/api/apiv1/gen/http/openapi.yaml +++ b/api/apiv1/gen/http/openapi.yaml @@ -3303,7 +3303,7 @@ definitions: $ref: '#/definitions/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 diff --git a/api/apiv1/gen/http/openapi3.json b/api/apiv1/gen/http/openapi3.json index 490f519c..608dded9 100644 --- a/api/apiv1/gen/http/openapi3.json +++ b/api/apiv1/gen/http/openapi3.json @@ -11766,7 +11766,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. 
NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -13570,7 +13570,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -15633,7 +15633,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -17693,7 +17693,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -19641,7 +19641,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -21427,7 +21427,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. 
NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, @@ -23297,7 +23297,7 @@ }, "patroni_port": { "type": "integer", - "description": "The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.", + "description": "The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.", "example": 8888, "format": "int64", "minimum": 0, diff --git a/api/apiv1/gen/http/openapi3.yaml b/api/apiv1/gen/http/openapi3.yaml index dda18e5d..b79d8880 100644 --- a/api/apiv1/gen/http/openapi3.yaml +++ b/api/apiv1/gen/http/openapi3.yaml @@ -8252,7 +8252,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 @@ -9514,7 +9514,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' 
example: 8888 format: int64 minimum: 0 @@ -10982,7 +10982,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 @@ -12447,7 +12447,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 @@ -13820,7 +13820,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 @@ -15089,7 +15089,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' 
example: 8888 format: int64 minimum: 0 @@ -16413,7 +16413,7 @@ components: $ref: '#/components/schemas/OrchestratorOpts' patroni_port: type: integer - description: 'The port used by Patroni for this database. NOTE: This field is not currently supported for Docker Swarm.' + description: 'The port used by Patroni for this database. If the port is 0, each instance will be assigned a random port. NOTE: This field is not currently supported for Docker Swarm.' example: 8888 format: int64 minimum: 0 diff --git a/changes/unreleased/Added-20260309-165318.yaml b/changes/unreleased/Added-20260309-165318.yaml new file mode 100644 index 00000000..8fe3f135 --- /dev/null +++ b/changes/unreleased/Added-20260309-165318.yaml @@ -0,0 +1,3 @@ +kind: Added +body: Preliminary support for managing database instances with systemd. +time: 2026-03-09T16:53:18.26052-04:00 diff --git a/docker/control-plane-ci/Dockerfile b/docker/control-plane-ci/Dockerfile index 73e51909..e8e551b6 100644 --- a/docker/control-plane-ci/Dockerfile +++ b/docker/control-plane-ci/Dockerfile @@ -4,6 +4,6 @@ RUN apt-get update && \ apt-get install -y curl && \ rm -rf /var/lib/apt/lists/* -COPY ./control-plane /control-plane +COPY ./pgedge-control-plane /pgedge-control-plane -ENTRYPOINT ["/control-plane"] +ENTRYPOINT ["/pgedge-control-plane"] diff --git a/docker/control-plane-dev/Dockerfile b/docker/control-plane-dev/Dockerfile index ed894e14..a49df879 100644 --- a/docker/control-plane-dev/Dockerfile +++ b/docker/control-plane-dev/Dockerfile @@ -5,6 +5,6 @@ ENV CGO_ENABLED=0 RUN go install github.com/go-delve/delve/cmd/dlv@latest COPY ./entrypoint.sh /entrypoint.sh -COPY ./control-plane /control-plane +COPY ./pgedge-control-plane /pgedge-control-plane ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/docker/control-plane-dev/docker-compose.yaml b/docker/control-plane-dev/docker-compose.yaml index 62790c0c..2d3071f9 100644 --- a/docker/control-plane-dev/docker-compose.yaml +++ b/docker/control-plane-dev/docker-compose.yaml 
@@ -6,9 +6,9 @@ services: watch: - path: ./Dockerfile action: rebuild - - path: ./control-plane + - path: ./pgedge-control-plane action: sync+restart - target: /control-plane + target: /pgedge-control-plane working_dir: ${WORKSPACE_DIR} environment: - DEBUG=${DEBUG:-0} diff --git a/docker/control-plane-dev/entrypoint.sh b/docker/control-plane-dev/entrypoint.sh index f5bfa049..fa9fefec 100755 --- a/docker/control-plane-dev/entrypoint.sh +++ b/docker/control-plane-dev/entrypoint.sh @@ -13,13 +13,13 @@ if [[ "${DEBUG}" == 1 ]]; then --log-output=debugger,debuglineerr,gdbwire,lldbout,rpc \ --accept-multiclient \ --api-version=2 \ - exec /control-plane \ + exec /pgedge-control-plane \ -- \ run \ --config-path /config.json \ --logging.pretty else - exec /control-plane run \ + exec /pgedge-control-plane run \ --config-path /config.json \ --logging.pretty fi diff --git a/docker/control-plane/Dockerfile b/docker/control-plane/Dockerfile index 5cda49dc..f34534fb 100644 --- a/docker/control-plane/Dockerfile +++ b/docker/control-plane/Dockerfile @@ -4,6 +4,6 @@ ARG TARGETOS ARG TARGETARCH ARG ARCHIVE_VERSION -ENTRYPOINT ["/control-plane"] +ENTRYPOINT ["/pgedge-control-plane"] -ADD control-plane_${ARCHIVE_VERSION}_${TARGETOS}_${TARGETARCH}.tar.gz / +ADD pgedge-control-plane_${ARCHIVE_VERSION}_${TARGETOS}_${TARGETARCH}.tar.gz / diff --git a/docs/development/running-locally.md b/docs/development/running-locally.md index 1144ba22..33f0b6f2 100644 --- a/docs/development/running-locally.md +++ b/docs/development/running-locally.md @@ -1,18 +1,52 @@ # Running the Control Plane locally +- [Running the Control Plane locally](#running-the-control-plane-locally) + - [Common prerequisites](#common-prerequisites) + - [Developing the Swarm orchestrator](#developing-the-swarm-orchestrator) + - [Prerequisites](#prerequisites) + - [Configuration](#configuration) + - [Running the Control Plane](#running-the-control-plane) + - [Interact with the Control Plane 
API](#interact-with-the-control-plane-api) + - [Resetting your Development Environment](#resetting-your-development-environment) + - [Development Workflow](#development-workflow) + - [Rebuilding the `pgedge-control-plane` binary](#rebuilding-the-pgedge-control-plane-binary) + - [Debugging](#debugging) + - [API Documentation](#api-documentation) + - [Optional Development Tools](#optional-development-tools) + - [Restish](#restish) + - [`dev-env.zsh` Script](#dev-envzsh-script) + - [Bruno](#bruno) + - [Bruno's `wait_for_task` Helper](#brunos-wait_for_task-helper) + - [When Should I Add to the Test Scenarios?](#when-should-i-add-to-the-test-scenarios) + - [Developing the SystemD orchestrator](#developing-the-systemd-orchestrator) + - [Prerequisites](#prerequisites-1) + - [`pipx`](#pipx) + - [Circus](#circus) + - [Ansible](#ansible) + - [Lima](#lima) + - [Running the Control Plane](#running-the-control-plane-1) + - [Resetting your Development Environment](#resetting-your-development-environment-1) + +## Common prerequisites + +- Go >= 1.25 + - [Official download page](https://go.dev/doc/install) +- Restish + - [Official installation guide](https://rest.sh/#/guide) + - See the [Restish](#restish) section below for usage and configuration + +## Developing the Swarm orchestrator + The `docker/control-plane-dev` directory contains configuration for a six-host Control Plane cluster that runs in Docker via Docker Compose. -## Prerequisites +### Prerequisites Before deploying the Control Plane in a development environment, you must install: -* Docker Desktop - for details, visit the [official download page](https://www.docker.com/products/docker-desktop/). - -* Go 1.20 + - for details, visit the [official download page](https://go.dev/doc/install) +- Docker Desktop - for details, visit the [official download page](https://www.docker.com/products/docker-desktop/). 
- -### Configuration +#### Configuration After meeting prerequisites on your system, make sure to change the settings to provide adequate [disk space, CPU, and RAM](https://docs.docker.com/desktop/settings-and-maintenance/settings/#resources). Use the following as a baseline configuration: @@ -26,99 +60,7 @@ provide adequate [disk space, CPU, and RAM](https://docs.docker.com/desktop/sett > Our Docker Compose configuration uses host networking, so you must also enable > [the host networking setting](https://docs.docker.com/engine/network/drivers/host/#docker-desktop). -### Restish - -Restish is a CLI tool to interact with REST APIs that expose an OpenAPI spec, -like the Control Plane API. It's not strictly required, but we recommend it. - -[Installation guide](https://rest.sh/#/guide) - -```sh -brew install rest-sh/tap/restish -``` - -We recommend you add this environment variable to your `.zshrc` as well to -disable Restish's default retry behavior: - -```sh -export RSH_RETRY=0 -``` - -The changes to your `.zshrc` will automatically apply to new sessions. To reload the configuration in your current shell session, run: - -```sh -exec zsh -``` - -After installing Restish, use the following command to verify the installation and initialize your configuration file: - -```sh -restish --help -``` - -On MacOS, the full path to the Restish configuration file is `~/Library/Application Support/restish/apis.json`. See [the configuration documentation](https://rest.sh/#/configuration) to find the configuration file location for non-MacOS systems. 
Update the configuration file to contain the following details for the Control Plane deployment: - -```json -{ - "$schema": "https://rest.sh/schemas/apis.json", - "control-plane-local-1": { - "base": "http://localhost:3000", - "profiles": { - "default": { - - } - }, - "tls": {} - }, - "control-plane-local-2": { - "base": "http://localhost:3001", - "profiles": { - "default": { - - } - }, - "tls": {} - }, - "control-plane-local-3": { - "base": "http://localhost:3002", - "profiles": { - "default": { - - } - }, - "tls": {} - }, - "control-plane-local-4": { - "base": "http://localhost:3003", - "profiles": { - "default": { - - } - }, - "tls": {} - }, - "control-plane-local-5": { - "base": "http://localhost:3004", - "profiles": { - "default": { - - } - }, - "tls": {} - }, - "control-plane-local-6": { - "base": "http://localhost:3005", - "profiles": { - "default": { - - } - }, - "tls": {} - } -} -``` - -## Running the Control Plane +### Running the Control Plane To start the Control Plane instances, navigate into the `control-plane` repository root and run: @@ -126,20 +68,57 @@ To start the Control Plane instances, navigate into the `control-plane` reposito make dev-watch ``` -This will build a `control-plane` binary, build the Docker image in `docker/control-plane-dev`, and run the Docker Compose configuration in `watch` mode. See the [Development workflow](#development-workflow) section to learn how to use this setup for development. +This will build a `pgedge-control-plane` binary, build the Docker image in +`docker/control-plane-dev`, and run the Docker Compose configuration in `watch` +mode. See the [Development workflow](#development-workflow) section to learn how +to use this setup for development. -## Interact with the Control Plane API +### Interact with the Control Plane API Now, you should be able to interact with the API using Restish. 
For example, to initialize a new cluster and create a new database: ```sh +# If you're using the dev-env.zsh script, you can initialize the cluster and +# join all hosts with one command: +cp-init + +# If you're not using the dev-env.zsh script: restish control-plane-local-1 init-cluster restish control-plane-local-2 join-cluster "$(restish control-plane-local-1 get-join-token)" restish control-plane-local-3 join-cluster "$(restish control-plane-local-1 get-join-token)" restish control-plane-local-4 join-cluster "$(restish control-plane-local-1 get-join-token)" restish control-plane-local-5 join-cluster "$(restish control-plane-local-1 get-join-token)" restish control-plane-local-6 join-cluster "$(restish control-plane-local-1 get-join-token)" + +# If you're using the dev-env.zsh scripts: +cp1-req create-database '{ + "id": "storefront", + "spec": { + "database_name": "storefront", + "database_users": [ + { + "username": "admin", + "password": "password", + "db_owner": true, + "attributes": ["SUPERUSER", "LOGIN"] + }, + { + "username": "app", + "password": "password", + "attributes": ["LOGIN"], + "roles": ["pgedge_application"] + } + ], + "nodes": [ + { "name": "n1", "host_ids": ["host-1", "host-4"] }, + { "name": "n2", "host_ids": ["host-2", "host-5"] }, + { "name": "n3", "host_ids": ["host-3", "host-6"] } + ] + } +}' + +# If you're not using the dev-env.zsh script: restish control-plane-local-1 create-database '{ "id": "storefront", "spec": { @@ -176,7 +155,7 @@ in: Endpoints that are unimplemented will return a `not implemented` error. -## Resetting your Development Environment +### Resetting your Development Environment To reset your environment to its initial state, run: @@ -195,15 +174,15 @@ uninitialized state. Then, you can follow the instructions in the [Interact with the Control Plane API](#interact-with-the-control-plane-api) section to reinitialize your cluster. 
-## Development Workflow +### Development Workflow The following sections detail the steps in the development process. -### Rebuilding the `control-plane` binary +#### Rebuilding the `pgedge-control-plane` binary The Docker Compose file is configured to watch for changes to the -`control-plane` binary. You can update the binary in the running containers by -running: +`pgedge-control-plane` binary. You can update the binary in the running +containers by running: ```sh make dev-build @@ -213,7 +192,7 @@ You'll see messages in the docker compose output to indicate that it's stopping the containers, syncing the files, and then starting them up again. This takes about 10 seconds due to the graceful shutdown in the Control Plane server. -### Debugging +#### Debugging The `control-plane-dev` image includes the Delve Go debugger. You can run the debugger by adding the `DEBUG` environment variable to the `make dev-watch` @@ -246,7 +225,7 @@ This is an example remote debugging configuration for VSCode: After attaching the debugger, the server will start normally. -## API Documentation +### API Documentation The `docker-compose.yaml` file for this configuration includes an API documentation server. You can access the documentation in your browser at @@ -259,6 +238,100 @@ see the updates. ## Optional Development Tools +### Restish + +Restish is a CLI tool to interact with REST APIs that expose an OpenAPI spec, +like the Control Plane API. It's not strictly required, but we recommend it. + +[Installation guide](https://rest.sh/#/guide) + +```sh +brew install rest-sh/tap/restish +``` + +We recommend you add this environment variable to your `.zshrc` as well to +disable Restish's default retry behavior: + +```sh +export RSH_RETRY=0 +``` + +The changes to your `.zshrc` will automatically apply to new sessions. 
To reload the configuration in your current shell session, run: + +```sh +exec zsh +``` + +After installing Restish, use the following command to verify the installation and initialize your configuration file: + +```sh +restish --help +``` + +If you're a Zsh user, we strongly recommend using the [`dev-env.zsh`](#dev-envzsh-script), which will configure Restish for you and add helpful wrappers and aliases to your shell environment. + +If you choose not use the `dev-env.zsh` script, you can configure Restish manually. On macOS, the full path to the Restish configuration file is `~/Library/Application Support/restish/apis.json`. See [the configuration documentation](https://rest.sh/#/configuration) to find the configuration file location for non-macOS systems. Update the configuration file to contain the following details for the Control Plane deployment: + +```json +{ + "$schema": "https://rest.sh/schemas/apis.json", + "control-plane-local-1": { + "base": "http://localhost:3000", + "profiles": { + "default": { + + } + }, + "tls": {} + }, + "control-plane-local-2": { + "base": "http://localhost:3001", + "profiles": { + "default": { + + } + }, + "tls": {} + }, + "control-plane-local-3": { + "base": "http://localhost:3002", + "profiles": { + "default": { + + } + }, + "tls": {} + }, + "control-plane-local-4": { + "base": "http://localhost:3003", + "profiles": { + "default": { + + } + }, + "tls": {} + }, + "control-plane-local-5": { + "base": "http://localhost:3004", + "profiles": { + "default": { + + } + }, + "tls": {} + }, + "control-plane-local-6": { + "base": "http://localhost:3005", + "profiles": { + "default": { + + } + }, + "tls": {} + } +} +``` + The tools listed below may be helpful in your development environment. ### `dev-env.zsh` Script @@ -280,7 +353,7 @@ Bruno client. We recommend using the standalone Bruno API client rather than the VSCode extension because we make extensive use of the developer console. 
If you're -using MacOS, you can install Bruno through HomeBrew: +using macOS, you can install Bruno through HomeBrew: ``` brew install bruno @@ -307,3 +380,83 @@ develop and test changes. They can also be helpful for reviewers who need to test your changes. Consider adding new requests or scenarios if you find yourself repeating the same sequence of requests during development, and those requests aren't already covered by an existing scenarios. + +## Developing the SystemD orchestrator + +### Prerequisites + +#### `pipx` + +`pipx` is a tool that runs Python programs in isolated environments. It's the +recommended way to run Ansible, which we use to deploy the test fixtures. + +[Homepage](https://pipx.pypa.io/stable/) + +```sh +brew install pipx +pipx ensurepath +sudo pipx ensurepath --global # optional to allow pipx actions with --global argument +``` + +Be sure to restart your terminal session after running the `ensurepath` commands +so that the profile changes take effect. + +#### Circus + +Circus is a process monitor, similar to Supervisord. We use it to manage the Control Plane server processes and stream their log output to the terminal. + +[Homepage](https://circus.readthedocs.io/en/latest/) + +```sh +pipx install --include-deps circus +``` + +#### Ansible + +We're using Ansible to configure the test fixtures and install the Control Plane +and other software on them. + +[Installation instructions +page](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) + +```sh +pipx install --include-deps ansible +pipx inject ansible 'botocore>=1.34.0' +pipx inject ansible 'boto3>=1.34.0' +``` + +#### Lima + +Lima is an easy-to-use virtual machine runner that works well on macOS. 
+ +```sh +# Installation through homebrew + +brew install lima +``` + +### Running the Control Plane + +To run the Control Plane, start by deploying the Lima virtual machines where we'll run the Control Plane servers: + +```sh +make dev-lima-deploy +``` + +Note that this may take a while to create, configure, and install the pre-requisites. Once this command exits, you can build and run the Control Plane servers with: + +```sh +make dev-lima-run +``` + +If you're using the `dev-env.zsh` script, make sure to run `use-dev-lima` to switch your environment and setup the `cp-*` aliases. + +At this point, you can interact with the Control Plane servers using the same commands described above in [Interact with the Control Plane API](#interact-with-the-control-plane-api). + +### Resetting your Development Environment + +To reset this environment to its initial state, stop the servers by hitting `ctrl+c` in the terminal where you ran `make dev-lima-run`. Then, run the following to stop and remove all databases and Control Plane data: + +```sh +make dev-lima-reset +``` diff --git a/e2e/backup_restore_test.go b/e2e/backup_restore_test.go index 6489894f..accd8d38 100644 --- a/e2e/backup_restore_test.go +++ b/e2e/backup_restore_test.go @@ -26,7 +26,48 @@ func TestPosixBackupRestore(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() - t.Log("Creating database") + var backupRepositories []*controlplane.BackupRepositorySpec + var restoreRepository *controlplane.RestoreRepositorySpec + var orchestratorOpts *controlplane.OrchestratorOpts + + switch fixture.Orchestrator() { + case "swarm": + backupRepositories = []*controlplane.BackupRepositorySpec{ + { + Type: client.RepositoryTypePosix, + BasePath: pointerTo("/backups"), + }, + } + restoreRepository = &controlplane.RestoreRepositorySpec{ + Type: client.RepositoryTypePosix, + BasePath: pointerTo("/backups"), + } + orchestratorOpts = &controlplane.OrchestratorOpts{ + Swarm: 
&controlplane.SwarmOpts{ + ExtraVolumes: []*controlplane.ExtraVolumesSpec{ + { + HostPath: tmpDir, + DestinationPath: "/backups", + }, + }, + }, + } + case "systemd": + backupRepositories = []*controlplane.BackupRepositorySpec{ + { + Type: client.RepositoryTypePosix, + BasePath: &tmpDir, + }, + } + restoreRepository = &controlplane.RestoreRepositorySpec{ + Type: client.RepositoryTypePosix, + BasePath: &tmpDir, + } + default: + t.Fatalf("unsupported orchestrator '%s'", fixture.Orchestrator()) + } + + tLog(t, "Creating database") db := fixture.NewDatabaseFixture(ctx, t, &controlplane.CreateDatabaseRequest{ Spec: &controlplane.DatabaseSpec{ @@ -39,29 +80,16 @@ func TestPosixBackupRestore(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", HostIds: []controlplane.Identifier{controlplane.Identifier(host1)}, BackupConfig: &controlplane.BackupConfigSpec{ - Repositories: []*controlplane.BackupRepositorySpec{ - { - Type: client.RepositoryTypePosix, - BasePath: pointerTo("/backups"), - }, - }, - }, - OrchestratorOpts: &controlplane.OrchestratorOpts{ - Swarm: &controlplane.SwarmOpts{ - ExtraVolumes: []*controlplane.ExtraVolumesSpec{ - { - HostPath: tmpDir, - DestinationPath: "/backups", - }, - }, - }, + Repositories: backupRepositories, }, + OrchestratorOpts: orchestratorOpts, }, }, }, @@ -73,7 +101,7 @@ func TestPosixBackupRestore(t *testing.T) { Password: "password", } - t.Log("Inserting test data") + tLog(t, "Inserting test data") db.WithConnection(ctx, opts, t, func(conn *pgx.Conn) { _, err := conn.Exec(ctx, "CREATE TABLE foo (id INT PRIMARY KEY, val TEXT)") @@ -86,7 +114,7 @@ func TestPosixBackupRestore(t *testing.T) { require.NoError(t, err) }) - t.Log("Creating a full backup") + tLog(t, "Creating a full backup") db.BackupDatabaseNode(ctx, BackupDatabaseNodeOptions{ Node: "n1", @@ -95,18 +123,18 @@ func TestPosixBackupRestore(t 
*testing.T) { }, }) - t.Log("Deleting all data") + tLog(t, "Deleting all data") db.WithConnection(ctx, opts, t, func(conn *pgx.Conn) { _, err := conn.Exec(ctx, "DELETE FROM foo") require.NoError(t, err) }) - t.Log("Getting set name for latest full backup") + tLog(t, "Getting set name for latest full backup") setName := fixture.LatestPosixBackup(t, host1, tmpDir, string(db.ID)) - t.Log("Creating another backup to ensure we can restore the correct one") + tLog(t, "Creating another backup to ensure we can restore the correct one") db.BackupDatabaseNode(ctx, BackupDatabaseNodeOptions{ Node: "n1", Options: &controlplane.BackupOptions{ @@ -114,17 +142,14 @@ func TestPosixBackupRestore(t *testing.T) { }, }) - t.Log("Restoring to the first backup") + tLog(t, "Restoring to the first backup") err := db.RestoreDatabase(ctx, RestoreDatabaseOptions{ RestoreConfig: &controlplane.RestoreConfigSpec{ SourceDatabaseID: db.ID, SourceNodeName: "n1", SourceDatabaseName: db.Spec.DatabaseName, - Repository: &controlplane.RestoreRepositorySpec{ - Type: client.RepositoryTypePosix, - BasePath: pointerTo("/backups"), - }, + Repository: restoreRepository, RestoreOptions: map[string]string{ "set": strings.TrimSpace(setName), "type": "immediate", @@ -133,7 +158,7 @@ func TestPosixBackupRestore(t *testing.T) { }) require.NoError(t, err) - t.Log("Validating restored data") + tLog(t, "Validating restored data") // Validate that our data is restored db.WithConnection(ctx, opts, t, func(conn *pgx.Conn) { @@ -151,9 +176,7 @@ func TestPosixBackupRestore(t *testing.T) { func TestS3BackupRestore(t *testing.T) { t.Parallel() - if !fixture.S3Enabled() { - t.Skip("s3 not enabled for this fixture") - } + fixture.SkipIfS3Unsupported(t) hostIDs := fixture.HostIDs() host1 := hostIDs[0] @@ -180,7 +203,8 @@ func TestS3BackupRestore(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ 
{Name: "n1", HostIds: []controlplane.Identifier{controlplane.Identifier(hostIDs[0])}}, {Name: "n2", HostIds: []controlplane.Identifier{controlplane.Identifier(hostIDs[1])}}, @@ -280,9 +304,7 @@ func TestS3BackupRestore(t *testing.T) { func TestS3AddNodeFromBackup(t *testing.T) { t.Parallel() - if !fixture.S3Enabled() { - t.Skip("s3 not enabled for this fixture") - } + fixture.SkipIfS3Unsupported(t) host1 := fixture.HostIDs()[0] host2 := fixture.HostIDs()[1] @@ -309,7 +331,8 @@ func TestS3AddNodeFromBackup(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -365,7 +388,8 @@ func TestS3AddNodeFromBackup(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -414,9 +438,7 @@ func TestS3AddNodeFromBackup(t *testing.T) { func TestS3CreateDBFromBackup(t *testing.T) { t.Parallel() - if !fixture.S3Enabled() { - t.Skip("s3 not enabled for this fixture") - } + fixture.SkipIfS3Unsupported(t) host1 := fixture.HostIDs()[0] host2 := fixture.HostIDs()[1] @@ -443,7 +465,8 @@ func TestS3CreateDBFromBackup(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -499,7 +522,8 @@ func TestS3CreateDBFromBackup(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -546,6 +570,38 @@ func TestRemoveBackupConfig(t *testing.T) { ctx, cancel := context.WithTimeout(t.Context(), 5*time.Minute) defer cancel() + var backupRepositories []*controlplane.BackupRepositorySpec + var orchestratorOpts *controlplane.OrchestratorOpts + + switch 
fixture.Orchestrator() { + case "swarm": + backupRepositories = []*controlplane.BackupRepositorySpec{ + { + Type: client.RepositoryTypePosix, + BasePath: pointerTo("/backups"), + }, + } + orchestratorOpts = &controlplane.OrchestratorOpts{ + Swarm: &controlplane.SwarmOpts{ + ExtraVolumes: []*controlplane.ExtraVolumesSpec{ + { + HostPath: tmpDir, + DestinationPath: "/backups", + }, + }, + }, + } + case "systemd": + backupRepositories = []*controlplane.BackupRepositorySpec{ + { + Type: client.RepositoryTypePosix, + BasePath: &tmpDir, + }, + } + default: + t.Fatalf("unsupported orchestrator '%s'", fixture.Orchestrator()) + } + tLog(t, "creating database") db := fixture.NewDatabaseFixture(ctx, t, &controlplane.CreateDatabaseRequest{ @@ -559,29 +615,16 @@ func TestRemoveBackupConfig(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", HostIds: []controlplane.Identifier{controlplane.Identifier(host1)}, BackupConfig: &controlplane.BackupConfigSpec{ - Repositories: []*controlplane.BackupRepositorySpec{ - { - Type: client.RepositoryTypePosix, - BasePath: pointerTo("/backups"), - }, - }, - }, - OrchestratorOpts: &controlplane.OrchestratorOpts{ - Swarm: &controlplane.SwarmOpts{ - ExtraVolumes: []*controlplane.ExtraVolumesSpec{ - { - HostPath: tmpDir, - DestinationPath: "/backups", - }, - }, - }, + Repositories: backupRepositories, }, + OrchestratorOpts: orchestratorOpts, }, }, }, @@ -599,21 +642,13 @@ func TestRemoveBackupConfig(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { - Name: "n1", - HostIds: []controlplane.Identifier{controlplane.Identifier(host1)}, - OrchestratorOpts: &controlplane.OrchestratorOpts{ - Swarm: &controlplane.SwarmOpts{ - ExtraVolumes: []*controlplane.ExtraVolumesSpec{ - { - HostPath: 
tmpDir, - DestinationPath: "/backups", - }, - }, - }, - }, + Name: "n1", + HostIds: []controlplane.Identifier{controlplane.Identifier(host1)}, + OrchestratorOpts: orchestratorOpts, }, }, }, diff --git a/e2e/cancel_task_test.go b/e2e/cancel_task_test.go index 0a7dc459..895acd2b 100644 --- a/e2e/cancel_task_test.go +++ b/e2e/cancel_task_test.go @@ -35,7 +35,8 @@ func testCancelDB(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -44,12 +45,10 @@ func testCancelDB(t *testing.T) { }, }, }) + require.NoError(t, err) + creation_task := create_resp.Task database := create_resp.Database - if err != nil { - t.Logf("problem creating test db %s", err) - return - } t.Logf("successfully created cancel task test db") cancelation_task, err := fixture.Client.CancelDatabaseTask(t.Context(), &controlplane.CancelDatabaseTaskPayload{ diff --git a/e2e/custom_db_create_test.go b/e2e/custom_db_create_test.go index 3b8c3b91..be5d0009 100644 --- a/e2e/custom_db_create_test.go +++ b/e2e/custom_db_create_test.go @@ -58,6 +58,7 @@ func TestCreateDbWithVersions(t *testing.T) { }, }, Port: pointerTo(0), + PatroniPort: pointerTo(0), PostgresVersion: pointerTo(version.PostgresVersion), SpockVersion: pointerTo(version.SpockVersion), Nodes: []*controlplane.DatabaseNodeSpec{ diff --git a/e2e/db_create_test.go b/e2e/db_create_test.go index c8341b3d..590f3429 100644 --- a/e2e/db_create_test.go +++ b/e2e/db_create_test.go @@ -74,8 +74,9 @@ func testCreateDB(t *testing.T, nodeCount int, deployReplicas bool) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), - Nodes: nodes, + Port: pointerTo(0), + PatroniPort: pointerTo(0), + Nodes: nodes, }, }) diff --git a/e2e/db_update_add_node_test.go b/e2e/db_update_add_node_test.go index 81509143..07d55652 100644 --- a/e2e/db_update_add_node_test.go +++ b/e2e/db_update_add_node_test.go @@ -95,8 
+95,9 @@ func createDatabaseFixture(ctx context.Context, t *testing.T, username, password DbOwner: pointerTo(true), Attributes: []string{"LOGIN", "SUPERUSER"}, }}, - Port: pointerTo(0), - Nodes: nodes, + Port: pointerTo(0), + PatroniPort: pointerTo(0), + Nodes: nodes, }, }) } diff --git a/e2e/failover_test.go b/e2e/failover_test.go index 13562876..03e30e9d 100644 --- a/e2e/failover_test.go +++ b/e2e/failover_test.go @@ -35,7 +35,8 @@ func TestFailoverScenarios(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", diff --git a/e2e/fixture_test.go b/e2e/fixture_test.go index 8a619997..d8cdd883 100644 --- a/e2e/fixture_test.go +++ b/e2e/fixture_test.go @@ -117,11 +117,12 @@ func DefaultTestConfig() TestConfig { } type TestFixture struct { - Client *client.MultiServerClient - config TestConfig - skipCleanup bool - debug bool - debugDir string + Client *client.MultiServerClient + config TestConfig + skipCleanup bool + debug bool + debugDir string + orchestrator string } func NewTestFixture(ctx context.Context, config TestConfig, skipCleanup bool, debug bool, debugDir string) (*TestFixture, error) { @@ -149,12 +150,25 @@ func NewTestFixture(ctx context.Context, config TestConfig, skipCleanup bool, de log.Print("cluster initialized") + // List hosts to get orchestrator + hosts, err := cli.ListHosts(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list hosts: %w", err) + } + if len(hosts.Hosts) == 0 { + return nil, fmt.Errorf("list hosts returned no hosts") + } + // We don't support heterogeneous clusters right now, so we only need the + // orchestrator from one host. 
+ orchestrator := hosts.Hosts[0].Orchestrator + return &TestFixture{ - Client: cli, - config: config, - skipCleanup: skipCleanup, - debug: debug, - debugDir: debugDir, + Client: cli, + config: config, + skipCleanup: skipCleanup, + debug: debug, + debugDir: debugDir, + orchestrator: orchestrator, }, nil } @@ -261,8 +275,26 @@ func (f *TestFixture) TempDir(hostID string, t testing.TB) string { return dir } -func (f *TestFixture) S3Enabled() bool { - return f.config.S3.Enabled +func (f *TestFixture) Orchestrator() string { + return f.orchestrator +} + +func (f *TestFixture) SkipIfS3Unsupported(t testing.TB) { + if !f.config.S3.Enabled { + t.Skip("s3 not enabled for this fixture") + } +} + +func (f *TestFixture) SkipIfServicesUnsupported(t testing.TB) { + if f.orchestrator == "systemd" { + t.Skip("services not supported for systemd") + } +} + +func (f *TestFixture) SkipIfUpgradesUnsupported(t testing.TB) { + if f.orchestrator == "systemd" { + t.Skip("database upgrades via dbspec not supported for systemd") + } } func (f *TestFixture) S3BackupRepository() *controlplane.BackupRepositorySpec { diff --git a/e2e/fixtures/ansible.cfg b/e2e/fixtures/ansible.cfg index 5f7057fd..eff98f14 100644 --- a/e2e/fixtures/ansible.cfg +++ b/e2e/fixtures/ansible.cfg @@ -2,3 +2,4 @@ localhost_warning = False inventory_unparsed_warning = False host_key_checking = False +interpreter_python = auto_silent diff --git a/e2e/fixtures/roles/build_image/vars/main.yaml b/e2e/fixtures/roles/build_image/vars/main.yaml index 8a223795..b642c8e5 100644 --- a/e2e/fixtures/roles/build_image/vars/main.yaml +++ b/e2e/fixtures/roles/build_image/vars/main.yaml @@ -4,4 +4,4 @@ _arch_transform: x86_64: amd64 aarch64: arm64 arm64: arm64 -_archive_name: control-plane_{{ version }}_linux_{{ _arch_transform[architecture] }}.tar.gz +_archive_name: pgedge-control-plane_{{ version }}_linux_{{ _arch_transform[architecture] }}.tar.gz diff --git a/e2e/load_test.go b/e2e/load_test.go index cb783821..449c953f 100644 --- 
a/e2e/load_test.go +++ b/e2e/load_test.go @@ -119,6 +119,7 @@ func (l *LoadTest) Run(t *testing.T) { Spec: &controlplane.DatabaseSpec{ DatabaseName: "load_test", Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: l.StartingNodes, DatabaseUsers: users, }, @@ -151,6 +152,7 @@ func (l *LoadTest) Run(t *testing.T) { Spec: &controlplane.DatabaseSpec{ DatabaseName: "load_test", Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: l.UpdatedNodes, DatabaseUsers: users, }, diff --git a/e2e/minor_version_upgrade_test.go b/e2e/minor_version_upgrade_test.go index 66f347c4..74360fac 100644 --- a/e2e/minor_version_upgrade_test.go +++ b/e2e/minor_version_upgrade_test.go @@ -17,6 +17,8 @@ import ( func TestMinorVersionUpgrade(t *testing.T) { t.Parallel() + fixture.SkipIfUpgradesUnsupported(t) + host1 := fixture.HostIDs()[0] host2 := fixture.HostIDs()[1] @@ -36,6 +38,7 @@ func TestMinorVersionUpgrade(t *testing.T) { DatabaseName: "test", PostgresVersion: &fromVersion, Port: pointerTo(0), + PatroniPort: pointerTo(0), DatabaseUsers: []*controlplane.DatabaseUserSpec{ { Username: username, @@ -88,6 +91,7 @@ func TestMinorVersionUpgrade(t *testing.T) { DatabaseName: "test", PostgresVersion: &toVersion, Port: pointerTo(0), + PatroniPort: pointerTo(0), DatabaseUsers: []*controlplane.DatabaseUserSpec{ { Username: username, diff --git a/e2e/service_provisioning_test.go b/e2e/service_provisioning_test.go index 658c6d52..d1e5525e 100644 --- a/e2e/service_provisioning_test.go +++ b/e2e/service_provisioning_test.go @@ -19,6 +19,8 @@ import ( func TestProvisionMCPService(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -38,7 +40,8 @@ func TestProvisionMCPService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -145,6 
+148,8 @@ func TestProvisionMCPService(t *testing.T) { func TestProvisionMultiHostMCPService(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] host2 := fixture.HostIDs()[1] host3 := fixture.HostIDs()[2] @@ -166,7 +171,8 @@ func TestProvisionMultiHostMCPService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -225,6 +231,8 @@ func TestProvisionMultiHostMCPService(t *testing.T) { func TestUpdateDatabaseAddService(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] host2 := fixture.HostIDs()[1] @@ -245,7 +253,8 @@ func TestUpdateDatabaseAddService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -272,7 +281,8 @@ func TestUpdateDatabaseAddService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -321,6 +331,8 @@ func TestUpdateDatabaseAddService(t *testing.T) { func TestProvisionMCPServiceUnsupportedVersion(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -339,7 +351,8 @@ func TestProvisionMCPServiceUnsupportedVersion(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -424,6 +437,8 @@ func TestProvisionMCPServiceUnsupportedVersion(t *testing.T) { func TestProvisionMCPServiceRecovery(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := 
fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -444,7 +459,8 @@ func TestProvisionMCPServiceRecovery(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -532,7 +548,8 @@ func TestProvisionMCPServiceRecovery(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -624,6 +641,8 @@ func TestProvisionMCPServiceRecovery(t *testing.T) { func TestUpdateDatabaseServiceStable(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -642,7 +661,8 @@ func TestUpdateDatabaseServiceStable(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -708,7 +728,8 @@ func TestUpdateDatabaseServiceStable(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -764,6 +785,8 @@ func TestUpdateDatabaseServiceStable(t *testing.T) { func TestUpdateMCPServiceConfig(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -782,7 +805,8 @@ func TestUpdateMCPServiceConfig(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -845,7 +869,8 @@ func TestUpdateMCPServiceConfig(t *testing.T) { Attributes: 
[]string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -905,6 +930,8 @@ func TestUpdateMCPServiceConfig(t *testing.T) { func TestUpdateDatabaseRemoveService(t *testing.T) { t.Parallel() + fixture.SkipIfServicesUnsupported(t) + host1 := fixture.HostIDs()[0] ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -924,7 +951,8 @@ func TestUpdateDatabaseRemoveService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", @@ -966,7 +994,8 @@ func TestUpdateDatabaseRemoveService(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", diff --git a/e2e/switchover_test.go b/e2e/switchover_test.go index a1b1b979..ef5a74ab 100644 --- a/e2e/switchover_test.go +++ b/e2e/switchover_test.go @@ -35,7 +35,8 @@ func TestSwitchoverScenarios(t *testing.T) { Attributes: []string{"LOGIN", "SUPERUSER"}, }, }, - Port: pointerTo(0), + Port: pointerTo(0), + PatroniPort: pointerTo(0), Nodes: []*controlplane.DatabaseNodeSpec{ { Name: "n1", diff --git a/e2e/whole_cluster_test.go b/e2e/whole_cluster_test.go index 6d0b31d3..bff4054c 100644 --- a/e2e/whole_cluster_test.go +++ b/e2e/whole_cluster_test.go @@ -37,6 +37,7 @@ func TestWholeCluster(t *testing.T) { Spec: &controlplane.DatabaseSpec{ DatabaseName: "test", Port: pointerTo(0), + PatroniPort: pointerTo(0), DatabaseUsers: []*controlplane.DatabaseUserSpec{ { Username: username, diff --git a/go.mod b/go.mod index 9d34f1ed..5aaa87ed 100644 --- a/go.mod +++ b/go.mod @@ -7,11 +7,13 @@ toolchain go1.25.8 require ( github.com/alessio/shellescape v1.4.2 github.com/cilium/ipam v0.0.0-20230509084518-fd66eae7909b + github.com/coreos/go-systemd/v22 
v22.5.0 github.com/cschleiden/go-workflows v0.19.0 github.com/docker/docker v27.1.1+incompatible github.com/docker/go-connections v0.5.0 github.com/dustin/go-humanize v1.0.1 github.com/eclipse/paho.golang v0.22.0 + github.com/elastic/gosigar v0.14.3 github.com/goccy/go-yaml v1.18.0 github.com/google/uuid v1.6.0 github.com/jackc/pgerrcode v0.0.0-20250907135507-afb5586c32a6 @@ -58,7 +60,6 @@ require ( github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/dockercfg v0.3.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dimfeld/httppath v0.0.0-20170720192232-ee938bf73598 // indirect @@ -74,6 +75,7 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect + github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/gohugoio/hashstructure v0.6.0 // indirect github.com/golang-jwt/jwt/v5 v5.2.2 // indirect diff --git a/go.sum b/go.sum index e9dda150..dca35205 100644 --- a/go.sum +++ b/go.sum @@ -103,6 +103,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eclipse/paho.golang v0.22.0 h1:JhhUngr8TBlyUZDZw/L6WVayPi9qmSmdWeki48i5AVE= github.com/eclipse/paho.golang v0.22.0/go.mod h1:9ZiYJ93iEfGRJri8tErNeStPKLXIGBHiqbHV74t5pqI= +github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= +github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod 
h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -142,6 +144,8 @@ github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlnd github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/gohugoio/hashstructure v0.6.0 h1:7wMB/2CfXoThFYhdWRGv3u3rUM761Cq29CxUW+NltUg= @@ -589,6 +593,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/hack/dev-env.zsh b/hack/dev-env.zsh index 1f35db76..ba5caa6d 100644 --- a/hack/dev-env.zsh +++ b/hack/dev-env.zsh @@ -83,7 +83,7 @@ _choose-scope() { _choose-host() { local host_choice=$(restish host-1 list-hosts \ - | jq -c '.hosts[]? 
| { id, state: .status.state, hostname, ipv4_address }' \ + | jq -c '.hosts[]? | { id, state: .status.state, client_addresses, peer_addresses }' \ | sk --preview 'echo {} | jq') if [[ -z "${host_choice}" ]]; then @@ -217,7 +217,7 @@ _psql-local() { fi local ip_addr=$(<<<"${conn_info}" \ - jq -r '.ipv4_address') + jq -r '.addresses[0]') local port=$(<<<"${conn_info}" \ jq -r '.port') @@ -242,6 +242,23 @@ use-compose() { http://localhost:3005 \ } +use-dev-lima() { + export CP_ENV=dev-lima + + _update-restish-config \ + http://localhost:3000 \ + http://localhost:3001 \ + http://localhost:3002 \ + http://localhost:3003 \ + http://localhost:3004 \ + http://localhost:3005 \ + + local i + for ((i = 1; i <= 6; i++ )); do + alias cp${i}-ssh="ssh -F ${HOME}/.lima/control-plane-dev-${i}/ssh.config -t lima-control-plane-dev-${i}" + done +} + use-lima() { export CP_ENV=lima @@ -297,9 +314,17 @@ cp-psql() { local instance_id="${o_instance_id[-1]}" local username="${o_username[-1]}" - local method="${o_method[-1]:-docker}" + local method="${o_method[-1]}" local database_id + if [[ -z "${method}" ]]; then + if [[ "${CP_ENV}" == "dev-lima" ]]; then + method=local + else + method=docker + fi + fi + if [[ -z "${instance_id}" ]]; then local instance=$(_choose-instance) @@ -597,6 +622,35 @@ cp-follow-task() { echo "${scope} entity ${entity_id} task ${task_id} ${task_status}" } +cp-etcdctl() { + local data_dir + case ${CP_ENV} in + compose) + data_dir="${_cp_dir}/docker/control-plane-dev/data" + ;; + dev-lima) + data_dir="${_cp_dir}/lima/data" + ;; + *) + echo "cannot use cp-etcdctl with environment ${CP_ENV}" + return 1 + ;; + esac + + local host_1_data="${data_dir}/host-1" + local host_1_certs="${host_1_data}/certificates" + local host_1_cfg="${host_1_data}/generated.config.json" + + etcdctl \ + --endpoints=https://localhost:2379 \ + --cacert "${host_1_certs}/ca.crt" \ + --cert "${host_1_certs}/etcd-user.crt" \ + --key "${host_1_certs}/etcd-user.key" \ + --user $(jq -r 
'.etcd_username' "${host_1_cfg}") \ + --password $(jq -r '.etcd_password' "${host_1_cfg}") \ + $@ +} + ######### # setup # ######### @@ -608,18 +662,6 @@ use-compose # static aliases # ################## -_host_1_data="${_cp_dir}/docker/control-plane-dev/data/host-1" -_host_1_certs="${_host_1_data}/certificates" -_host_1_cfg="${_host_1_data}/generated.config.json" - -alias cp-etcdctl="etcdctl \ - --endpoints=https://localhost:2379 \ - --cacert '${_host_1_certs}/ca.crt' \ - --cert '${_host_1_certs}/etcd-user.crt' \ - --key '${_host_1_certs}/etcd-user.key' \ - --user \$(jq -r '.etcd_username' '${_host_1_cfg}') \ - --password \$(jq -r '.etcd_password' '${_host_1_cfg}')" - alias cp-docker-compose="WORKSPACE_DIR=${_cp_dir} \ DEBUG=\${DEBUG:-0} \ LOG_LEVEL=\${LOG_LEVEL:-info} \ diff --git a/hack/pgedge-cp-env.plugin.zsh b/hack/pgedge-cp-env.plugin.zsh index 6fd0c6ca..6ca00a0e 100644 --- a/hack/pgedge-cp-env.plugin.zsh +++ b/hack/pgedge-cp-env.plugin.zsh @@ -6,6 +6,7 @@ ZSH_THEME_PGEDGE_CP_ENV_PROMPT_PREFIX='cp-env:(' ZSH_THEME_PGEDGE_CP_ENV_PROMPT_SUFFIX=')' ZSH_THEME_PGEDGE_CP_ENV_PROMPT_COMPOSE_STYLE="%{$fg_bold[green]%}" +ZSH_THEME_PGEDGE_CP_ENV_PROMPT_DEV_LIMA_STYLE="%{$fg_bold[green]%}" ZSH_THEME_PGEDGE_CP_ENV_PROMPT_LIMA_STYLE="%{$fg_bold[cyan]%}" ZSH_THEME_PGEDGE_CP_ENV_PROMPT_EC2_STYLE="%{$fg_bold[yellow]%}" ZSH_THEME_PGEDGE_CP_ENV_PROMPT_OTHER_STYLE="%{$fg_bold[magenta]%}" @@ -25,6 +26,9 @@ pgedge_cp_env_prompt_info() { compose) style="${ZSH_THEME_PGEDGE_CP_ENV_PROMPT_COMPOSE_STYLE}" ;; + dev-lima) + style="${ZSH_THEME_PGEDGE_CP_ENV_PROMPT_DEV_LIMA_STYLE}" + ;; lima) style="${ZSH_THEME_PGEDGE_CP_ENV_PROMPT_LIMA_STYLE}" ;; diff --git a/lima/Makefile b/lima/Makefile new file mode 100644 index 00000000..ab803092 --- /dev/null +++ b/lima/Makefile @@ -0,0 +1,31 @@ +ansible_playbook=ansible-playbook \ + --extra-vars='@vars.yaml' + +.PHONY: deploy +deploy: + $(ansible_playbook) deploy.yaml + +.PHONY: teardown +teardown: + $(ansible_playbook) teardown.yaml + rm -rf 
./data + +.PHONY: reset +reset: + $(ansible_playbook) stop-dbs.yaml + rm -rf ./data + +.PHONY: build +build: + GOOS=linux go build \ + -gcflags "all=-N -l" \ + -o ./pgedge-control-plane \ + $(shell pwd)/../server + +.PHONY: fix-clocks +fix-clocks: + ansible --become -i ./inventory.yaml all -m command -a 'chronyc -a makestep' + +.PHONY: run +run: build fix-clocks + LIMA_DIR=$(shell pwd) circusd ./circus.ini diff --git a/lima/ansible.cfg b/lima/ansible.cfg new file mode 100644 index 00000000..2bcd975c --- /dev/null +++ b/lima/ansible.cfg @@ -0,0 +1,5 @@ +[defaults] +localhost_warning = False +host_key_checking = False +inventory = inventory.yaml +interpreter_python = auto_silent diff --git a/lima/circus.ini b/lima/circus.ini new file mode 100644 index 00000000..183e43cd --- /dev/null +++ b/lima/circus.ini @@ -0,0 +1,25 @@ +[circus] + +[watcher:host-1] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-1/ssh.config -tt lima-control-plane-dev-1 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) $(CIRCUS.ENV.LIMA_DIR)/run.sh' + +[watcher:host-2] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-2/ssh.config -tt lima-control-plane-dev-2 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) $(CIRCUS.ENV.LIMA_DIR)/run.sh' + +[watcher:host-3] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-3/ssh.config -tt lima-control-plane-dev-3 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) $(CIRCUS.ENV.LIMA_DIR)/run.sh' + +[watcher:host-4] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-4/ssh.config -tt lima-control-plane-dev-4 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) $(CIRCUS.ENV.LIMA_DIR)/run.sh' + +[watcher:host-5] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-5/ssh.config -tt lima-control-plane-dev-5 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) $(CIRCUS.ENV.LIMA_DIR)/run.sh' + +[watcher:host-6] +cmd = ssh +args = -F $(CIRCUS.ENV.HOME)/.lima/control-plane-dev-6/ssh.config -tt lima-control-plane-dev-6 'sudo LIMA_DIR=$(CIRCUS.ENV.LIMA_DIR) 
$(CIRCUS.ENV.LIMA_DIR)/run.sh' diff --git a/lima/config.json b/lima/config.json new file mode 100644 index 00000000..ffd68716 --- /dev/null +++ b/lima/config.json @@ -0,0 +1,11 @@ +{ + "orchestrator": "systemd", + "profiling_enabled": true, + "client_addresses": ["127.0.0.1"], + "logging": { + "pretty": true, + "component_levels": { + "api_server": "error" + } + } +} diff --git a/lima/deploy.yaml b/lima/deploy.yaml new file mode 100644 index 00000000..3c6a1201 --- /dev/null +++ b/lima/deploy.yaml @@ -0,0 +1,18 @@ +--- +- name: Start VMs + hosts: localhost + become: false + roles: + - role: start_vms + +- name: Install prerequisites + hosts: all + become: true + roles: + - role: install_prerequisites + +- name: Write test configs + hosts: localhost + gather_facts: false + roles: + - role: generate_test_config diff --git a/lima/inventory.yaml b/lima/inventory.yaml new file mode 100644 index 00000000..dba1b098 --- /dev/null +++ b/lima/inventory.yaml @@ -0,0 +1,21 @@ +--- +control_plane: + hosts: + control-plane-dev-1: + ansible_host: lima-control-plane-dev-1 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-1/ssh.config" + control-plane-dev-2: + ansible_host: lima-control-plane-dev-2 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-2/ssh.config" + control-plane-dev-3: + ansible_host: lima-control-plane-dev-3 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-3/ssh.config" + control-plane-dev-4: + ansible_host: lima-control-plane-dev-4 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-4/ssh.config" + control-plane-dev-5: + ansible_host: lima-control-plane-dev-5 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-5/ssh.config" + control-plane-dev-6: + ansible_host: lima-control-plane-dev-6 + ansible_ssh_common_args: "-F {{ lookup('env', 'HOME') }}/.lima/control-plane-dev-6/ssh.config" diff --git 
a/lima/lima-template.yaml b/lima/lima-template.yaml new file mode 100644 index 00000000..4d1e922a --- /dev/null +++ b/lima/lima-template.yaml @@ -0,0 +1,16 @@ +--- +minimumLimaVersion: 1.1.0 + +base: template://rocky-9 + +cpus: 4 +memory: 8GiB +disk: 20GiB +containerd: + system: false + user: false +networks: +- lima: user-v2 +mounts: +- location: "~" + writable: true diff --git a/lima/roles/generate_test_config/tasks/main.yaml b/lima/roles/generate_test_config/tasks/main.yaml new file mode 100644 index 00000000..b14bf1b6 --- /dev/null +++ b/lima/roles/generate_test_config/tasks/main.yaml @@ -0,0 +1,10 @@ +--- +- name: Create test_configs directory + ansible.builtin.file: + path: ../e2e/fixtures/outputs + state: directory + +- name: Generate test_config + ansible.builtin.template: + src: test_config.yaml.tmpl + dest: ../e2e/fixtures/outputs/dev-lima.test_config.yaml diff --git a/lima/roles/generate_test_config/templates/test_config.yaml.tmpl b/lima/roles/generate_test_config/templates/test_config.yaml.tmpl new file mode 100644 index 00000000..c33a4044 --- /dev/null +++ b/lima/roles/generate_test_config/templates/test_config.yaml.tmpl @@ -0,0 +1,8 @@ +--- +hosts: +{% for machine in machines %} + {{ machine.host_id }}: + external_ip: 127.0.0.1 + port: {{ machine.http_port }} + ssh_command: ssh {{ hostvars[machine.name].ansible_ssh_common_args }} {{ hostvars[machine.name].ansible_host }} +{% endfor %} diff --git a/lima/roles/install_prerequisites/tasks/main.yaml b/lima/roles/install_prerequisites/tasks/main.yaml new file mode 100644 index 00000000..0b1133a8 --- /dev/null +++ b/lima/roles/install_prerequisites/tasks/main.yaml @@ -0,0 +1,85 @@ +--- +- name: Install epel-release + ansible.builtin.package: + name: '{{ item }}' + state: present + with_items: + - epel-release + - dnf +- name: Enable crb + community.general.dnf_config_manager: + name: crb + state: enabled +- name: Install pgEdge repository + ansible.builtin.package: + name: 
https://dnf.pgedge.com/reporpm/pgedge-release-latest.noarch.rpm + state: present + disable_gpg_check: true +- name: Install prerequisites + ansible.builtin.package: + name: '{{ item }}' + state: present + with_items: + - python3-pip + - pgedge-postgresql18 + - pgedge-postgresql17 + - pgedge-postgresql16 + - pgedge-spock50_18 + - pgedge-spock50_17 + - pgedge-spock50_16 + - pgedge-snowflake_18 + - pgedge-snowflake_17 + - pgedge-snowflake_16 + - pgedge-lolor_18 + - pgedge-lolor_17 + - pgedge-lolor_16 + - pgedge-postgresql18-contrib + - pgedge-postgresql17-contrib + - pgedge-postgresql16-contrib + - pgedge-pgbackrest + - pgedge-python3-psycopg2 + - which + - wget + - git +- name: Install python modules + pip: + state: present + name: + - "patroni[etcd,jsonlogger]==4.1.0" +- name: Install etcdctl and etcdutl + ansible.builtin.unarchive: + src: "{{ etcd_download_url }}" + dest: /usr/bin + remote_src: yes + extra_opts: + - "--strip-components=1" + - "{{ etcd_archive_name }}/etcdctl" + - "{{ etcd_archive_name }}/etcdutl" + creates: /usr/bin/etcdctl +- name: Install Go + ansible.builtin.unarchive: + src: "{{ go_download_url }}" + dest: /usr/local + remote_src: yes + creates: /usr/local/go +- name: Add profile environment + ansible.builtin.copy: + content: | + export PATH=$PATH:/usr/local/go/bin + dest: /etc/profile.d/control-plane-dev.sh + mode: '0755' +- name: Allow large time jumps in chronyd + ansible.builtin.lineinfile: + path: /etc/chrony.conf + regexp: '^makestep' + line: 'makestep 1 -1' + register: chronycfg +- name: Restart chronyd + ansible.builtin.systemd_service: + name: chronyd + state: restarted + when: chronycfg.changed +- name: Install delve debugger + ansible.builtin.command: /usr/local/go/bin/go install github.com/go-delve/delve/cmd/dlv@latest +- name: Fix clocks + ansible.builtin.command: chronyc -a makestep diff --git a/lima/roles/install_prerequisites/vars/main.yaml b/lima/roles/install_prerequisites/vars/main.yaml new file mode 100644 index 
00000000..05da23aa --- /dev/null +++ b/lima/roles/install_prerequisites/vars/main.yaml @@ -0,0 +1,12 @@ +--- +_arch_transform: + amd64: amd64 + x86_64: amd64 + aarch64: arm64 + arm64: arm64 +etcd_version: v3.6.5 +etcd_archive_name: etcd-{{ etcd_version }}-linux-{{ _arch_transform[ansible_architecture] }} +etcd_download_url: https://github.com/etcd-io/etcd/releases/download/{{ etcd_version }}/{{ etcd_archive_name }}.tar.gz +go_version: 1.25.5 +go_archive_name: go{{ go_version }}.linux-{{ _arch_transform[ansible_architecture] }}.tar.gz +go_download_url: https://dl.google.com/go/{{ go_archive_name }} diff --git a/lima/roles/start_vms/tasks/main.yaml b/lima/roles/start_vms/tasks/main.yaml new file mode 100644 index 00000000..344a96ff --- /dev/null +++ b/lima/roles/start_vms/tasks/main.yaml @@ -0,0 +1,12 @@ +--- +- include_tasks: ../../../../e2e/fixtures/roles/lima_common/tasks/list.yaml + +- name: Create hosts + loop: '{{ machines }}' + command: limactl start --yes --name={{ item.name }} lima-template.yaml + when: item.name not in lima_vms + +- name: Start stopped hosts + loop: '{{ machines }}' + command: limactl start --name={{ item.name }} + when: item.name in lima_vms and lima_vms[item.name].status != 'Running' diff --git a/lima/roles/stop_dbs/tasks/main.yaml b/lima/roles/stop_dbs/tasks/main.yaml new file mode 100644 index 00000000..dd6e8dc5 --- /dev/null +++ b/lima/roles/stop_dbs/tasks/main.yaml @@ -0,0 +1,39 @@ +--- +- name: Reset failed services + ansible.builtin.command: systemctl reset-failed + changed_when: false + +- name: Populate service facts + ansible.builtin.service_facts: + +- name: Stop and disable all patroni services + loop: "{{ ansible_facts['services'].values() + | selectattr('source', 'equalto', 'systemd') + | rejectattr('status', 'equalto', 'not-found') + | rejectattr('status', 'equalto', 'bad') + | selectattr('name', 'match', '^patroni-') + | list }}" + ansible.builtin.systemd_service: + name: '{{ item.name }}' + state: stopped + enabled: false 
+ +- name: Find leftover patroni unit files + ansible.builtin.find: + paths: /etc/systemd/system + patterns: 'patroni-*.service' + + register: files_to_delete + +- name: Delete leftover patroni unit files + ansible.builtin.file: + path: '{{ item.path }}' + state: absent + loop: '{{ files_to_delete.files }}' + loop_control: + label: '{{ item.path }}' + when: files_to_delete.matched > 0 + +- name: Reload systemd + ansible.builtin.command: systemctl daemon-reload + changed_when: false diff --git a/lima/roles/teardown_vms/tasks/main.yaml b/lima/roles/teardown_vms/tasks/main.yaml new file mode 100644 index 00000000..d3ecf503 --- /dev/null +++ b/lima/roles/teardown_vms/tasks/main.yaml @@ -0,0 +1,12 @@ +--- +- include_tasks: ../../../../e2e/fixtures/roles/lima_common/tasks/list.yaml + +- name: Stop hosts + loop: '{{ machines }}' + command: limactl stop --yes {{ item.name }} + when: item.name in lima_vms and lima_vms[item.name].status == 'Running' + +- name: Remove hosts + loop: '{{ machines }}' + command: limactl remove --yes {{ item.name }} + when: item.name in lima_vms diff --git a/lima/run.sh b/lima/run.sh new file mode 100755 index 00000000..c8762773 --- /dev/null +++ b/lima/run.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +set_env() { + case ${HOSTNAME} in + lima-control-plane-dev-1) + export PGEDGE_HOST_ID=host-1 + ;; + lima-control-plane-dev-2) + export PGEDGE_HOST_ID=host-2 + export PGEDGE_HTTP__PORT=3001 + export PGEDGE_ETCD_SERVER__PEER_PORT=2480 + export PGEDGE_ETCD_SERVER__CLIENT_PORT=2479 + ;; + lima-control-plane-dev-3) + export PGEDGE_HOST_ID=host-3 + export PGEDGE_HTTP__PORT=3002 + export PGEDGE_ETCD_SERVER__PEER_PORT=2580 + export PGEDGE_ETCD_SERVER__CLIENT_PORT=2579 + ;; + lima-control-plane-dev-4) + export PGEDGE_HOST_ID=host-4 + export PGEDGE_HTTP__PORT=3003 + export PGEDGE_ETCD_MODE=client + ;; + lima-control-plane-dev-5) + export PGEDGE_HOST_ID=host-5 + export PGEDGE_HTTP__PORT=3004 + export 
PGEDGE_ETCD_MODE=client + ;; + lima-control-plane-dev-6) + export PGEDGE_HOST_ID=host-6 + export PGEDGE_HTTP__PORT=3005 + export PGEDGE_ETCD_MODE=client + ;; + *) + echo "unrecognized hostname ${HOSTNAME}" + exit 1 + ;; + esac + + export PGEDGE_DATA_DIR=${LIMA_DIR}/data/${PGEDGE_HOST_ID} + export PGEDGE_SYSTEMD__INSTANCE_DATA_DIR=${LIMA_DIR}/data/${PGEDGE_HOST_ID}/instances + +} + +if [[ $(whoami) != "root" ]]; then + echo "this script must be run as root" + exit 1 +fi + +# Copy the binary to another location so that we can safely rebuild on the host +# without disrupting the running server. +cp ${LIMA_DIR}/pgedge-control-plane /usr/sbin + +# Set environment variable configuration. +set_env + +# The sed prefixes each output line with the host ID. +/usr/sbin/pgedge-control-plane run \ + --config-path ${LIMA_DIR}/config.json \ + 2>&1 | sed "s/^/[${PGEDGE_HOST_ID}] /" diff --git a/lima/stop-dbs.yaml b/lima/stop-dbs.yaml new file mode 100644 index 00000000..98070e0f --- /dev/null +++ b/lima/stop-dbs.yaml @@ -0,0 +1,6 @@ +--- +- name: Stop DBs + hosts: all + become: true + roles: + - role: stop_dbs diff --git a/lima/teardown.yaml b/lima/teardown.yaml new file mode 100644 index 00000000..3089f2dc --- /dev/null +++ b/lima/teardown.yaml @@ -0,0 +1,6 @@ +--- +- name: Teardown VMs + hosts: localhost + become: false + roles: + - role: teardown_vms diff --git a/lima/vars.yaml b/lima/vars.yaml new file mode 100644 index 00000000..0545649d --- /dev/null +++ b/lima/vars.yaml @@ -0,0 +1,20 @@ +--- +machines: + - name: control-plane-dev-1 + http_port: 3000 + host_id: host-1 + - name: control-plane-dev-2 + http_port: 3001 + host_id: host-2 + - name: control-plane-dev-3 + http_port: 3002 + host_id: host-3 + - name: control-plane-dev-4 + http_port: 3003 + host_id: host-4 + - name: control-plane-dev-5 + http_port: 3004 + host_id: host-5 + - name: control-plane-dev-6 + http_port: 3005 + host_id: host-6 diff --git a/server/cmd/root.go b/server/cmd/root.go index 65a37498..aeaee4f7 100644 
--- a/server/cmd/root.go +++ b/server/cmd/root.go @@ -24,6 +24,7 @@ import ( "github.com/pgEdge/control-plane/server/internal/orchestrator" "github.com/pgEdge/control-plane/server/internal/orchestrator/common" "github.com/pgEdge/control-plane/server/internal/orchestrator/swarm" + "github.com/pgEdge/control-plane/server/internal/orchestrator/systemd" "github.com/pgEdge/control-plane/server/internal/ports" "github.com/pgEdge/control-plane/server/internal/resource" "github.com/pgEdge/control-plane/server/internal/resource/migrations" @@ -100,6 +101,7 @@ func newRootCmd(i *do.Injector) *cobra.Command { scheduler.RegisterResourceTypes(registry) common.RegisterResourceTypes(registry) swarm.RegisterResourceTypes(registry) + systemd.RegisterResourceTypes(registry) if err := orchestrator.Provide(i); err != nil { return fmt.Errorf("failed to register orchestrator provider: %w", err) diff --git a/server/internal/api/apiv1/convert.go b/server/internal/api/apiv1/convert.go index 71c46b2f..3541402d 100644 --- a/server/internal/api/apiv1/convert.go +++ b/server/internal/api/apiv1/convert.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" api "github.com/pgEdge/control-plane/api/apiv1/gen/control_plane" + "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/host" "github.com/pgEdge/control-plane/server/internal/pgbackrest" @@ -709,7 +710,11 @@ func apiToServiceSpecs(apiServices []*api.ServiceSpec) ([]*database.ServiceSpec, return services, nil } -func apiToDatabaseSpec(id, tID *api.Identifier, apiSpec *api.DatabaseSpec) (*database.Spec, error) { +func apiToDatabaseSpec( + orchestrator config.Orchestrator, + id, tID *api.Identifier, + apiSpec *api.DatabaseSpec, +) (*database.Spec, error) { var databaseID string var err error if id != nil { @@ -728,7 +733,7 @@ func apiToDatabaseSpec(id, tID *api.Identifier, apiSpec *api.DatabaseSpec) (*dat } tenantID = &t } - if err := 
validateDatabaseSpec(apiSpec); err != nil { + if err := validateDatabaseSpec(orchestrator, apiSpec); err != nil { return nil, err } diff --git a/server/internal/api/apiv1/post_init_handlers.go b/server/internal/api/apiv1/post_init_handlers.go index ade360b7..536fe084 100644 --- a/server/internal/api/apiv1/post_init_handlers.go +++ b/server/internal/api/apiv1/post_init_handlers.go @@ -298,7 +298,7 @@ func (s *PostInitHandlers) ListDatabases(ctx context.Context) (*api.ListDatabase } func (s *PostInitHandlers) CreateDatabase(ctx context.Context, req *api.CreateDatabaseRequest) (*api.CreateDatabaseResponse, error) { - spec, err := apiToDatabaseSpec(req.ID, req.TenantID, req.Spec) + spec, err := apiToDatabaseSpec(s.cfg.Orchestrator, req.ID, req.TenantID, req.Spec) if err != nil { return nil, makeInvalidInputErr(err) } @@ -353,7 +353,7 @@ func (s *PostInitHandlers) GetDatabase(ctx context.Context, req *api.GetDatabase } func (s *PostInitHandlers) UpdateDatabase(ctx context.Context, req *api.UpdateDatabasePayload) (*api.UpdateDatabaseResponse, error) { - spec, err := apiToDatabaseSpec(&req.DatabaseID, req.Request.TenantID, req.Request.Spec) + spec, err := apiToDatabaseSpec(s.cfg.Orchestrator, &req.DatabaseID, req.Request.TenantID, req.Request.Spec) if err != nil { return nil, makeInvalidInputErr(err) } diff --git a/server/internal/api/apiv1/validate.go b/server/internal/api/apiv1/validate.go index fcff19e2..2037ea1e 100644 --- a/server/internal/api/apiv1/validate.go +++ b/server/internal/api/apiv1/validate.go @@ -10,6 +10,7 @@ import ( "strings" api "github.com/pgEdge/control-plane/api/apiv1/gen/control_plane" + "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/host" @@ -61,7 +62,7 @@ func appendPath(path []string, new ...string) []string { return append(slices.Clone(path), new...) 
} -func validateDatabaseSpec(spec *api.DatabaseSpec) error { +func validateDatabaseSpec(orchestrator config.Orchestrator, spec *api.DatabaseSpec) error { var errs []error errs = append(errs, validateCPUs(spec.Cpus, []string{"cpus"})...) @@ -90,7 +91,7 @@ func validateDatabaseSpec(spec *api.DatabaseSpec) error { } // Per-node validation (includes self-ref and restore vs source_node conflict) - errs = append(errs, validateNode(node, nodePath)...) + errs = append(errs, validateNode(orchestrator, spec, node, nodePath)...) } // Cross-node existence check for source_node @@ -208,7 +209,12 @@ func validateDatabaseUpdate(old *database.Spec, new *api.DatabaseSpec) error { return errors.Join(errs...) } -func validateNode(node *api.DatabaseNodeSpec, path []string) []error { +func validateNode( + orchestrator config.Orchestrator, + db *api.DatabaseSpec, + node *api.DatabaseNodeSpec, + path []string, +) []error { var errs []error cpusPath := appendPath(path, "cpus") @@ -258,6 +264,18 @@ func validateNode(node *api.DatabaseNodeSpec, path []string) []error { errs = append(errs, validateRestoreConfig(node.RestoreConfig, restoreConfigPath)...) } + switch orchestrator { + case config.OrchestratorSystemD: + if db.Port == nil && node.Port == nil { + portPath := appendPath(path, "port") + errs = append(errs, newValidationError(errors.New("port must be defined"), portPath)) + } + if db.PatroniPort == nil && node.PatroniPort == nil { + portPath := appendPath(path, "patroni_port") + errs = append(errs, newValidationError(errors.New("patroni_port must be defined"), portPath)) + } + } + // Validate orchestrator_opts (per-node) errs = append(errs, validateOrchestratorOpts(node.OrchestratorOpts, appendPath(path, "orchestrator_opts"))...) 
diff --git a/server/internal/api/apiv1/validate_test.go b/server/internal/api/apiv1/validate_test.go index a2603125..148fb152 100644 --- a/server/internal/api/apiv1/validate_test.go +++ b/server/internal/api/apiv1/validate_test.go @@ -5,6 +5,7 @@ import ( "testing" api "github.com/pgEdge/control-plane/api/apiv1/gen/control_plane" + "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/utils" @@ -408,12 +409,15 @@ func TestValidateBackupConfig(t *testing.T) { func TestValidateNode(t *testing.T) { for _, tc := range []struct { - name string - node *api.DatabaseNodeSpec - expected []string + name string + orchestrator config.Orchestrator + db *api.DatabaseSpec + node *api.DatabaseNodeSpec + expected []string }{ { - name: "valid minimal", + name: "valid minimal", + orchestrator: config.OrchestratorSwarm, node: &api.DatabaseNodeSpec{ HostIds: []api.Identifier{ api.Identifier("host-1"), @@ -421,7 +425,8 @@ func TestValidateNode(t *testing.T) { }, }, { - name: "valid all", + name: "valid all", + orchestrator: config.OrchestratorSwarm, node: &api.DatabaseNodeSpec{ Cpus: utils.PointerTo("16"), Memory: utils.PointerTo("64GiB"), @@ -448,7 +453,47 @@ func TestValidateNode(t *testing.T) { }, }, { - name: "invalid", + name: "valid minimal systemd", + orchestrator: config.OrchestratorSystemD, + db: &api.DatabaseSpec{ + Port: utils.PointerTo(5432), + PatroniPort: utils.PointerTo(8888), + }, + node: &api.DatabaseNodeSpec{ + HostIds: []api.Identifier{ + api.Identifier("host-1"), + }, + }, + }, + { + name: "valid minimal systemd with per-node", + orchestrator: config.OrchestratorSystemD, + db: &api.DatabaseSpec{}, + node: &api.DatabaseNodeSpec{ + Port: utils.PointerTo(5432), + PatroniPort: utils.PointerTo(8888), + HostIds: []api.Identifier{ + api.Identifier("host-1"), + }, + }, + }, + { + name: "invalid systemd", + 
orchestrator: config.OrchestratorSystemD, + db: &api.DatabaseSpec{}, + node: &api.DatabaseNodeSpec{ + HostIds: []api.Identifier{ + api.Identifier("host-1"), + }, + }, + expected: []string{ + "port: port must be defined", + "patroni_port: patroni_port must be defined", + }, + }, + { + name: "invalid", + orchestrator: config.OrchestratorSwarm, node: &api.DatabaseNodeSpec{ Cpus: utils.PointerTo("0.00001"), Memory: utils.PointerTo("%^&*"), @@ -484,7 +529,7 @@ func TestValidateNode(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - err := errors.Join(validateNode(tc.node, nil)...) + err := errors.Join(validateNode(tc.orchestrator, tc.db, tc.node, nil)...) if len(tc.expected) < 1 { assert.NoError(t, err) } else { @@ -797,7 +842,7 @@ func TestValidateDatabaseSpec(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - err := validateDatabaseSpec(tc.spec) + err := validateDatabaseSpec(config.OrchestratorSwarm, tc.spec) if len(tc.expected) < 1 { assert.NoError(t, err) } else { diff --git a/server/internal/app/app.go b/server/internal/app/app.go index 75ceb42f..a2ce21cb 100644 --- a/server/internal/app/app.go +++ b/server/internal/app/app.go @@ -13,11 +13,11 @@ import ( "github.com/pgEdge/control-plane/server/internal/api" "github.com/pgEdge/control-plane/server/internal/certificates" "github.com/pgEdge/control-plane/server/internal/config" - "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/etcd" "github.com/pgEdge/control-plane/server/internal/host" "github.com/pgEdge/control-plane/server/internal/migrate" "github.com/pgEdge/control-plane/server/internal/monitor" + "github.com/pgEdge/control-plane/server/internal/orchestrator" "github.com/pgEdge/control-plane/server/internal/resource" "github.com/pgEdge/control-plane/server/internal/scheduler" "github.com/pgEdge/control-plane/server/internal/workflows" @@ -27,11 +27,6 @@ type ErrorProducer interface { Error() <-chan error } -type Orchestrator 
interface { - host.Orchestrator - database.Orchestrator -} - type App struct { i *do.Injector cfg config.Config @@ -166,6 +161,14 @@ func (a *App) runInitialized(parentCtx context.Context) error { return handleError(fmt.Errorf("failed to start certificate service: %w", err)) } + orch, err := do.Invoke[orchestrator.Orchestrator](a.i) + if err != nil { + return handleError(fmt.Errorf("failed to initialize orchestrator: %w", err)) + } + if err := orch.Start(a.serviceCtx); err != nil { + return handleError(fmt.Errorf("failed to start orchestrator: %w", err)) + } + hostSvc, err := do.Invoke[*host.Service](a.i) if err != nil { return handleError(fmt.Errorf("failed to initialize host service: %w", err)) diff --git a/server/internal/config/config.go b/server/internal/config/config.go index beea4e9c..2b67b584 100644 --- a/server/internal/config/config.go +++ b/server/internal/config/config.go @@ -112,6 +112,29 @@ var defaultDockerSwarm = DockerSwarm{ DatabaseNetworksSubnetBits: 26, } +type SystemD struct { + InstanceDataDir string `koanf:"instance_data_dir" json:"instance_data_dir,omitempty"` + PgBackRestPath string `koanf:"pgbackrest_path" json:"pgbackrest_path,omitempty"` + PatroniPath string `koanf:"patroni_path" json:"patroni_path,omitempty"` +} + +func (s SystemD) validate() []error { + var errs []error + if s.PgBackRestPath == "" { + errs = append(errs, errors.New("pgbackrest_path cannot be empty")) + } + if s.PatroniPath == "" { + errs = append(errs, errors.New("patroni_path cannot be empty")) + } + + return errs +} + +var defaultSystemD = SystemD{ + PgBackRestPath: "/usr/bin/pgbackrest", + PatroniPath: "/usr/local/bin/patroni", +} + type HTTP struct { Enabled bool `koanf:"enabled" json:"enabled,omitempty"` BindAddr string `koanf:"bind_addr" json:"bind_addr,omitempty"` @@ -184,7 +207,8 @@ var etcdClientDefault = EtcdClient{ type Orchestrator string const ( - OrchestratorSwarm Orchestrator = "swarm" + OrchestratorSwarm Orchestrator = "swarm" + OrchestratorSystemD 
Orchestrator = "systemd" ) type EtcdMode string @@ -240,6 +264,7 @@ type Config struct { TraefikEnabled bool `koanf:"traefik_enabled" json:"traefik_enabled,omitempty"` VectorEnabled bool `koanf:"vector_enabled" json:"vector_enabled,omitempty"` DockerSwarm DockerSwarm `koanf:"docker_swarm" json:"docker_swarm,omitzero"` + SystemD SystemD `koanf:"systemd" json:"systemd,omitzero"` DatabaseOwnerUID int `koanf:"database_owner_uid" json:"database_owner_uid,omitempty"` ProfilingEnabled bool `koanf:"profiling_enabled" json:"profiling_enabled,omitempty"` RandomPorts RandomPorts `koanf:"random_ports" json:"random_ports,omitzero"` @@ -340,7 +365,7 @@ func (c Config) Validate() error { for _, err := range c.RandomPorts.validate() { errs = append(errs, fmt.Errorf("random_ports.%w", err)) } - if c.Orchestrator != OrchestratorSwarm { + if c.Orchestrator != OrchestratorSwarm && c.Orchestrator != OrchestratorSystemD { errs = append(errs, fmt.Errorf("orchestrator: unsupported orchestrator %q", c.Orchestrator)) } switch c.Orchestrator { @@ -348,8 +373,12 @@ func (c Config) Validate() error { for _, err := range c.DockerSwarm.validate() { errs = append(errs, fmt.Errorf("docker_swarm.%w", err)) } + case OrchestratorSystemD: + for _, err := range c.SystemD.validate() { + errs = append(errs, fmt.Errorf("systemd.%w", err)) + } default: - errs = append(errs, fmt.Errorf("host_type: unsupported host type %q", c.Orchestrator)) + errs = append(errs, fmt.Errorf("orchestrator: unsupported orchestrator %q", c.Orchestrator)) } switch c.EtcdMode { case EtcdModeServer: @@ -390,6 +419,7 @@ func DefaultConfig() (Config, error) { EtcdServer: etcdServerDefault, EtcdClient: etcdClientDefault, DockerSwarm: defaultDockerSwarm, + SystemD: defaultSystemD, DatabaseOwnerUID: 26, RandomPorts: defaultRandomPorts, }, nil diff --git a/server/internal/database/instance.go b/server/internal/database/instance.go index 5ef4eda6..b4343088 100644 --- a/server/internal/database/instance.go +++ 
b/server/internal/database/instance.go @@ -71,15 +71,18 @@ func patroniToInstanceState(state *patroni.State) InstanceState { } type Instance struct { - InstanceID string `json:"instance_id"` - DatabaseID string `json:"database_id"` - HostID string `json:"host_id"` - NodeName string `json:"node_name"` - State InstanceState `json:"state"` - Status *InstanceStatus `json:"status"` - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` - Error string `json:"error,omitempty"` + InstanceID string `json:"instance_id"` + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + NodeName string `json:"node_name"` + State InstanceState `json:"state"` + Status *InstanceStatus `json:"status"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + Port *int `json:"port"` + PatroniPort *int `json:"patroni_port"` + PgEdgeVersion *ds.PgEdgeVersion `json:"pgedge_version"` + Error string `json:"error,omitempty"` } type SubscriptionStatus struct { @@ -113,14 +116,17 @@ func storedToInstance(instance *StoredInstance, status *StoredInstanceStatus) *I return nil } out := &Instance{ - InstanceID: instance.InstanceID, - DatabaseID: instance.DatabaseID, - HostID: instance.HostID, - NodeName: instance.NodeName, - State: instance.State, - CreatedAt: instance.CreatedAt, - UpdatedAt: instance.UpdateAt, - Error: instance.Error, + InstanceID: instance.InstanceID, + DatabaseID: instance.DatabaseID, + HostID: instance.HostID, + NodeName: instance.NodeName, + State: instance.State, + CreatedAt: instance.CreatedAt, + UpdatedAt: instance.UpdateAt, + Port: instance.Port, + PatroniPort: instance.PatroniPort, + PgEdgeVersion: instance.PgEdgeVersion, + Error: instance.Error, } if status != nil { out.Status = status.Status diff --git a/server/internal/database/instance_resource.go b/server/internal/database/instance_resource.go index 6d9ba199..91a5719d 100644 --- a/server/internal/database/instance_resource.go +++ 
b/server/internal/database/instance_resource.go @@ -162,7 +162,7 @@ func (r *InstanceResource) initializeInstance(ctx context.Context, rc *resource. r.PrimaryInstanceID = primaryInstanceID if r.Spec.InstanceID != r.PrimaryInstanceID { - err = r.updateInstanceState(ctx, rc, &InstanceUpdateOptions{State: InstanceStateAvailable}) + err = r.updateInstanceRecord(ctx, rc, &InstanceUpdateOptions{State: InstanceStateAvailable}) if err != nil { return r.recordError(ctx, rc, err) } @@ -213,7 +213,7 @@ func (r *InstanceResource) initializeInstance(ctx context.Context, rc *resource. return fmt.Errorf("failed to commit transaction: %w", err) } - err = r.updateInstanceState(ctx, rc, &InstanceUpdateOptions{State: InstanceStateAvailable}) + err = r.updateInstanceRecord(ctx, rc, &InstanceUpdateOptions{State: InstanceStateAvailable}) if err != nil { return r.recordError(ctx, rc, err) } @@ -221,7 +221,7 @@ func (r *InstanceResource) initializeInstance(ctx context.Context, rc *resource. return nil } -func (r *InstanceResource) updateInstanceState(ctx context.Context, rc *resource.Context, opts *InstanceUpdateOptions) error { +func (r *InstanceResource) updateInstanceRecord(ctx context.Context, rc *resource.Context, opts *InstanceUpdateOptions) error { svc, err := do.Invoke[*Service](rc.Injector) if err != nil { return err @@ -230,6 +230,9 @@ func (r *InstanceResource) updateInstanceState(ctx context.Context, rc *resource opts.DatabaseID = r.Spec.DatabaseID opts.HostID = r.Spec.HostID opts.NodeName = r.Spec.NodeName + opts.Port = r.Spec.Port + opts.PatroniPort = r.Spec.PatroniPort + opts.PgEdgeVersion = r.Spec.PgEdgeVersion err = svc.UpdateInstance(ctx, opts) if err != nil { return fmt.Errorf("failed to update instance state: %w", err) @@ -244,7 +247,7 @@ func (r *InstanceResource) recordError(ctx context.Context, rc *resource.Context return err } - err = r.updateInstanceState(ctx, rc, &InstanceUpdateOptions{ + err = r.updateInstanceRecord(ctx, rc, &InstanceUpdateOptions{ State: 
InstanceStateFailed, Error: cause.Error(), }) @@ -260,7 +263,11 @@ func (r *InstanceResource) updateConnectionInfo(ctx context.Context, rc *resourc if err != nil { return err } - connInfo, err := orch.GetInstanceConnectionInfo(ctx, r.Spec.DatabaseID, r.Spec.InstanceID) + connInfo, err := orch.GetInstanceConnectionInfo(ctx, + r.Spec.DatabaseID, r.Spec.InstanceID, + r.Spec.Port, r.Spec.PatroniPort, + r.Spec.PgEdgeVersion, + ) if err != nil { return fmt.Errorf("failed to get instance connection info: %w", err) } diff --git a/server/internal/database/instance_store.go b/server/internal/database/instance_store.go index 9e0a33b8..047af123 100644 --- a/server/internal/database/instance_store.go +++ b/server/internal/database/instance_store.go @@ -5,29 +5,36 @@ import ( clientv3 "go.etcd.io/etcd/client/v3" + "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/storage" ) type StoredInstance struct { storage.StoredValue - InstanceID string `json:"instance_id"` - DatabaseID string `json:"database_id"` - HostID string `json:"host_id"` - NodeName string `json:"node_name"` - State InstanceState `json:"state"` - CreatedAt time.Time `json:"created_at"` - UpdateAt time.Time `json:"updated_at"` - Error string `json:"error,omitempty"` + InstanceID string `json:"instance_id"` + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + NodeName string `json:"node_name"` + State InstanceState `json:"state"` + CreatedAt time.Time `json:"created_at"` + UpdateAt time.Time `json:"updated_at"` + Port *int `json:"port"` + PatroniPort *int `json:"patroni_port"` + PgEdgeVersion *ds.PgEdgeVersion `json:"pgedge_version"` + Error string `json:"error,omitempty"` } type InstanceUpdateOptions struct { - InstanceID string `json:"instance_id"` - DatabaseID string `json:"database_id"` - HostID string `json:"host_id"` - NodeName string `json:"node_name"` - State InstanceState `json:"state"` - Error string `json:"error,omitempty"` - Now 
time.Time `json:"now"` + InstanceID string `json:"instance_id"` + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + NodeName string `json:"node_name"` + State InstanceState `json:"state"` + Port *int `json:"port"` + PatroniPort *int `json:"patroni_port"` + PgEdgeVersion *ds.PgEdgeVersion `json:"pgedge_version"` + Error string `json:"error,omitempty"` + Now time.Time `json:"now"` } func (o *InstanceUpdateOptions) now() time.Time { @@ -40,18 +47,47 @@ func (o *InstanceUpdateOptions) now() time.Time { func NewStoredInstance(opts *InstanceUpdateOptions) *StoredInstance { now := opts.now() return &StoredInstance{ - InstanceID: opts.InstanceID, - DatabaseID: opts.DatabaseID, - HostID: opts.HostID, - NodeName: opts.NodeName, - State: opts.State, - CreatedAt: now, - UpdateAt: now, - Error: opts.Error, + InstanceID: opts.InstanceID, + DatabaseID: opts.DatabaseID, + HostID: opts.HostID, + NodeName: opts.NodeName, + State: opts.State, + Port: opts.Port, + PatroniPort: opts.PatroniPort, + PgEdgeVersion: opts.PgEdgeVersion, + CreatedAt: now, + UpdateAt: now, + Error: opts.Error, } } func (i *StoredInstance) Update(opts *InstanceUpdateOptions) { + i.Port = opts.Port + i.PatroniPort = opts.PatroniPort + i.PgEdgeVersion = opts.PgEdgeVersion + i.State = opts.State + i.Error = opts.Error + i.UpdateAt = opts.now() +} + +type InstanceStateUpdateOptions struct { + InstanceID string `json:"instance_id"` + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + NodeName string `json:"node_name"` + State InstanceState `json:"state"` + Error string `json:"error,omitempty"` + Now time.Time `json:"now"` +} + +func (o *InstanceStateUpdateOptions) now() time.Time { + if !o.Now.IsZero() { + return o.Now + } + return time.Now() +} + +func (i *StoredInstance) UpdateState(opts *InstanceStateUpdateOptions) { i.State = opts.State i.Error = opts.Error i.UpdateAt = opts.now() diff --git a/server/internal/database/orchestrator.go 
b/server/internal/database/orchestrator.go index 184a575a..1a8ff9f2 100644 --- a/server/internal/database/orchestrator.go +++ b/server/internal/database/orchestrator.go @@ -9,6 +9,7 @@ import ( "strconv" "github.com/google/uuid" + "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/pgbackrest" "github.com/pgEdge/control-plane/server/internal/postgres" "github.com/pgEdge/control-plane/server/internal/resource" @@ -157,9 +158,12 @@ type Orchestrator interface { GenerateInstanceResources(spec *InstanceSpec) (*InstanceResources, error) GenerateInstanceRestoreResources(spec *InstanceSpec, taskID uuid.UUID) (*InstanceResources, error) GenerateServiceInstanceResources(spec *ServiceInstanceSpec) (*ServiceInstanceResources, error) - GetInstanceConnectionInfo(ctx context.Context, databaseID, instanceID string) (*ConnectionInfo, error) + GetInstanceConnectionInfo(ctx context.Context, + databaseID, instanceID string, + postgresPort, patroniPort *int, + pgEdgeVersion *ds.PgEdgeVersion) (*ConnectionInfo, error) GetServiceInstanceStatus(ctx context.Context, serviceInstanceID string) (*ServiceInstanceStatus, error) - CreatePgBackRestBackup(ctx context.Context, w io.Writer, instanceID string, options *pgbackrest.BackupOptions) error + CreatePgBackRestBackup(ctx context.Context, w io.Writer, spec *InstanceSpec, options *pgbackrest.BackupOptions) error ExecuteInstanceCommand(ctx context.Context, w io.Writer, databaseID, instanceID string, args ...string) error ValidateInstanceSpecs(ctx context.Context, changes []*InstanceSpecChange) ([]*ValidationResult, error) StopInstance(ctx context.Context, instanceID string) error diff --git a/server/internal/database/service.go b/server/internal/database/service.go index e78a1eae..09ef2338 100644 --- a/server/internal/database/service.go +++ b/server/internal/database/service.go @@ -4,6 +4,8 @@ import ( "context" "errors" "fmt" + "io" + "strings" "time" "github.com/google/uuid" @@ -11,6 +13,7 @@ 
import ( "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/host" + "github.com/pgEdge/control-plane/server/internal/pgbackrest" "github.com/pgEdge/control-plane/server/internal/ports" "github.com/pgEdge/control-plane/server/internal/storage" "github.com/pgEdge/control-plane/server/internal/utils" @@ -292,6 +295,34 @@ func (s *Service) UpdateInstance(ctx context.Context, opts *InstanceUpdateOption return nil } +func (s *Service) UpdateInstanceState(ctx context.Context, opts *InstanceStateUpdateOptions) error { + instance, err := s.store.Instance. + GetByKey(opts.DatabaseID, opts.InstanceID). + Exec(ctx) + if errors.Is(err, storage.ErrNotFound) { + instance = NewStoredInstance(&InstanceUpdateOptions{ + InstanceID: opts.InstanceID, + DatabaseID: opts.DatabaseID, + HostID: opts.HostID, + NodeName: opts.NodeName, + State: opts.State, + }) + } else if err != nil { + return fmt.Errorf("failed to get stored instance: %w", err) + } else { + instance.UpdateState(opts) + } + + err = s.store.Instance. + Put(instance). + Exec(ctx) + if err != nil { + return fmt.Errorf("failed to update stored instance: %w", err) + } + + return nil +} + func (s *Service) DeleteInstance(ctx context.Context, databaseID, instanceID string) error { _, err := s.store.Instance. DeleteByKey(databaseID, instanceID). @@ -434,10 +465,20 @@ func (s *Service) GetAllServiceInstances(ctx context.Context) ([]*ServiceInstanc return serviceInstances, nil } +func (s *Service) CreatePgBackRestBackup(ctx context.Context, w io.Writer, databaseID, instanceID string, options *pgbackrest.BackupOptions) error { + instance, err := s.store.InstanceSpec. + GetByKey(databaseID, instanceID). 
+ Exec(ctx) + if errors.Is(err, storage.ErrNotFound) { + return ErrInstanceNotFound + } else if err != nil { + return err + } + return s.orchestrator.CreatePgBackRestBackup(ctx, w, instance.Spec, options) +} + func (s *Service) GetInstanceConnectionInfo(ctx context.Context, databaseID, instanceID string) (*ConnectionInfo, error) { - // This serves as an existence check for now. We'll make more use of the - // stored instance when we add support for systemd. - _, err := s.store.Instance. + storedInstance, err := s.store.Instance. GetByKey(databaseID, instanceID). Exec(ctx) if errors.Is(err, storage.ErrNotFound) { @@ -445,7 +486,10 @@ func (s *Service) GetInstanceConnectionInfo(ctx context.Context, databaseID, ins } else if err != nil { return nil, fmt.Errorf("failed to get stored instance: %w", err) } - return s.orchestrator.GetInstanceConnectionInfo(ctx, databaseID, instanceID) + return s.orchestrator.GetInstanceConnectionInfo(ctx, + storedInstance.DatabaseID, storedInstance.InstanceID, + storedInstance.Port, storedInstance.PatroniPort, + storedInstance.PgEdgeVersion) } func (s *Service) InstanceCountForHost(ctx context.Context, hostID string) (int, error) { @@ -464,19 +508,35 @@ func (s *Service) InstanceCountForHost(ctx context.Context, hostID string) (int, return count, nil } +func validateHostIDs(hostIDs ds.Set[string], hosts []*host.Host) error { + found := ds.NewSet[string]() + for _, host := range hosts { + found.Add(host.ID) + } + notFound := hostIDs.Difference(found).ToSortedSlice(strings.Compare) + if len(notFound) != 0 { + return fmt.Errorf("got invalid host ids: %s", strings.Join(notFound, ", ")) + } + + return nil +} + func (s *Service) PopulateSpecDefaults(ctx context.Context, spec *Spec) error { - var hostIDs []string + hostIDs := ds.NewSet[string]() // First pass to build out hostID list for _, node := range spec.Nodes { - hostIDs = append(hostIDs, node.HostIDs...) + hostIDs.Add(node.HostIDs...) 
} for _, svc := range spec.Services { - hostIDs = append(hostIDs, svc.HostIDs...) + hostIDs.Add(svc.HostIDs...) } - hosts, err := s.hostSvc.GetHosts(ctx, hostIDs) + hosts, err := s.hostSvc.GetHosts(ctx, hostIDs.ToSlice()) if err != nil { return fmt.Errorf("failed to get hosts: %w", err) } + if err := validateHostIDs(hostIDs, hosts); err != nil { + return err + } defaultVersion, err := host.GreatestCommonDefaultVersion(hosts...) if err != nil { return fmt.Errorf("unable to find greatest common default version among specified hosts: %w", err) diff --git a/server/internal/database/spec.go b/server/internal/database/spec.go index 0c610f54..ffd466a9 100644 --- a/server/internal/database/spec.go +++ b/server/internal/database/spec.go @@ -8,6 +8,7 @@ import ( "math/big" "slices" "strconv" + "strings" "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/pgbackrest" @@ -523,6 +524,7 @@ type InstanceSpec struct { ClusterSize int `json:"cluster_size"` OrchestratorOpts *OrchestratorOpts `json:"orchestrator_opts,omitempty"` InPlaceRestore bool `json:"in_place_restore,omitempty"` + AllHostIDs []string `json:"all_host_ids"` // All host IDs in the database } func (s *InstanceSpec) CopySettingsFrom(current *InstanceSpec) { @@ -574,6 +576,7 @@ func (s *InstanceSpec) Clone() *InstanceSpec { PostgreSQLConf: maps.Clone(s.PostgreSQLConf), ClusterSize: s.ClusterSize, OrchestratorOpts: s.OrchestratorOpts.Clone(), + AllHostIDs: slices.Clone(s.AllHostIDs), } } @@ -601,6 +604,13 @@ func (s *Spec) NodeInstances() ([]*NodeInstances, error) { return nil, fmt.Errorf("failed to parse version from spec: %w", err) } + // First pass to gather host IDs + hostIDSet := ds.NewSet[string]() + for _, node := range s.Nodes { + hostIDSet.Add(node.HostIDs...) 
+ } + allHostIDs := hostIDSet.ToSortedSlice(strings.Compare) + var owners []string for _, user := range s.DatabaseUsers { if user.DBOwner { @@ -660,6 +670,7 @@ func (s *Spec) NodeInstances() ([]*NodeInstances, error) { PostgreSQLConf: overridableMapValue(s.PostgreSQLConf, node.PostgreSQLConf), ClusterSize: clusterSize, OrchestratorOpts: overridableValue(s.OrchestratorOpts, node.OrchestratorOpts), + AllHostIDs: allHostIDs, } } diff --git a/server/internal/ds/versions.go b/server/internal/ds/versions.go index 0dd1efb2..21ef942e 100644 --- a/server/internal/ds/versions.go +++ b/server/internal/ds/versions.go @@ -56,6 +56,23 @@ func (v *Version) Major() (uint64, bool) { return v.Components[0], true } +func (v *Version) MajorString() (string, bool) { + major, ok := v.Major() + if !ok { + return "", false + } + return strconv.FormatUint(major, 10), true +} + +func (v *Version) MajorVersion() *Version { + if len(v.Components) == 0 { + return &Version{} + } + return &Version{ + Components: slices.Clone(v.Components[:1]), + } +} + func (v *Version) String() string { components := make([]string, len(v.Components)) for i, c := range v.Components { @@ -115,7 +132,7 @@ func (v *Version) Compare(other *Version) int { return slices.Compare(v.Components, other.Components) } -var semverRegexp = regexp.MustCompile(`^\d+(.\d+){0,2}$`) +var semverRegexp = regexp.MustCompile(`^\d+(\.\d+){0,2}$`) func MustParseVersion(s string) *Version { v, err := ParseVersion(s) diff --git a/server/internal/monitor/instance_monitor.go b/server/internal/monitor/instance_monitor.go index 3d0a9c58..57ef2505 100644 --- a/server/internal/monitor/instance_monitor.go +++ b/server/internal/monitor/instance_monitor.go @@ -101,11 +101,10 @@ func (m *InstanceMonitor) checkStatus(ctx context.Context) error { return m.updateInstanceErrStatus(ctx, status, err) } if currentInstance != nil && currentInstance.State != database.InstanceStateAvailable { - _ = m.dbSvc.UpdateInstance(ctx, &database.InstanceUpdateOptions{ 
+ _ = m.dbSvc.UpdateInstanceState(ctx, &database.InstanceStateUpdateOptions{ InstanceID: m.instanceID, DatabaseID: m.databaseID, State: database.InstanceStateAvailable, - Error: "", }) } return m.updateInstanceStatus(ctx, status) diff --git a/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/enable_fast_basebackup.yaml b/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/enable_fast_basebackup.yaml index d80ac238..fc667ed1 100644 --- a/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/enable_fast_basebackup.yaml +++ b/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/enable_fast_basebackup.yaml @@ -89,12 +89,12 @@ postgresql: - local replication all trust - host replication all 127.0.0.1/32 trust - host replication all ::1/128 trust - - hostssl all pgedge,patroni_replicator 10.10.0.2 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.2 cert clientcert=verify-full - - hostssl all pgedge,patroni_replicator 10.10.0.3 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.3 cert clientcert=verify-full - - hostssl all pgedge,patroni_replicator 10.10.0.4 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.4 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.2/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.2/32 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.3/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.3/32 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.4/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.4/32 cert clientcert=verify-full - host all pgedge,patroni_replicator 0.0.0.0/0 reject - host all pgedge,patroni_replicator ::/0 reject - host all all 0.0.0.0/0 md5 
diff --git a/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/minimal_systemd.yaml b/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/minimal_systemd.yaml index 727d626e..4059171a 100644 --- a/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/minimal_systemd.yaml +++ b/server/internal/orchestrator/common/golden_test/TestPatroniConfigGenerator/minimal_systemd.yaml @@ -89,12 +89,12 @@ postgresql: - local replication all trust - host replication all 127.0.0.1/32 trust - host replication all ::1/128 trust - - hostssl all pgedge,patroni_replicator 10.10.0.2 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.2 cert clientcert=verify-full - - hostssl all pgedge,patroni_replicator 10.10.0.3 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.3 cert clientcert=verify-full - - hostssl all pgedge,patroni_replicator 10.10.0.4 cert clientcert=verify-full - - hostssl replication pgedge,patroni_replicator 10.10.0.4 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.2/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.2/32 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.3/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.3/32 cert clientcert=verify-full + - hostssl all pgedge,patroni_replicator 10.10.0.4/32 cert clientcert=verify-full + - hostssl replication pgedge,patroni_replicator 10.10.0.4/32 cert clientcert=verify-full - host all pgedge,patroni_replicator 0.0.0.0/0 reject - host all pgedge,patroni_replicator ::/0 reject - host all all 0.0.0.0/0 md5 diff --git a/server/internal/orchestrator/provide.go b/server/internal/orchestrator/provide.go index 9adc38b9..235f47ed 100644 --- a/server/internal/orchestrator/provide.go +++ b/server/internal/orchestrator/provide.go @@ -1,20 +1,24 @@ package orchestrator 
import ( + "context" "fmt" + "github.com/samber/do" + "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/host" "github.com/pgEdge/control-plane/server/internal/orchestrator/swarm" + "github.com/pgEdge/control-plane/server/internal/orchestrator/systemd" "github.com/pgEdge/control-plane/server/internal/workflows" - "github.com/samber/do" ) type Orchestrator interface { host.Orchestrator database.Orchestrator workflows.Orchestrator + Start(context.Context) error } func Provide(i *do.Injector) error { @@ -26,6 +30,9 @@ func Provide(i *do.Injector) error { case config.OrchestratorSwarm: swarm.Provide(i) provideOrchestrator[*swarm.Orchestrator](i) + case config.OrchestratorSystemD: + systemd.Provide(i) + provideOrchestrator[*systemd.Orchestrator](i) default: return fmt.Errorf("unsupported orchestrator: %q", cfg.Orchestrator) } diff --git a/server/internal/orchestrator/swarm/orchestrator.go b/server/internal/orchestrator/swarm/orchestrator.go index 91ffd240..da8f764b 100644 --- a/server/internal/orchestrator/swarm/orchestrator.go +++ b/server/internal/orchestrator/swarm/orchestrator.go @@ -28,6 +28,7 @@ import ( "github.com/pgEdge/control-plane/server/internal/config" "github.com/pgEdge/control-plane/server/internal/database" "github.com/pgEdge/control-plane/server/internal/docker" + "github.com/pgEdge/control-plane/server/internal/ds" "github.com/pgEdge/control-plane/server/internal/filesystem" "github.com/pgEdge/control-plane/server/internal/healthcheck" "github.com/pgEdge/control-plane/server/internal/host" @@ -107,6 +108,10 @@ func NewOrchestrator( }, nil } +func (o *Orchestrator) Start(_ context.Context) error { + return nil +} + func (o *Orchestrator) PopulateHost(ctx context.Context, h *host.Host) error { h.CPUs = o.cpus h.MemBytes = o.memBytes @@ -579,7 +584,11 @@ func (o *Orchestrator) GenerateServiceInstanceResources(spec *database.ServiceIn }, nil } 
-func (o *Orchestrator) GetInstanceConnectionInfo(ctx context.Context, databaseID, instanceID string) (*database.ConnectionInfo, error) { +func (o *Orchestrator) GetInstanceConnectionInfo(ctx context.Context, + databaseID, instanceID string, + postgresPort, patroniPort *int, + pgEdgeVersion *ds.PgEdgeVersion, +) (*database.ConnectionInfo, error) { container, err := GetPostgresContainer(ctx, o.docker, instanceID) if err != nil { if errors.Is(err, ErrNoPostgresContainer) { @@ -700,10 +709,10 @@ func (o *Orchestrator) WorkerQueues() ([]workflow.Queue, error) { return queues, nil } -func (o *Orchestrator) CreatePgBackRestBackup(ctx context.Context, w io.Writer, instanceID string, options *pgbackrest.BackupOptions) error { +func (o *Orchestrator) CreatePgBackRestBackup(ctx context.Context, w io.Writer, spec *database.InstanceSpec, options *pgbackrest.BackupOptions) error { backupCmd := PgBackRestBackupCmd("backup", options.StringSlice()...) - err := PostgresContainerExec(ctx, w, o.docker, instanceID, backupCmd.StringSlice()) + err := PostgresContainerExec(ctx, w, o.docker, spec.InstanceID, backupCmd.StringSlice()) if err != nil { return fmt.Errorf("failed to exec backup command: %w", err) } diff --git a/server/internal/orchestrator/systemd/client.go b/server/internal/orchestrator/systemd/client.go new file mode 100644 index 00000000..50dc82de --- /dev/null +++ b/server/internal/orchestrator/systemd/client.go @@ -0,0 +1,291 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "os" + "syscall" + "time" + + "github.com/coreos/go-systemd/v22/dbus" + "github.com/rs/zerolog" + + "github.com/pgEdge/control-plane/server/internal/logging" +) + +const stopTimeout = 30 * time.Second + +var ErrUnitNotFound = errors.New("unit does not exist") + +type Client struct { + logger zerolog.Logger + conn *dbus.Conn +} + +func NewClient(loggerFactory *logging.Factory) *Client { + return &Client{ + logger: loggerFactory.Logger("systemd_client"), + } +} + +func (c *Client) 
Start(ctx context.Context) error { + c.logger.Debug().Msg("starting systemd client") + + conn, err := dbus.NewWithContext(ctx) + if err != nil { + return fmt.Errorf("failed to start dbus connection: %w", err) + } + + c.conn = conn + + return nil +} + +func (c *Client) Reload(ctx context.Context) error { + c.logger.Debug().Msg("reloading systemd") + + if err := c.conn.ReloadContext(ctx); err != nil { + return fmt.Errorf("failed to reload systemd: %w", err) + } + + c.logger.Debug().Msg("reloaded systemd") + + return nil +} + +func (c *Client) StartUnit(ctx context.Context, name string) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("starting unit") + + resCh := make(chan string, 1) + pid, err := c.conn.StartUnitContext(ctx, name, "replace", resCh) + if err != nil { + return fmt.Errorf("failed to start unit '%s': %w", name, err) + } + res, err := awaitJob(ctx, resCh) + if err != nil { + return fmt.Errorf("failed to start unit '%s': %w", name, err) + } + + c.logger.Debug(). + Str("response", res). + Int("pid", pid). + Msg("started unit") + + return nil +} + +func (c *Client) StopUnit(ctx context.Context, name string, wait bool) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("stopping unit") + + resCh := make(chan string, 1) + pid, err := c.conn.StopUnitContext(ctx, name, "replace", resCh) + if err != nil { + return fmt.Errorf("failed to stop unit '%s': %w", name, err) + } + res, err := awaitJob(ctx, resCh) + if err != nil { + return fmt.Errorf("failed to stop unit '%s': %w", name, err) + } + + c.logger.Debug(). + Str("response", res). + Int("pid", pid). + Msg("stopped unit") + + if wait && pid != 0 { + c.logger.Debug(). + Int("pid", pid). + Float64("timeout_seconds", stopTimeout.Seconds()). 
+ Msg("waiting for main process to exit") + + if err := waitForPid(ctx, pid, stopTimeout); err != nil { + return fmt.Errorf("failed to wait for pid %d to exit: %w", pid, err) + } + } + + return nil +} + +func (c *Client) RestartUnit(ctx context.Context, name string) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("restarting unit") + + resCh := make(chan string, 1) + pid, err := c.conn.ReloadOrRestartUnitContext(ctx, name, "replace", resCh) + if err != nil { + return fmt.Errorf("failed to restart unit '%s': %w", name, err) + } + res, err := awaitJob(ctx, resCh) + if err != nil { + return fmt.Errorf("failed to restart unit '%s': %w", name, err) + } + + c.logger.Debug(). + Str("response", res). + Int("pid", pid). + Msg("restarted unit") + + return nil +} + +func (c *Client) EnableUnit(ctx context.Context, name string) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("enabling unit") + + _, res, err := c.conn.EnableUnitFilesContext(ctx, []string{name}, false, false) + if err != nil { + return fmt.Errorf("failed to enable unit '%s': %w", name, err) + } + + var change dbus.EnableUnitFileChange + if len(res) > 0 { + change = res[0] + } + + c.logger.Debug(). + Str("change.filename", change.Filename). + Str("change.destination", change.Destination). + Str("change.type", change.Type). + Msg("enabled unit") + + return nil +} + +func (c *Client) DisableUnit(ctx context.Context, path string) error { + logger := c.logger.With().Str("unit", path).Logger() + logger.Debug().Msg("disabling unit") + + res, err := c.conn.DisableUnitFilesContext(ctx, []string{path}, false) + if err != nil { + return fmt.Errorf("failed to disable unit '%s': %w", path, err) + } + + var change dbus.DisableUnitFileChange + if len(res) > 0 { + change = res[0] + } + + c.logger.Debug(). + Str("change.filename", change.Filename). + Str("change.destination", change.Destination). + Str("change.type", change.Type). 
+ Msg("disabled unit") + + return nil +} + +func (c *Client) GetUnitFilePath(ctx context.Context, name string) (string, error) { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("getting unit file path") + + prop, err := c.conn.GetUnitPropertyContext(ctx, name, "FragmentPath") + if err != nil { + return "", fmt.Errorf("failed to get unit property: %w", err) + } + path := prop.Value.String() + + logger.Debug(). + Str("path", path). + Msg("got unit file path") + + return path, nil +} + +func (c *Client) UnitExists(ctx context.Context, name string) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("checking if unit exists") + + resp, err := c.conn.ListUnitsContext(ctx) + if err != nil { + return fmt.Errorf("failed to list units: %w", err) + } + + for _, unit := range resp { + if unit.Name == name { + return nil + } + } + + return ErrUnitNotFound +} + +func (c *Client) RemoveUnitFile(ctx context.Context, name string) error { + logger := c.logger.With().Str("unit", name).Logger() + logger.Debug().Msg("removing unit file") + + if err := c.UnitExists(ctx, name); err != nil { + return err + } + + path, err := c.GetUnitFilePath(ctx, name) + if err != nil { + return err + } + if path == "" { + return fmt.Errorf("got empty fragment path for unit '%s'", name) + } + + err = os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("failed to remove unit file '%s': %w", name, err) + } + + logger.Debug(). + Str("path", path). 
+ Msg("removed unit file")
+
+ return c.Reload(ctx)
+}
+
+func (c *Client) Shutdown() error {
+ c.logger.Debug().Msg("stopping systemd client")
+
+ if c.conn != nil {
+ c.conn.Close()
+ }
+
+ return nil
+}
+
+func awaitJob(ctx context.Context, resCh <-chan string) (string, error) {
+ select {
+ case res := <-resCh:
+ if res != "done" {
+ return res, fmt.Errorf("systemd job finished with status %q", res)
+ }
+ return res, nil
+ case <-ctx.Done():
+ return "", ctx.Err()
+ }
+}
+
+// waitForPid waits for the given PID to not exist using a method that works
+// for non-child processes.
+func waitForPid(ctx context.Context, pid int, timeout time.Duration) error {
+ // os.FindProcess never fails on Unix even for a dead PID; liveness is probed below via Signal(0).
+ proc, err := os.FindProcess(pid)
+ if err != nil {
+ return err
+ }
+
+ deadline := time.Now().Add(timeout)
+ ticker := time.NewTicker(500 * time.Millisecond)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-ticker.C:
+ err := proc.Signal(syscall.Signal(0))
+ if err != nil {
+ return nil // process is gone
+ }
+ if time.Now().After(deadline) {
+ return fmt.Errorf("timed out waiting for pid %d to exit after %.2f seconds", pid, timeout.Seconds())
+ }
+ }
+ }
+}
 diff --git a/server/internal/orchestrator/systemd/dnf.go b/server/internal/orchestrator/systemd/dnf.go new file mode 100644 index 00000000..60ad1ba4 --- /dev/null +++ b/server/internal/orchestrator/systemd/dnf.go @@ -0,0 +1,136 @@ +package systemd
+
+import (
+ "context"
+ "fmt"
+ "maps"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "slices"
+ "strings"
+ "time"
+
+ "github.com/pgEdge/control-plane/server/internal/ds"
+)
+
+var _ PackageManager = (*Dnf)(nil)
+
+type Dnf struct{}
+
+func (d *Dnf) InstanceDataBaseDir(pgMajor string) string {
+ return filepath.Join("/var/lib/pgsql", pgMajor)
+}
+
+func (d *Dnf) BinDir(pgMajor string) string {
+ return fmt.Sprintf("/usr/pgsql-%s/bin", pgMajor)
+}
+
+func (d *Dnf) InstalledPostgresVersions(ctx context.Context) ([]*InstalledPostgres, error) {
+ 
ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + args := append([]string{"list", "--installed"}, supportedDnfPackages()...) + cmd := exec.CommandContext(ctx, "dnf", args...) + + out, err := cmd.CombinedOutput() + if err != nil { + if strings.Contains(strings.ToLower(string(out)), "no matching packages to list") { + return nil, nil + } + return nil, fmt.Errorf("failed to execute command: %w, output: %s", err, string(out)) + } + + installed := map[string]*InstalledPostgres{} + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + + if len(fields) < 2 { + continue + } + + pkg, ver := fields[0], fields[1] + switch { + case strings.HasPrefix(pkg, "pgedge-postgresql"): + inst, err := InstalledPostgresPackage(pkg, ver) + if err != nil { + return nil, err + } + postgres, ok := installed[inst.PostgresMajor] + if !ok { + postgres = &InstalledPostgres{} + installed[inst.PostgresMajor] = postgres + } + postgres.Postgres = inst + case strings.HasPrefix(pkg, "pgedge-spock"): + inst, err := InstalledSpockPackage(pkg, ver) + if err != nil { + return nil, err + } + postgres, ok := installed[inst.PostgresMajor] + if !ok { + postgres = &InstalledPostgres{} + installed[inst.PostgresMajor] = postgres + } + postgres.Spock = append(postgres.Spock, inst) + } + } + + ret := slices.Collect(maps.Values(installed)) + for i := range ret { + slices.SortFunc(ret[i].Spock, PackageCmp) + } + slices.SortFunc(ret, InstalledPostgresCmp) + + return ret, nil +} + +var supportedPostgresVersions = []string{"16", "17", "18"} +var supportedSpockVersions = []string{"50"} + +func supportedDnfPackages() []string { + var packages []string + for _, postgres := range supportedPostgresVersions { + packages = append(packages, fmt.Sprintf("pgedge-postgresql%s", postgres)) + + for _, spock := range supportedSpockVersions { + packages = append(packages, fmt.Sprintf("pgedge-spock%s_%s", spock, postgres)) + } + } + + return packages +} + +var digits = 
regexp.MustCompile(`\d+`) + +func postgresVersionFromSpockPkg(pkg string) (string, error) { + // pkg should look like pgedge-spock50_18.aarch64, so we want to extract the + // second match. + matches := digits.FindAllString(pkg, 2) + if len(matches) < 2 { + return "", fmt.Errorf("unexpected format for spock package '%s'", pkg) + } + return matches[1], nil +} + +func postgresVersionFromPostgresPkg(pkg string) (string, error) { + // pkg should look like pgedge-postgresql18.aarch64, so we want to extract the + // first match. + matches := digits.FindAllString(pkg, 1) + if len(matches) == 0 { + return "", fmt.Errorf("unexpected format for postgres package '%s'", pkg) + } + return matches[0], nil +} + +var semverRegexp = regexp.MustCompile(`^\d+(\.\d+){0,2}`) + +func toVersion(ver string) (*ds.Version, error) { + // Extract the major.minor.patch segment without epoch and modifiers + v := semverRegexp.FindString(ver) + if v == "" { + return nil, fmt.Errorf("invalid version format '%s'", ver) + } + + return ds.ParseVersion(v) +} diff --git a/server/internal/orchestrator/systemd/orchestrator.go b/server/internal/orchestrator/systemd/orchestrator.go new file mode 100644 index 00000000..dbde56a9 --- /dev/null +++ b/server/internal/orchestrator/systemd/orchestrator.go @@ -0,0 +1,584 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "io" + "net" + "os/exec" + "path/filepath" + "runtime" + "slices" + "syscall" + + "github.com/cschleiden/go-workflows/workflow" + "github.com/elastic/gosigar" + "github.com/google/uuid" + "github.com/rs/zerolog" + + "github.com/pgEdge/control-plane/server/internal/config" + "github.com/pgEdge/control-plane/server/internal/database" + "github.com/pgEdge/control-plane/server/internal/ds" + "github.com/pgEdge/control-plane/server/internal/filesystem" + "github.com/pgEdge/control-plane/server/internal/host" + "github.com/pgEdge/control-plane/server/internal/logging" + "github.com/pgEdge/control-plane/server/internal/orchestrator/common" 
+ "github.com/pgEdge/control-plane/server/internal/pgbackrest" + "github.com/pgEdge/control-plane/server/internal/postgres" + "github.com/pgEdge/control-plane/server/internal/resource" + "github.com/pgEdge/control-plane/server/internal/scheduler" + "github.com/pgEdge/control-plane/server/internal/utils" +) + +type Orchestrator struct { + cfg config.Config + logger zerolog.Logger + client *Client + packageManager PackageManager + cpus int + memBytes uint64 +} + +func NewOrchestrator( + cfg config.Config, + loggerFactory *logging.Factory, + client *Client, + packageManager PackageManager, +) (*Orchestrator, error) { + logger := loggerFactory.Logger("systemd_orchestrator") + logger.Debug().Msg("initializing orchestrator") + + mem := gosigar.Mem{} + if err := mem.Get(); err != nil { + return nil, fmt.Errorf("failed to inspect system memory: %w", err) + } + cpu := runtime.NumCPU() + + logger.Debug(). + Uint64("mem", mem.Total). + Int("cpu", cpu). + Msg("got system stats") + + return &Orchestrator{ + cfg: cfg, + logger: logger, + client: client, + packageManager: packageManager, + cpus: cpu, + memBytes: mem.Total, + }, nil +} + +func (o *Orchestrator) Start(ctx context.Context) error { + return o.client.Start(ctx) +} + +func (o *Orchestrator) PopulateHost(ctx context.Context, h *host.Host) error { + o.logger.Debug().Msg("querying installed versions") + + versions, err := o.packageManager.InstalledPostgresVersions(ctx) + if err != nil { + return fmt.Errorf("failed to get installed postgres versions: %w", err) + } + + o.logger.Debug(). + Int("version_count", len(versions)). + Msg("got installed versions") + + var supported []*ds.PgEdgeVersion + for _, installed := range versions { + if len(installed.Spock) == 0 { + o.logger.Debug(). + Str("postgres_name", installed.Postgres.Name). + Str("postgres_major", installed.Postgres.PostgresMajor). + Str("postgres_version", installed.Postgres.Version.String()). 
+ Msg("missing spock for this postgres version") + // We need spock + continue + } + + o.logger.Debug(). + Str("postgres_name", installed.Postgres.Name). + Str("postgres_major", installed.Postgres.PostgresMajor). + Str("postgres_version", installed.Postgres.Version.String()). + Msg("postgres version") + + for _, spock := range installed.Spock { + o.logger.Debug(). + Str("spock_name", spock.Name). + Str("spock_postgres_major", spock.PostgresMajor). + Str("spock_version", spock.Version.String()). + Msg("spock version") + + version := &ds.PgEdgeVersion{ + PostgresVersion: installed.Postgres.Version, + SpockVersion: spock.Version.MajorVersion(), + } + supported = append(supported, version) + + o.logger.Debug(). + Str("version", version.String()). + Msg("pgedge version") + } + + } + if len(supported) == 0 { + return errors.New("pgedge postgres not installed") + } + slices.SortFunc(supported, func(a, b *ds.PgEdgeVersion) int { + // Sort in reverse order + return -a.Compare(b) + }) + + h.CPUs = int(o.cpus) + h.MemBytes = o.memBytes + h.DefaultPgEdgeVersion = supported[0] + h.SupportedPgEdgeVersions = supported + + return nil +} + +func (o *Orchestrator) PopulateHostStatus(ctx context.Context, h *host.HostStatus) error { + // TODO: are there any systemd-specific components to report here? 
+ // We could use gosigar to query some system stats like mem or CPU usage + + return nil +} + +func (o *Orchestrator) GenerateInstanceResources(spec *database.InstanceSpec) (*database.InstanceResources, error) { + paths, err := o.instancePaths(spec.PgEdgeVersion.PostgresVersion, spec.InstanceID) + if err != nil { + return nil, err + } + + // directory resources + instanceDir := &filesystem.DirResource{ + ID: spec.InstanceID + "-instance", + HostID: spec.HostID, + Path: paths.Host.BaseDir, + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + dataDir := &filesystem.DirResource{ + ID: spec.InstanceID + "-data", + HostID: spec.HostID, + ParentID: instanceDir.ID, + Path: "data", + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + configsDir := &filesystem.DirResource{ + ID: spec.InstanceID + "-configs", + HostID: spec.HostID, + ParentID: instanceDir.ID, + Path: "configs", + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + certificatesDir := &filesystem.DirResource{ + ID: spec.InstanceID + "-certificates", + HostID: spec.HostID, + ParentID: instanceDir.ID, + Path: "certificates", + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + + // patroni resources - used to clean up etcd on deletion + patroniCluster := &common.PatroniCluster{ + DatabaseID: spec.DatabaseID, + NodeName: spec.NodeName, + } + patroniMember := &common.PatroniMember{ + DatabaseID: spec.DatabaseID, + NodeName: spec.NodeName, + InstanceID: spec.InstanceID, + } + + // file resources + etcdCreds := &common.EtcdCreds{ + InstanceID: spec.InstanceID, + HostID: spec.HostID, + DatabaseID: spec.DatabaseID, + NodeName: spec.NodeName, + ParentID: certificatesDir.ID, + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + postgresCerts := &common.PostgresCerts{ + InstanceID: spec.InstanceID, + HostID: spec.HostID, + ParentID: certificatesDir.ID, + InstanceAddresses: o.cfg.Addresses(), + 
OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + } + + // These should be caught by `ValidateInstanceSpecs`, but just in case + patroniPort := utils.FromPointer(spec.PatroniPort) + if patroniPort == 0 { + return nil, fmt.Errorf("patroni_port is required for systemd instances, missing for instance '%s'", spec.InstanceID) + } + postgresPort := utils.FromPointer(spec.Port) + if postgresPort == 0 { + return nil, fmt.Errorf("port is required for systemd instances, missing for instance '%s'", spec.InstanceID) + } + + patroniConfig := &PatroniConfig{ + DatabaseID: spec.DatabaseID, + AllHostIDs: spec.AllHostIDs, + Base: &common.PatroniConfig{ + InstanceID: spec.InstanceID, + HostID: spec.HostID, + NodeName: spec.NodeName, + Generator: common.NewPatroniConfigGenerator(common.PatroniConfigGeneratorOptions{ + Instance: spec, + HostCPUs: float64(o.cpus), + HostMemoryBytes: o.memBytes, + PatroniPort: patroniPort, + PostgresPort: postgresPort, + OrchestratorParameters: map[string]any{ + "shared_preload_libraries": "pg_stat_statements,spock", + }, + FQDN: o.cfg.PeerAddress(), + Paths: paths, + }), + ParentID: configsDir.ID, + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + }, + } + + pgMajor, ok := spec.PgEdgeVersion.PostgresVersion.MajorString() + if !ok { + return nil, errors.New("got empty postgres version") + } + + patroniUnit := &UnitResource{ + DatabaseID: spec.DatabaseID, + HostID: spec.HostID, + Name: patroniServiceName(spec.InstanceID), + Options: patroniUnitOptions(paths, o.packageManager.BinDir(pgMajor)), + ExtraDependencies: []resource.Identifier{ + patroniConfig.Identifier(), + instanceDir.Identifier(), + dataDir.Identifier(), + configsDir.Identifier(), + certificatesDir.Identifier(), + etcdCreds.Identifier(), + postgresCerts.Identifier(), + }, + } + + instance := &database.InstanceResource{ + Spec: spec, + InstanceHostname: o.cfg.PeerAddress(), + OrchestratorDependencies: []resource.Identifier{ + 
patroniUnit.Identifier(), + }, + } + + orchestratorResources := []resource.Resource{ + patroniCluster, + patroniMember, + instanceDir, + dataDir, + configsDir, + certificatesDir, + etcdCreds, + postgresCerts, + patroniConfig, + patroniUnit, + } + + dbDependencyResources := []resource.Resource{&common.PgServiceConf{ + ParentID: configsDir.ID, + HostID: spec.HostID, + InstanceID: spec.InstanceID, + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + }} + + if spec.BackupConfig != nil { + orchestratorResources = append(orchestratorResources, + &common.PgBackRestConfig{ + InstanceID: spec.InstanceID, + HostID: spec.HostID, + DatabaseID: spec.DatabaseID, + NodeName: spec.NodeName, + Repositories: spec.BackupConfig.Repositories, + ParentID: configsDir.ID, + Type: common.PgBackRestConfigTypeBackup, + OwnerUID: o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + Paths: paths, + Port: postgresPort, + }, + &common.PgBackRestStanza{ + DatabaseID: spec.DatabaseID, + NodeName: spec.NodeName, + Paths: paths, + }, + ) + for _, schedule := range spec.BackupConfig.Schedules { + orchestratorResources = append(orchestratorResources, scheduler.NewScheduledJobResource( + fmt.Sprintf("%s-%s-%s", schedule.ID, spec.DatabaseID, spec.NodeName), + schedule.CronExpression, + scheduler.WorkflowCreatePgBackRestBackup, + map[string]any{ + "database_id": spec.DatabaseID, + "node_name": spec.NodeName, + "type": pgbackrest.BackupType(schedule.Type).String(), + }, + []resource.Identifier{common.PgBackRestStanzaIdentifier(spec.NodeName)}, + )) + } + } + + if spec.RestoreConfig != nil { + orchestratorResources = append(orchestratorResources, &common.PgBackRestConfig{ + InstanceID: spec.InstanceID, + HostID: spec.HostID, + DatabaseID: spec.RestoreConfig.SourceDatabaseID, + NodeName: spec.RestoreConfig.SourceNodeName, + Repositories: []*pgbackrest.Repository{spec.RestoreConfig.Repository}, + ParentID: configsDir.ID, + Type: common.PgBackRestConfigTypeRestore, + OwnerUID: 
o.cfg.DatabaseOwnerUID, + OwnerGID: o.cfg.DatabaseOwnerUID, + Paths: paths, + Port: postgresPort, + }) + } + + return database.NewInstanceResources(instance, orchestratorResources, dbDependencyResources) +} + +func (o *Orchestrator) GenerateServiceInstanceResources(spec *database.ServiceInstanceSpec) (*database.ServiceInstanceResources, error) { + return nil, errors.New("unimplemented") +} + +func (o *Orchestrator) GenerateInstanceRestoreResources(spec *database.InstanceSpec, taskID uuid.UUID) (*database.InstanceResources, error) { + if spec.RestoreConfig == nil { + return nil, fmt.Errorf("missing restore config for node %s instance %s", spec.NodeName, spec.InstanceID) + } + paths, err := o.instancePaths(spec.PgEdgeVersion.PostgresVersion, spec.InstanceID) + if err != nil { + return nil, err + } + + restoreSpec := *spec + restoreSpec.InPlaceRestore = true + + instance, err := o.GenerateInstanceResources(&restoreSpec) + if err != nil { + return nil, err + } + + err = instance.AddResources(&PgBackRestRestore{ + DatabaseID: spec.DatabaseID, + HostID: spec.HostID, + InstanceID: spec.InstanceID, + TaskID: taskID, + NodeName: spec.NodeName, + RestoreOptions: spec.RestoreConfig.RestoreOptions, + Paths: paths, + }) + if err != nil { + return nil, fmt.Errorf("failed to add restore resource to instance resources: %w", err) + } + + return instance, nil +} + +func (o *Orchestrator) GetInstanceConnectionInfo(ctx context.Context, + databaseID, instanceID string, + postgresPort, patroniPort *int, + pgEdgeVersion *ds.PgEdgeVersion, +) (*database.ConnectionInfo, error) { + if postgresPort == nil { + return nil, fmt.Errorf("postgres port is not yet recorded for this instance") + } + if patroniPort == nil { + return nil, fmt.Errorf("patroni port is not yet recorded for this instance") + } + if pgEdgeVersion == nil { + return nil, fmt.Errorf("postgres version is not yet recorded for this instance") + } + + paths, err := o.instancePaths(pgEdgeVersion.PostgresVersion, instanceID) + if 
err != nil { + return nil, err + } + + postgresPortInt := utils.FromPointer(postgresPort) + patroniPortInt := utils.FromPointer(patroniPort) + + return &database.ConnectionInfo{ + AdminHost: "localhost", + AdminPort: postgresPortInt, + PeerHost: o.cfg.PeerAddress(), + PeerPort: postgresPortInt, + PeerSSLCert: paths.Instance.PostgresSuperuserCert(), + PeerSSLKey: paths.Instance.PostgresSuperuserKey(), + PeerSSLRootCert: paths.Instance.PostgresCaCert(), + PatroniPort: patroniPortInt, + ClientAddresses: o.cfg.ClientAddresses, + ClientPort: postgresPortInt, + InstanceHostname: o.cfg.PeerAddress(), + }, nil +} + +func (o *Orchestrator) GetServiceInstanceStatus(ctx context.Context, serviceInstanceID string) (*database.ServiceInstanceStatus, error) { + return nil, errors.New("unimplemented") +} + +func (o *Orchestrator) ExecuteInstanceCommand(ctx context.Context, w io.Writer, databaseID, instanceID string, args ...string) error { + if len(args) == 0 { + return errors.New("got empty args") + } + cmd := exec.CommandContext(ctx, args[0], args[1:]...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: uint32(o.cfg.DatabaseOwnerUID), + Gid: uint32(o.cfg.DatabaseOwnerUID), + }, + } + cmd.Stdout = w + cmd.Stderr = w + if err := cmd.Run(); err != nil { + return fmt.Errorf("instance '%s' command '%s' failed: %w", instanceID, args[0], err) + } + return nil +} + +func (o *Orchestrator) CreatePgBackRestBackup(ctx context.Context, w io.Writer, spec *database.InstanceSpec, options *pgbackrest.BackupOptions) error { + paths, err := o.instancePaths(spec.PgEdgeVersion.PostgresVersion, spec.InstanceID) + if err != nil { + return err + } + + cmd := paths.PgBackRestBackupCmd("backup", options.StringSlice()...) + return o.ExecuteInstanceCommand(ctx, w, spec.DatabaseID, spec.InstanceID, cmd.StringSlice()...) 
+} + +func (o *Orchestrator) ValidateInstanceSpecs(_ context.Context, changes []*database.InstanceSpecChange) ([]*database.ValidationResult, error) { + // TODO: validate posix backup and restore repository directories + results := make([]*database.ValidationResult, 0) + + for _, ch := range changes { + result := &database.ValidationResult{ + Valid: true, + NodeName: ch.Current.NodeName, + HostID: ch.Current.HostID, + } + var prevPort *int + var prevPatroniPort *int + if ch.Previous != nil { + prevPort = ch.Previous.Port + prevPatroniPort = ch.Previous.PatroniPort + } + if err := validatePort(prevPort, ch.Current.Port); err != nil { + result.Valid = false + result.Errors = append(result.Errors, fmt.Sprintf("postgres port: %v", err)) + } + if err := validatePort(prevPatroniPort, ch.Current.PatroniPort); err != nil { + result.Valid = false + result.Errors = append(result.Errors, fmt.Sprintf("patroni port: %v", err)) + } + + results = append(results, result) + } + + return results, nil +} + +func validatePort(previous, current *int) error { + if current == nil { + return errors.New("port must be defined") + } + if *current == 0 { + // When port is 0, we'll allocate a free port at deploy time + return nil + } + if *current > 65535 { + return fmt.Errorf("port %d is out of range", *current) + } + if ptrEqual(previous, current) { + return nil + } + return checkPortAvailable(*current) +} + +func checkPortAvailable(port int) error { + l, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + return fmt.Errorf("cannot bind port %d: %w", port, err) + } + defer l.Close() + return nil +} + +func ptrEqual[T comparable](a, b *T) bool { + if a == nil && b == nil { + return true + } + if a == nil || b == nil { + return false + } + return *a == *b +} + +func (o *Orchestrator) StopInstance(ctx context.Context, instanceID string) error { + if err := o.client.StopUnit(ctx, patroniServiceName(instanceID), true); err != nil { + return fmt.Errorf("failed to stop patroni 
unit: %w", err) + } + return nil +} + +func (o *Orchestrator) StartInstance(ctx context.Context, instanceID string) error { + if err := o.client.StartUnit(ctx, patroniServiceName(instanceID)); err != nil { + return fmt.Errorf("failed to start patroni unit: %w", err) + } + return nil +} + +func (o *Orchestrator) WorkerQueues() ([]workflow.Queue, error) { + return []workflow.Queue{ + utils.AnyQueue(), + utils.HostQueue(o.cfg.HostID), + }, nil +} + +func (o *Orchestrator) NodeDSN(ctx context.Context, rc *resource.Context, nodeName string, fromInstanceID string, dbName string) (*postgres.DSN, error) { + return &postgres.DSN{ + Service: nodeName, + DBName: dbName, + }, nil +} + +func (o *Orchestrator) instancePaths(pgVersion *ds.Version, instanceID string) (common.InstancePaths, error) { + pgMajor, ok := pgVersion.MajorString() + if !ok { + return common.InstancePaths{}, errors.New("got empty postgres version") + } + + var baseDir string + if o.cfg.SystemD.InstanceDataDir != "" { + baseDir = filepath.Join(o.cfg.SystemD.InstanceDataDir, pgMajor, instanceID) + } else { + baseDir = filepath.Join(o.packageManager.InstanceDataBaseDir(pgMajor), instanceID) + } + + return common.InstancePaths{ + Instance: common.Paths{BaseDir: baseDir}, + Host: common.Paths{BaseDir: baseDir}, + PgBackRestPath: o.cfg.SystemD.PgBackRestPath, + PatroniPath: o.cfg.SystemD.PatroniPath, + }, nil +} + +func patroniServiceName(instanceID string) string { + return fmt.Sprintf("patroni-%s.service", instanceID) +} diff --git a/server/internal/orchestrator/systemd/packages.go b/server/internal/orchestrator/systemd/packages.go new file mode 100644 index 00000000..87362d74 --- /dev/null +++ b/server/internal/orchestrator/systemd/packages.go @@ -0,0 +1,64 @@ +package systemd + +import ( + "context" + + "github.com/pgEdge/control-plane/server/internal/ds" +) + +type InstalledPackage struct { + PostgresMajor string + Version *ds.Version + Name string +} + +func InstalledPostgresPackage(pkg, ver string) 
(*InstalledPackage, error) { + version, err := toVersion(ver) + if err != nil { + return nil, err + } + pgMajor, err := postgresVersionFromPostgresPkg(pkg) + if err != nil { + return nil, err + } + return &InstalledPackage{ + PostgresMajor: pgMajor, + Version: version, + Name: pkg, + }, nil +} + +func InstalledSpockPackage(pkg, ver string) (*InstalledPackage, error) { + version, err := toVersion(ver) + if err != nil { + return nil, err + } + pgMajor, err := postgresVersionFromSpockPkg(pkg) + if err != nil { + return nil, err + } + return &InstalledPackage{ + PostgresMajor: pgMajor, + Version: version, + Name: pkg, + }, nil +} + +func PackageCmp(a, b *InstalledPackage) int { + return a.Version.Compare(b.Version) +} + +type InstalledPostgres struct { + Postgres *InstalledPackage + Spock []*InstalledPackage +} + +func InstalledPostgresCmp(a, b *InstalledPostgres) int { + return a.Postgres.Version.Compare(b.Postgres.Version) +} + +type PackageManager interface { + InstalledPostgresVersions(ctx context.Context) ([]*InstalledPostgres, error) + InstanceDataBaseDir(pgMajor string) string + BinDir(pgMajor string) string +} diff --git a/server/internal/orchestrator/systemd/patroni_config.go b/server/internal/orchestrator/systemd/patroni_config.go new file mode 100644 index 00000000..b06b73fb --- /dev/null +++ b/server/internal/orchestrator/systemd/patroni_config.go @@ -0,0 +1,87 @@ +package systemd + +import ( + "context" + "fmt" + "strings" + + "github.com/pgEdge/control-plane/server/internal/ds" + "github.com/pgEdge/control-plane/server/internal/host" + "github.com/pgEdge/control-plane/server/internal/orchestrator/common" + "github.com/pgEdge/control-plane/server/internal/resource" + "github.com/samber/do" +) + +var _ resource.Resource = (*PatroniConfig)(nil) + +const ResourceTypePatroniConfig resource.Type = "systemd.patroni_config" + +func PatroniConfigIdentifier(instanceID string) resource.Identifier { + return resource.Identifier{ + ID: instanceID, + Type: 
ResourceTypePatroniConfig, + } +} + +type PatroniConfig struct { + DatabaseID string `json:"database_id"` + Base *common.PatroniConfig `json:"base"` + AllHostIDs []string `json:"all_host_ids"` +} + +func (c *PatroniConfig) ResourceVersion() string { + return "1" +} + +func (c *PatroniConfig) DiffIgnore() []string { + return nil +} + +func (c *PatroniConfig) Executor() resource.Executor { + return resource.HostExecutor(c.Base.HostID) +} + +func (c *PatroniConfig) Identifier() resource.Identifier { + return PatroniConfigIdentifier(c.Base.InstanceID) +} + +func (c *PatroniConfig) Dependencies() []resource.Identifier { + return c.Base.Dependencies() +} + +func (c *PatroniConfig) TypeDependencies() []resource.Type { + return nil +} + +func (c *PatroniConfig) Refresh(ctx context.Context, rc *resource.Context) error { + return c.Base.Refresh(ctx, rc) +} + +func (c *PatroniConfig) Create(ctx context.Context, rc *resource.Context) error { + hostSvc, err := do.Invoke[*host.Service](rc.Injector) + if err != nil { + return err + } + hosts, err := hostSvc.GetHosts(ctx, c.AllHostIDs) + if err != nil { + return fmt.Errorf("failed to get hosts: %w", err) + } + if len(hosts) != len(c.AllHostIDs) { + return fmt.Errorf("wrong number of hosts: expected %d, got %d", len(c.AllHostIDs), len(hosts)) + } + + addresses := ds.NewSet[string]() + for _, h := range hosts { + addresses.Add(h.PeerAddresses...) 
+ } + + return c.Base.Create(ctx, rc, addresses.ToSortedSlice(strings.Compare), nil) +} + +func (c *PatroniConfig) Update(ctx context.Context, rc *resource.Context) error { + return c.Create(ctx, rc) +} + +func (c *PatroniConfig) Delete(ctx context.Context, rc *resource.Context) error { + return c.Base.Delete(ctx, rc) +} diff --git a/server/internal/orchestrator/systemd/patroni_unit.go b/server/internal/orchestrator/systemd/patroni_unit.go new file mode 100644 index 00000000..878cea45 --- /dev/null +++ b/server/internal/orchestrator/systemd/patroni_unit.go @@ -0,0 +1,76 @@ +package systemd + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/coreos/go-systemd/v22/unit" + + "github.com/pgEdge/control-plane/server/internal/orchestrator/common" +) + +func patroniUnitOptions(paths common.InstancePaths, pgBinPath string) []*unit.UnitOption { + pathEnv := "PATH=" + pgBinPath + if p := os.Getenv("PATH"); p != "" { + pathEnv += ":" + p + } + + return []*unit.UnitOption{ + { + Section: "Unit", + Name: "After", + Value: "syslog.target network.target", + }, + { + Section: "Service", + Name: "Type", + Value: "simple", + }, + { + Section: "Service", + Name: "User", + Value: "postgres", + }, + { + Section: "Service", + Name: "ExecStart", + Value: fmt.Sprintf("%s %s", paths.PatroniPath, paths.Instance.PatroniConfig()), + }, + { + Section: "Service", + Name: "ExecReload", + Value: "/bin/kill -s HUP $MAINPID", + }, + { + Section: "Service", + Name: "KillMode", + Value: "process", + }, + { + Section: "Service", + Name: "TimeoutSec", + Value: "30", + }, + { + Section: "Service", + Name: "Restart", + Value: "on-failure", + }, + { + Section: "Service", + Name: "Environment", + Value: pathEnv, + }, + { + Section: "Service", + Name: "Environment", + Value: "PGSERVICEFILE=" + filepath.Join(paths.Instance.Configs(), "pg_service.conf"), + }, + { + Section: "Install", + Name: "WantedBy", + Value: "multi-user.target", + }, + } +} diff --git 
a/server/internal/orchestrator/systemd/pgbackrest_restore.go b/server/internal/orchestrator/systemd/pgbackrest_restore.go new file mode 100644 index 00000000..01a16537 --- /dev/null +++ b/server/internal/orchestrator/systemd/pgbackrest_restore.go @@ -0,0 +1,238 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "path/filepath" + + "github.com/google/uuid" + "github.com/rs/zerolog" + "github.com/samber/do" + "github.com/spf13/afero" + + "github.com/pgEdge/control-plane/server/internal/database" + "github.com/pgEdge/control-plane/server/internal/orchestrator/common" + "github.com/pgEdge/control-plane/server/internal/resource" + "github.com/pgEdge/control-plane/server/internal/task" + "github.com/pgEdge/control-plane/server/internal/utils" +) + +var _ resource.Resource = (*PgBackRestRestore)(nil) + +const ResourceTypePgBackRestRestore resource.Type = "systemd.pgbackrest_restore" + +func PgBackRestRestoreResourceIdentifier(instanceID string) resource.Identifier { + return resource.Identifier{ + ID: instanceID, + Type: ResourceTypePgBackRestRestore, + } +} + +type PgBackRestRestore struct { + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + InstanceID string `json:"instance_id"` + TaskID uuid.UUID `json:"task_id"` + NodeName string `json:"node_name"` + Paths common.InstancePaths `json:"paths"` + RestoreOptions map[string]string `json:"restore_options"` +} + +func (p *PgBackRestRestore) ResourceVersion() string { + return "1" +} + +func (p *PgBackRestRestore) DiffIgnore() []string { + return nil +} + +func (p *PgBackRestRestore) Executor() resource.Executor { + return resource.HostExecutor(p.HostID) +} + +func (p *PgBackRestRestore) Identifier() resource.Identifier { + return PgBackRestRestoreResourceIdentifier(p.InstanceID) +} + +func (p *PgBackRestRestore) Dependencies() []resource.Identifier { + return []resource.Identifier{ + common.PgBackRestConfigIdentifier(p.InstanceID, common.PgBackRestConfigTypeRestore), + 
common.PatroniClusterResourceIdentifier(p.NodeName), + UnitResourceIdentifier(patroniServiceName(p.InstanceID), p.DatabaseID, p.HostID), + } +} + +func (p *PgBackRestRestore) TypeDependencies() []resource.Type { + return nil +} + +func (p *PgBackRestRestore) Refresh(ctx context.Context, rc *resource.Context) error { + return nil +} + +func (p *PgBackRestRestore) Create(ctx context.Context, rc *resource.Context) error { + orch, err := do.Invoke[database.Orchestrator](rc.Injector) + if err != nil { + return err + } + taskSvc, err := do.Invoke[*task.Service](rc.Injector) + if err != nil { + return err + } + logger, err := do.Invoke[zerolog.Logger](rc.Injector) + if err != nil { + return err + } + fs, err := do.Invoke[afero.Fs](rc.Injector) + if err != nil { + return err + } + + t, err := p.startTask(ctx, taskSvc) + if err != nil { + return err + } + + handleError := func(cause error) error { + p.failTask(logger, taskSvc, t, cause) + return cause + } + + err = p.stopPostgres(ctx, rc, orch, fs) + if err != nil { + return handleError(err) + } + + err = p.runRestoreCmd(ctx, orch, logger, taskSvc) + if err != nil { + return handleError(err) + } + + err = p.renameDataDir(fs) + if err != nil { + return handleError(err) + } + + err = orch.StartInstance(ctx, p.InstanceID) + if err != nil { + return handleError(fmt.Errorf("failed to start patroni after restore: %w", err)) + } + + err = p.completeTask(ctx, taskSvc, t) + if err != nil { + return handleError(err) + } + + return nil +} + +func (p *PgBackRestRestore) startTask(ctx context.Context, taskSvc *task.Service) (*task.Task, error) { + t, err := taskSvc.GetTask(ctx, task.ScopeDatabase, p.DatabaseID, p.TaskID) + if err != nil { + return nil, fmt.Errorf("failed to get task %s: %w", p.TaskID, err) + } + t.Status = task.StatusRunning + t.InstanceID = p.InstanceID + t.HostID = p.HostID + if err := taskSvc.UpdateTask(ctx, t); err != nil { + return nil, fmt.Errorf("failed to update task to running: %w", err) + } + + return t, err 
+} + +func (p *PgBackRestRestore) failTask( + logger zerolog.Logger, + taskSvc *task.Service, + t *task.Task, + cause error, +) { + t.SetFailed(cause) + if err := taskSvc.UpdateTask(context.Background(), t); err != nil { + logger.Err(err). + Stringer("task_id", p.TaskID). + Msg("failed to update task to failed") + } +} + +func (p *PgBackRestRestore) completeTask( + ctx context.Context, + taskSvc *task.Service, + t *task.Task, +) error { + t.SetCompleted() + if err := taskSvc.UpdateTask(ctx, t); err != nil { + return fmt.Errorf("failed to update task to completed: %w", err) + } + + return nil +} + +func (p *PgBackRestRestore) stopPostgres( + ctx context.Context, + rc *resource.Context, + orch database.Orchestrator, + fs afero.Fs, +) error { + patroniCluster, err := resource.FromContext[*common.PatroniCluster](rc, common.PatroniClusterResourceIdentifier(p.NodeName)) + if err != nil { + return fmt.Errorf("failed to get patroni cluster resource from state: %w", err) + } + + if err := orch.StopInstance(ctx, p.InstanceID); err != nil { + return fmt.Errorf("failed to stop patroni: %w", err) + } + + // This resource exists to make it easy to remove the patroni namespace. + // The namespace will automatically get recreated when Patroni starts up + // again. + if err := patroniCluster.Delete(ctx, rc); err != nil { + return fmt.Errorf("failed to delete patroni cluster: %w", err) + } + + // Remove the postmaster.pid file if it exists. This can happen if there was + // an improper shutdown. We know that Postgres is not running because we + // stopped the unit above. 
+ err = fs.Remove(filepath.Join(p.Paths.Instance.PgData(), "postmaster.pid")) + if err != nil && !errors.Is(err, afero.ErrFileNotFound) { + return fmt.Errorf("failed to remove postmaster.pid file: %w", err) + } + + return nil +} + +func (p *PgBackRestRestore) runRestoreCmd( + ctx context.Context, + orch database.Orchestrator, + logger zerolog.Logger, + taskSvc *task.Service, +) error { + restoreOptions := utils.BuildOptionArgs(p.RestoreOptions) + opts := append([]string{"--log-timestamp=n"}, restoreOptions...) + cmd := p.Paths.PgBackRestRestoreCmd("restore", opts...).StringSlice() + taskLogger := task.NewTaskLogWriter(ctx, taskSvc, task.ScopeDatabase, p.DatabaseID, p.TaskID) + + err := orch.ExecuteInstanceCommand(ctx, taskLogger, p.DatabaseID, p.InstanceID, cmd...) + if err != nil { + return fmt.Errorf("failed to execute pgbackrest restore command: %w", err) + } + + return nil +} + +func (p *PgBackRestRestore) renameDataDir(fs afero.Fs) error { + if err := fs.Rename(p.Paths.Instance.PgData(), p.Paths.Instance.PgDataRestore()); err != nil { + return fmt.Errorf("failed to rename pgdata for restore: %w", err) + } + + return nil +} + +func (p *PgBackRestRestore) Update(ctx context.Context, rc *resource.Context) error { + return nil +} + +func (p *PgBackRestRestore) Delete(ctx context.Context, rc *resource.Context) error { + return nil +} diff --git a/server/internal/orchestrator/systemd/provide.go b/server/internal/orchestrator/systemd/provide.go new file mode 100644 index 00000000..d394bbe7 --- /dev/null +++ b/server/internal/orchestrator/systemd/provide.go @@ -0,0 +1,55 @@ +package systemd + +import ( + "github.com/pgEdge/control-plane/server/internal/config" + "github.com/pgEdge/control-plane/server/internal/logging" + "github.com/samber/do" +) + +func Provide(i *do.Injector) { + provideClient(i) + providePackageManager(i) + provideOrchestrator(i) +} + +func provideClient(i *do.Injector) { + do.Provide(i, func(i *do.Injector) (*Client, error) { + loggerFactory, err 
:= do.Invoke[*logging.Factory](i) + if err != nil { + return nil, err + } + + return NewClient(loggerFactory), nil + }) +} + +func providePackageManager(i *do.Injector) { + do.Provide(i, func(i *do.Injector) (PackageManager, error) { + // TODO: add a function to check whether OS is RHEL-like or debian-like + // and return the appropriate package manager implementation. + return &Dnf{}, nil + }) +} + +func provideOrchestrator(i *do.Injector) { + do.Provide(i, func(i *do.Injector) (*Orchestrator, error) { + cfg, err := do.Invoke[config.Config](i) + if err != nil { + return nil, err + } + loggerFactory, err := do.Invoke[*logging.Factory](i) + if err != nil { + return nil, err + } + client, err := do.Invoke[*Client](i) + if err != nil { + return nil, err + } + packageManager, err := do.Invoke[PackageManager](i) + if err != nil { + return nil, err + } + + return NewOrchestrator(cfg, loggerFactory, client, packageManager) + }) +} diff --git a/server/internal/orchestrator/systemd/resources.go b/server/internal/orchestrator/systemd/resources.go new file mode 100644 index 00000000..faec35c5 --- /dev/null +++ b/server/internal/orchestrator/systemd/resources.go @@ -0,0 +1,9 @@ +package systemd + +import "github.com/pgEdge/control-plane/server/internal/resource" + +func RegisterResourceTypes(registry *resource.Registry) { + resource.RegisterResourceType[*PatroniConfig](registry, ResourceTypePatroniConfig) + resource.RegisterResourceType[*UnitResource](registry, ResourceTypeUnit) + resource.RegisterResourceType[*PgBackRestRestore](registry, ResourceTypePgBackRestRestore) +} diff --git a/server/internal/orchestrator/systemd/unit.go b/server/internal/orchestrator/systemd/unit.go new file mode 100644 index 00000000..134f0988 --- /dev/null +++ b/server/internal/orchestrator/systemd/unit.go @@ -0,0 +1,149 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/coreos/go-systemd/v22/unit" + "github.com/samber/do" + + 
"github.com/pgEdge/control-plane/server/internal/resource" +) + +const unitsDir = "/etc/systemd/system" + +var _ resource.Resource = (*UnitResource)(nil) + +const ResourceTypeUnit resource.Type = "systemd.unit" + +func UnitResourceIdentifier(name, databaseID, hostID string) resource.Identifier { + return resource.Identifier{ + ID: name + ":" + databaseID + ":" + hostID, + Type: ResourceTypeUnit, + } +} + +type UnitResource struct { + DatabaseID string `json:"database_id"` + HostID string `json:"host_id"` + Name string `json:"name"` + Options []*unit.UnitOption `json:"options"` + ExtraDependencies []resource.Identifier `json:"extra_dependencies"` +} + +func (r *UnitResource) Executor() resource.Executor { + return resource.HostExecutor(r.HostID) +} + +func (r *UnitResource) Identifier() resource.Identifier { + return UnitResourceIdentifier(r.Name, r.DatabaseID, r.HostID) +} + +func (r *UnitResource) Dependencies() []resource.Identifier { + return r.ExtraDependencies +} + +func (r *UnitResource) TypeDependencies() []resource.Type { + return nil +} + +func (r *UnitResource) Refresh(ctx context.Context, rc *resource.Context) error { + path := filepath.Join(unitsDir, r.Name) + f, err := os.Open(path) + if errors.Is(err, os.ErrNotExist) { + return resource.ErrNotFound + } else if err != nil { + return fmt.Errorf("failed to open unit file '%s': %w", path, err) + } + defer f.Close() + + options, err := unit.Deserialize(f) + if err != nil { + return fmt.Errorf("failed to deserialize unit file '%s': %w", path, err) + } + + r.Options = options + + return nil +} + +func (r *UnitResource) Create(ctx context.Context, rc *resource.Context) error { + client, err := do.Invoke[*Client](rc.Injector) + if err != nil { + return err + } + + path := filepath.Join(unitsDir, r.Name) + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return fmt.Errorf("failed to open unit file for writing '%s': %w", path, err) + } + defer f.Close() + + _, err = 
io.Copy(f, unit.Serialize(r.Options)) + if err != nil { + return fmt.Errorf("failed to write unit file '%s': %w", path, err) + } + + if err := client.Reload(ctx); err != nil { + return fmt.Errorf("failed to reload: %w", err) + } + if err := client.EnableUnit(ctx, r.Name); err != nil { + return fmt.Errorf("failed to enable unit '%s': %w", path, err) + } + if err := client.RestartUnit(ctx, r.Name); err != nil { + return fmt.Errorf("failed to restart unit '%s': %w", path, err) + } + + return nil +} + +func (r *UnitResource) Update(ctx context.Context, rc *resource.Context) error { + return r.Create(ctx, rc) +} + +func (r *UnitResource) Delete(ctx context.Context, rc *resource.Context) error { + client, err := do.Invoke[*Client](rc.Injector) + if err != nil { + return err + } + + err = client.UnitExists(ctx, r.Name) + switch { + case errors.Is(err, ErrUnitNotFound): + // No need to remove the unit if it doesn't exist + case err != nil: + return fmt.Errorf("failed to check if unit exists: %w", err) + default: + if err := client.StopUnit(ctx, r.Name, true); err != nil { + return fmt.Errorf("failed to stop unit: %w", err) + } + if err := client.DisableUnit(ctx, r.Name); err != nil { + return fmt.Errorf("failed to disable unit: %w", err) + } + } + + path := filepath.Join(unitsDir, r.Name) + err = os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("failed to remove unit file '%s': %w", path, err) + } + + if err := client.Reload(ctx); err != nil { + return fmt.Errorf("failed to reload: %w", err) + } + + return nil +} + +func (r *UnitResource) DiffIgnore() []string { + return nil +} + +func (r *UnitResource) ResourceVersion() string { + return "1" +} diff --git a/server/internal/postgres/hba/entry.go b/server/internal/postgres/hba/entry.go index 8b5f3791..2ca9da09 100644 --- a/server/internal/postgres/hba/entry.go +++ b/server/internal/postgres/hba/entry.go @@ -2,6 +2,7 @@ package hba import ( "fmt" + "net" ) type EntryType string @@ 
-59,7 +60,11 @@ func (e Entry) String() string { case EntryTypeInclude, EntryTypeIncludeIfExists, EntryTypeIncludeDir: return fmt.Sprintf("%-17s %s", e.Type, e.IncludePath) default: - entry = fmt.Sprintf("%-7s %-15s %-15s %-23s", e.Type, e.Database, e.User, e.Address) + address := e.Address + if e.Mask == "" { + address = transformAddress(address) + } + entry = fmt.Sprintf("%-7s %-15s %-15s %-23s", e.Type, e.Database, e.User, address) } if e.Mask != "" { entry += fmt.Sprintf(" %-23s", e.Mask) @@ -71,3 +76,22 @@ func (e Entry) String() string { } return entry } + +func transformAddress(address string) string { + ip := net.ParseIP(address) + if ip == nil { + return address + } + + // Bare IP addresses are invalid. They need to be accompanied by a range. + var cidr *net.IPNet + if ipv4 := ip.To4(); ipv4 != nil { + cidr = &net.IPNet{IP: ipv4, Mask: net.CIDRMask(32, 32)} + } else if ipv6 := ip.To16(); ipv6 != nil { + cidr = &net.IPNet{IP: ipv6, Mask: net.CIDRMask(128, 128)} + } else { + return "" + } + + return cidr.String() +} diff --git a/server/internal/workflows/activities/create_pgbackrest_backup.go b/server/internal/workflows/activities/create_pgbackrest_backup.go index 6fcb424c..de8d9591 100644 --- a/server/internal/workflows/activities/create_pgbackrest_backup.go +++ b/server/internal/workflows/activities/create_pgbackrest_backup.go @@ -42,25 +42,17 @@ func (a *Activities) CreatePgBackRestBackup(ctx context.Context, input *CreatePg logger := activity.Logger(ctx).With("instance_id", input.InstanceID) logger.Info("running pgbackrest backup") - orch, err := do.Invoke[database.Orchestrator](a.Injector) - if err != nil { - return nil, err - } taskSvc, err := do.Invoke[*task.Service](a.Injector) if err != nil { return nil, err } - dbSvc, err := do.Invoke[*database.Service](a.Injector) - if err != nil { - return nil, err - } - originalState, err := dbSvc.GetStoredInstanceState(ctx, input.DatabaseID, input.InstanceID) + originalState, err := 
a.DatabaseService.GetStoredInstanceState(ctx, input.DatabaseID, input.InstanceID) if err != nil { return nil, fmt.Errorf("failed to get current instance state: %w", err) } - err = dbSvc.UpdateInstance(ctx, &database.InstanceUpdateOptions{ + err = a.DatabaseService.UpdateInstanceState(ctx, &database.InstanceStateUpdateOptions{ InstanceID: input.InstanceID, DatabaseID: input.DatabaseID, State: database.InstanceStateBackingUp, @@ -72,7 +64,7 @@ func (a *Activities) CreatePgBackRestBackup(ctx context.Context, input *CreatePg defer func() { // Backing up the database doesn't affect availability, so we always set // the instance back to its original state. - err = dbSvc.UpdateInstance(ctx, &database.InstanceUpdateOptions{ + err = a.DatabaseService.UpdateInstanceState(ctx, &database.InstanceStateUpdateOptions{ InstanceID: input.InstanceID, DatabaseID: input.DatabaseID, State: originalState, @@ -85,7 +77,7 @@ func (a *Activities) CreatePgBackRestBackup(ctx context.Context, input *CreatePg taskLogWriter := task.NewTaskLogWriter(ctx, taskSvc, task.ScopeDatabase, input.DatabaseID, input.TaskID) defer taskLogWriter.Close() - err = orch.CreatePgBackRestBackup(ctx, taskLogWriter, input.InstanceID, input.BackupOptions) + err = a.DatabaseService.CreatePgBackRestBackup(ctx, taskLogWriter, input.DatabaseID, input.InstanceID, input.BackupOptions) if err != nil { return nil, fmt.Errorf("failed to create pgBackRest backup: %w", err) } diff --git a/server/internal/workflows/activities/update_db_state.go b/server/internal/workflows/activities/update_db_state.go index c4aae2c5..1bfb805e 100644 --- a/server/internal/workflows/activities/update_db_state.go +++ b/server/internal/workflows/activities/update_db_state.go @@ -73,11 +73,9 @@ func (a *Activities) handleDatabaseFailed(ctx context.Context, databaseID string now := time.Now() for _, instance := range instances { if instance.State.IsInProgress() { - err := a.DatabaseService.UpdateInstance(ctx, &database.InstanceUpdateOptions{ + err := 
a.DatabaseService.UpdateInstanceState(ctx, &database.InstanceStateUpdateOptions{ InstanceID: instance.InstanceID, DatabaseID: instance.DatabaseID, - HostID: instance.HostID, - NodeName: instance.NodeName, State: database.InstanceStateFailed, Now: now, }) diff --git a/server/internal/workflows/activities/update_planned_instance_states.go b/server/internal/workflows/activities/update_planned_instance_states.go index a1b00626..06a69914 100644 --- a/server/internal/workflows/activities/update_planned_instance_states.go +++ b/server/internal/workflows/activities/update_planned_instance_states.go @@ -55,7 +55,7 @@ func (a *Activities) UpdatePlannedInstanceStates(ctx context.Context, input *Upd if err != nil { return nil, fmt.Errorf("failed to deserialize instance resource: %w", err) } - update := &database.InstanceUpdateOptions{ + update := &database.InstanceStateUpdateOptions{ InstanceID: instance.Spec.InstanceID, DatabaseID: instance.Spec.DatabaseID, HostID: instance.Spec.HostID, @@ -73,7 +73,7 @@ func (a *Activities) UpdatePlannedInstanceStates(ctx context.Context, input *Upd // Other event types don't require an update continue } - if err := a.DatabaseService.UpdateInstance(ctx, update); err != nil { + if err := a.DatabaseService.UpdateInstanceState(ctx, update); err != nil { return nil, fmt.Errorf("failed to update database instance '%s': %w", instance.Spec.InstanceID, err) } } diff --git a/server/internal/workflows/delete_database.go b/server/internal/workflows/delete_database.go index ba9bc338..17da5bdb 100644 --- a/server/internal/workflows/delete_database.go +++ b/server/internal/workflows/delete_database.go @@ -93,6 +93,11 @@ func (w *Workflows) DeleteDatabase(ctx workflow.Context, input *DeleteDatabaseIn return nil, fmt.Errorf("failed to plan database delete: %w", err) } + err = w.persistPlans(ctx, input.DatabaseID, input.TaskID, plans) + if err != nil { + return nil, handleError(err) + } + err = w.applyPlans(ctx, input.DatabaseID, input.TaskID, current, 
plans) if err != nil { return nil, handleError(err)