-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MISC] Check for peer relation when getting the primary endpoint #366
Changes from 23 commits
ac24da8
98e151e
14b7d3f
5d9da4c
375c957
8c47669
1e0b514
fa44790
7fce785
d50d669
8184dfa
5a09131
1eb5529
e14d5a1
dc4588f
b89476f
e7e8965
dbf7deb
3f1d62e
5c78ca8
1ea2833
beea41d
e520923
f3e3d33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
PostgreSQL, | ||
PostgreSQLCreateUserError, | ||
PostgreSQLEnableDisableExtensionError, | ||
PostgreSQLListUsersError, | ||
PostgreSQLUpdateUserPasswordError, | ||
) | ||
from charms.postgresql_k8s.v0.postgresql_tls import PostgreSQLTLS | ||
|
@@ -299,6 +300,9 @@ def postgresql(self) -> PostgreSQL: | |
@property | ||
def primary_endpoint(self) -> Optional[str]: | ||
"""Returns the endpoint of the primary instance or None when no primary available.""" | ||
if not self._peers: | ||
logger.debug("primary endpoint early exit: Peer relation not joined yet.") | ||
return None | ||
try: | ||
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): | ||
with attempt: | ||
|
@@ -308,7 +312,20 @@ def primary_endpoint(self) -> Optional[str]: | |
# returned is not in the list of the current cluster members | ||
# (like when the cluster was not updated yet after a failed switchover). | ||
if not primary_endpoint or primary_endpoint not in self._units_ips: | ||
raise ValueError() | ||
# TODO figure out why peer data is not available | ||
if ( | ||
primary_endpoint | ||
and len(self._units_ips) == 1 | ||
and len(self._peers.units) > 1 | ||
): | ||
logger.warning( | ||
"Possibly incoplete peer data: Will not map primary IP to unit IP" | ||
) | ||
return primary_endpoint | ||
Comment on lines
+315
to
+324
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On juju 3 we don't seem to be getting the peer data used to get the IP addr when a new unit is joining (e.g. when scaling in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It smells dangerous... |
||
logger.debug( | ||
"primary endpoint early exit: Primary IP not in cached peer list." | ||
) | ||
primary_endpoint = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
except RetryError: | ||
return None | ||
else: | ||
|
@@ -1036,6 +1053,10 @@ def _start_primary(self, event: StartEvent) -> None: | |
logger.exception(e) | ||
self.unit.status = BlockedStatus("Failed to create postgres user") | ||
return | ||
except PostgreSQLListUsersError: | ||
logger.warning("Deferriing on_start: Unable to list users") | ||
event.defer() | ||
return | ||
Comment on lines
+1056
to
+1059
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Skipping the wait in primary_endpoint could lead o exceptions here. |
||
|
||
self.postgresql.set_up_database() | ||
|
||
|
@@ -1381,6 +1402,17 @@ def _is_workload_running(self) -> bool: | |
|
||
return charmed_postgresql_snap.services["patroni"]["active"] | ||
|
||
@property | ||
def _can_connect_to_postgresql(self) -> bool: | ||
try: | ||
for attempt in Retrying(stop=stop_after_delay(30), wait=wait_fixed(3)): | ||
with attempt: | ||
assert self.postgresql.get_postgresql_timezones() | ||
except RetryError: | ||
logger.debug("Cannot connect to database") | ||
return False | ||
return True | ||
|
||
def update_config(self, is_creating_backup: bool = False) -> bool: | ||
"""Updates Patroni config file based on the existence of the TLS files.""" | ||
if ( | ||
|
@@ -1424,6 +1456,10 @@ def update_config(self, is_creating_backup: bool = False) -> bool: | |
logger.debug("Early exit update_config: Patroni not started yet") | ||
return False | ||
|
||
# Try to connect | ||
if not self._can_connect_to_postgresql: | ||
logger.warning("Early exit update_config: Cannot connect to Postgresql") | ||
return False | ||
self._validate_config_options() | ||
|
||
self._patroni.bulk_update_parameters_controller_by_patroni( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,6 @@ | |
|
||
|
||
@pytest.mark.group(1) | ||
@pytest.mark.skip(reason="DB Admin tests are currently broken") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good to have it re-enable. I suspect it is not related to this PR but enabled as coincident. Right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I reenabled it here to reduce the amount CI runs. |
||
async def test_landscape_scalable_bundle_db(ops_test: OpsTest, charm: str) -> None: | ||
"""Deploy Landscape Scalable Bundle to test the 'db-admin' relation.""" | ||
await ops_test.model.deploy( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the peer relation is not yet joined, don't try to find the primary.