4343 WrapperProperties )
4444from aws_advanced_python_wrapper .utils .rds_url_type import RdsUrlType
4545from aws_advanced_python_wrapper .utils .rdsutils import RdsUtils
46+ from aws_advanced_python_wrapper .utils .telemetry .telemetry import \
47+ TelemetryTraceLevel
4648from aws_advanced_python_wrapper .writer_failover_handler import (
4749 WriterFailoverHandler , WriterFailoverHandlerImpl )
4850
@@ -84,7 +86,10 @@ def __init__(self, plugin_service: PluginService, props: Properties):
8486 self ._properties )
8587 self ._failover_reader_connect_timeout_sec = WrapperProperties .FAILOVER_READER_CONNECT_TIMEOUT_SEC .get_float (
8688 self ._properties )
87- self ._keep_session_state_on_failover = WrapperProperties .KEEP_SESSION_STATE_ON_FAILOVER .get_bool (self ._properties )
89+ self ._keep_session_state_on_failover = WrapperProperties .KEEP_SESSION_STATE_ON_FAILOVER .get_bool (
90+ self ._properties )
91+ self ._telemetry_failover_additional_top_trace_setting = (
92+ WrapperProperties .TELEMETRY_FAILOVER_ADDITIONAL_TOP_TRACE .get_bool (self ._properties ))
8893 self ._failover_mode : FailoverMode
8994 self ._is_in_transaction : bool = False
9095 self ._is_closed : bool = False
@@ -96,6 +101,18 @@ def __init__(self, plugin_service: PluginService, props: Properties):
96101 self ._saved_read_only_status : bool = False
97102 self ._saved_auto_commit_status : bool = False
98103
104+ telemetry_factory = self ._plugin_service .get_telemetry_factory ()
105+ self ._failover_writer_triggered_counter = telemetry_factory .create_counter ("writer_failover.triggered.count" )
106+ self ._failover_writer_success_counter = telemetry_factory .create_counter (
107+ "writer_failover.completed.success.count" )
108+ self ._failover_writer_failed_counter = telemetry_factory .create_counter (
109+ "writer_failover.completed.failed.count" )
110+ self ._failover_reader_triggered_counter = telemetry_factory .create_counter ("reader_failover.triggered.count" )
111+ self ._failover_reader_success_counter = telemetry_factory .create_counter (
112+ "reader_failover.completed.success.count" )
113+ self ._failover_reader_failed_counter = telemetry_factory .create_counter (
114+ "reader_failover.completed.failed.count" )
115+
99116 FailoverPlugin ._SUBSCRIBED_METHODS .update (self ._plugin_service .network_bound_methods )
100117
101118 def init_host_provider (
@@ -213,10 +230,13 @@ def _connect(
213230 properties : Properties ,
214231 is_initial_connection : bool ,
215232 connect_func : Callable ) -> Connection :
216- conn : Connection = self ._stale_dns_helper .get_verified_connection (is_initial_connection , self ._host_list_provider_service , host , properties ,
233+ conn : Connection = self ._stale_dns_helper .get_verified_connection (is_initial_connection ,
234+ self ._host_list_provider_service , host ,
235+ properties ,
217236 connect_func )
218237 if self ._keep_session_state_on_failover :
219- self ._saved_read_only_status = False if self ._saved_read_only_status == self ._plugin_service .driver_dialect .is_read_only (conn ) \
238+ self ._saved_read_only_status = False if self ._saved_read_only_status == self ._plugin_service .driver_dialect .is_read_only (
239+ conn ) \
220240 else self ._saved_read_only_status
221241 self ._saved_auto_commit_status = False \
222242 if self ._saved_read_only_status == self ._plugin_service .driver_dialect .get_autocommit (conn ) \
@@ -270,53 +290,96 @@ def _failover(self, failed_host: Optional[HostInfo]):
270290 raise FailoverSuccessError (Messages .get (error_msg ))
271291
def _failover_reader(self, failed_host: Optional[HostInfo]):
    """
    Fail over to a reader host and make the new connection the current one.

    The attempt is wrapped in a nested telemetry context named "failover to replica".
    The triggered counter is incremented up front; exactly one of the success/failed
    counters is incremented depending on the outcome.

    :param failed_host: the host that failed, or ``None``. If its raw availability is not
        ``HostAvailability.AVAILABLE`` it is replaced by ``None`` before being handed to
        the reader failover handler.
    :raises FailoverFailedError: if the handler returns ``None`` or a result that is not connected.
    :raises Exception: ``result.exception`` is re-raised when a connected result carries one.
    """
    telemetry_factory = self._plugin_service.get_telemetry_factory()
    context = telemetry_factory.open_telemetry_context("failover to replica", TelemetryTraceLevel.NESTED)
    self._failover_reader_triggered_counter.inc()

    try:
        logger.debug("FailoverPlugin.StartReaderFailover")

        # Remember the aliases of the host we are leaving so they can be removed from the new host below.
        old_aliases = None
        if self._plugin_service.current_host_info is not None:
            old_aliases = self._plugin_service.current_host_info.aliases

        if failed_host is not None and failed_host.get_raw_availability() != HostAvailability.AVAILABLE:
            failed_host = None

        result: ReaderFailoverResult = self._reader_failover_handler.failover(
            self._plugin_service.hosts, failed_host)

        # NOTE(review): the writer path checks result.exception *before* is_connected; here an
        # unconnected result always surfaces as FailoverFailedError. Kept as-is for compatibility.
        if result is None or not result.is_connected:
            raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToConnectToReader"))
        if result.exception is not None:
            raise result.exception

        if self._keep_session_state_on_failover:
            self.restore_session_state(result.connection)
        if result.connection is not None and result.new_host is not None:
            self._plugin_service.set_current_connection(result.connection, result.new_host)

        if self._plugin_service.current_host_info is not None and old_aliases is not None and len(old_aliases) > 0:
            self._plugin_service.current_host_info.remove_alias(old_aliases)

        self._update_topology(True)

        logger.debug("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info)

        # Mark the trace successful on the normal-return path as well; previously only the
        # FailoverSuccessError branch did so, while the success counter was bumped here.
        # NOTE(review): confirm the telemetry context does not already default to success.
        context.set_success(True)
        self._failover_reader_success_counter.inc()
    except FailoverSuccessError as fse:
        context.set_success(True)
        context.set_exception(fse)
        self._failover_reader_success_counter.inc()
        raise
    except Exception as ex:
        context.set_success(False)
        context.set_exception(ex)
        self._failover_reader_failed_counter.inc()
        raise
    finally:
        context.close_context()
        # Optionally publish a copy of the finished trace as a top-level trace.
        if self._telemetry_failover_additional_top_trace_setting:
            telemetry_factory.post_copy(context, TelemetryTraceLevel.FORCE_TOP_LEVEL)

def _failover_writer(self):
    """
    Fail over to the writer host and make the new connection the current one.

    The attempt is wrapped in a nested telemetry context named "failover to writer node".
    The triggered counter is incremented up front; exactly one of the success/failed
    counters is incremented depending on the outcome.

    :raises Exception: ``result.exception`` is re-raised when the handler's result carries one.
    :raises FailoverFailedError: if the handler returns ``None`` or a result that is not connected.
    """
    telemetry_factory = self._plugin_service.get_telemetry_factory()
    context = telemetry_factory.open_telemetry_context("failover to writer node", TelemetryTraceLevel.NESTED)
    self._failover_writer_triggered_counter.inc()

    try:
        logger.debug("FailoverPlugin.StartWriterFailover")

        result: WriterFailoverResult = self._writer_failover_handler.failover(self._plugin_service.hosts)

        # An exception reported by the handler takes precedence over the generic failure error.
        if result is not None and result.exception is not None:
            raise result.exception
        if result is None or not result.is_connected:
            raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToConnectToWriter"))

        writer_host = self._get_writer(result.topology)
        if self._keep_session_state_on_failover:
            self.restore_session_state(result.new_connection)

        self._plugin_service.set_current_connection(result.new_connection, writer_host)

        logger.debug("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info)

        self._plugin_service.refresh_host_list()

        # Mark the trace successful on the normal-return path as well; previously only the
        # FailoverSuccessError branch did so, while the success counter was bumped here.
        # NOTE(review): confirm the telemetry context does not already default to success.
        context.set_success(True)
        self._failover_writer_success_counter.inc()
    except FailoverSuccessError as fse:
        context.set_success(True)
        context.set_exception(fse)
        self._failover_writer_success_counter.inc()
        raise
    except Exception as ex:
        context.set_success(False)
        context.set_exception(ex)
        self._failover_writer_failed_counter.inc()
        raise
    finally:
        context.close_context()
        # Optionally publish a copy of the finished trace as a top-level trace.
        if self._telemetry_failover_additional_top_trace_setting:
            telemetry_factory.post_copy(context, TelemetryTraceLevel.FORCE_TOP_LEVEL)
320383
321384 def restore_session_state (self , conn : Optional [Connection ]):
322385 """
@@ -401,7 +464,8 @@ def _connect_to(self, host: HostInfo):
401464 logger .debug ("FailoverPlugin.EstablishedConnection" , host )
402465 except Exception as ex :
403466 if self ._plugin_service is not None :
404- logger .debug ("FailoverPlugin.ConnectionToHostFailed" , 'writer' if host .role == HostRole .WRITER else 'reader' , host .url )
467+ logger .debug ("FailoverPlugin.ConnectionToHostFailed" ,
468+ 'writer' if host .role == HostRole .WRITER else 'reader' , host .url )
405469 raise ex
406470
407471 def _should_attempt_reader_connection (self ) -> bool :
0 commit comments