Skip to content

Commit fb638a7

Browse files
committed
Merge remote-tracking branch 'origin/fix/5186-proxysql-stop-admin-crash' into v3.0-5218
2 parents dd847a1 + e5a6807 commit fb638a7

File tree

6 files changed

+430
-19
lines changed

6 files changed

+430
-19
lines changed

include/proxysql_structs.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,14 @@ enum proxysql_session_type {
758758
PROXYSQL_SESSION_NONE
759759
};
760760

761+
// Stop state enumeration for PROXYSQL STOP command (issue 5186)
762+
// Used to manage admin query access during module stop/start cycle
763+
enum proxy_stop_state {
764+
STOP_STATE_RUNNING = 0, // Normal operation, all modules running
765+
STOP_STATE_DRAINING = 1, // Admin queries being drained, modules stopping
766+
STOP_STATE_STOPPED = 2 // Modules stopped, only safe queries allowed
767+
};
768+
761769
#endif /* PROXYSQL_ENUMS */
762770

763771

@@ -959,6 +967,11 @@ struct _global_variables_t {
959967
bool nostart;
960968
int reload;
961969

970+
// Stop state management for PROXYSQL STOP command
971+
// See issue 5186: Fix query handling after PROXYSQL STOP
972+
volatile int stop_state;
973+
uint64_t active_admin_queries;
974+
962975
unsigned char protocol_version;
963976
char *mysql_server_version;
964977
uint32_t server_capabilities;

lib/Admin_Handler.cpp

Lines changed: 204 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ extern "C" void __gcov_dump();
8989
extern "C" void __gcov_reset();
9090
#endif
9191

92+
// Function declarations for issue 5186
93+
extern void ProxySQL_Main_init_main_modules();
94+
9295

9396
#ifdef DEBUG
9497
//#define BENCHMARK_FASTROUTING_LOAD
@@ -482,6 +485,54 @@ bool admin_handler_command_proxysql(char *query_no_space, unsigned int query_no_
482485
ProxySQL_Admin* SPA = (ProxySQL_Admin*)pa;
483486
bool rc = false;
484487

488+
// Handle PROXYSQL START after PROXYSQL STOP (issue 5186)
489+
if (glovars.stop_state == STOP_STATE_STOPPED) {
490+
proxy_info("PROXYSQL START: Restarting modules after STOP\n");
491+
492+
/*
493+
* CRITICAL: Why proper restart after PROXYSQL STOP is essential
494+
* =================================================================
495+
*
496+
* PROXYSQL STOP performs a complete shutdown of all core modules:
497+
* 1. MySQL and PgSQL thread pools (GloMTH, GloPTH) are shutdown
498+
* 2. Query Processors (GloMyQPro, GloMyAuth, etc.) are destroyed
499+
* 3. All global module pointers are set to NULL
500+
* 4. Thread synchronization objects are cleaned up
501+
*
502+
* Simply calling ProxySQL_Main_init_main_modules() is INSUFFICIENT because:
503+
* - It doesn't properly reinitialize thread synchronization
504+
* - It doesn't restart the MySQL/PgSQL thread pools
505+
* - It doesn't ensure proper thread initialization sequencing
506+
* - It leaves modules in inconsistent state
507+
*
508+
* Without complete reinitialization, admin queries will crash with:
509+
* - Segmentation faults accessing destroyed Query Processor modules
510+
* - Race conditions with partially initialized thread pools
511+
* - NULL pointer dereferences in GloMyQPro, GloMyAuth, etc.
512+
* - Lock contention on destroyed synchronization objects
513+
*
514+
* SOLUTION: Simulate initial startup conditions:
515+
* 1. Set GloVars.global.nostart = 1 (simulate "not started" state)
516+
* 2. Set admin_nostart_ = true (trigger startup logic)
517+
* 3. Let the normal START sequence reinitialize everything properly
518+
* 4. Ensure thread pools, query processors, and sync objects are rebuilt
519+
* 5. Maintain same initialization order as initial startup
520+
*
521+
* This prevents crashes and ensures full STOP/START functionality.
522+
*/
523+
524+
// Reset state to running and set nostart_ to trigger normal startup sequence
525+
glovars.stop_state = STOP_STATE_RUNNING;
526+
glovars.reload = 0;
527+
glovars.shutdown = 0;
528+
// Set nostart_ to true so the normal startup logic will trigger
529+
GloVars.global.nostart = 1;
530+
531+
// Continue to normal startup logic below
532+
admin_nostart_ = true;
533+
}
534+
535+
// Handle normal START (initial startup or restart after STOP)
485536
if (admin_nostart_) {
486537
rc = __sync_bool_compare_and_swap(&GloVars.global.nostart, 1, 0);
487538
}
@@ -536,6 +587,16 @@ bool admin_handler_command_proxysql(char *query_no_space, unsigned int query_no_
536587
}
537588
}
538589

590+
// Check if already stopped (issue 5186)
591+
if (glovars.stop_state == STOP_STATE_STOPPED) {
592+
SPA->send_error_msg_to_client(sess, (char*)"ProxySQL modules are already stopped");
593+
return false;
594+
}
595+
596+
// Set state to DRAINING - stop accepting new admin queries (issue 5186)
597+
glovars.stop_state = STOP_STATE_DRAINING;
598+
proxy_info("PROXYSQL STOP: Setting state to DRAINING, waiting for %lu admin queries to complete\n", (unsigned long)glovars.active_admin_queries);
599+
539600
char buf[32];
540601

541602
// ----- MySQL module stop -----
@@ -558,22 +619,72 @@ bool admin_handler_command_proxysql(char *query_no_space, unsigned int query_no_
558619
GloPTH->set_variable((char*)"wait_timeout", buf);
559620
GloPTH->commit();
560621

561-
// ----- Common shutdown actions -----
622+
// ----- Wait for admin queries to complete (issue 5186) -----
623+
int wait_time_ms = 0;
624+
int max_wait_time_ms = 30000; // 30 seconds timeout
625+
uint64_t last_active_queries = glovars.active_admin_queries;
626+
int stable_count = 0;
627+
628+
proxy_info("PROXYSQL STOP: Initial admin query count: %lu\n", (unsigned long)glovars.active_admin_queries);
629+
630+
// Wait for all other admin queries to complete (subtract 1 for current PROXYSQL STOP query)
631+
while (glovars.active_admin_queries > 1 && wait_time_ms < max_wait_time_ms) {
632+
usleep(100000); // 100ms intervals
633+
wait_time_ms += 100;
634+
635+
if (last_active_queries == glovars.active_admin_queries) {
636+
stable_count++;
637+
} else {
638+
stable_count = 0;
639+
last_active_queries = glovars.active_admin_queries;
640+
proxy_info("PROXYSQL STOP: Admin query count changed to: %lu\n", (unsigned long)glovars.active_admin_queries);
641+
}
642+
643+
if (wait_time_ms % 1000 == 0) {
644+
proxy_info("PROXYSQL STOP: Waiting for %lu admin queries to complete (%d/%ds), stable for %d cycles\n",
645+
(unsigned long)(glovars.active_admin_queries - 1), wait_time_ms/1000, max_wait_time_ms/1000, stable_count);
646+
}
647+
}
648+
649+
if (glovars.active_admin_queries > 1) {
650+
proxy_warning("PROXYSQL STOP: %lu admin queries still active after timeout (stable count: %d), proceeding with module stop\n",
651+
(unsigned long)(glovars.active_admin_queries - 1), stable_count);
652+
} else {
653+
proxy_info("PROXYSQL STOP: All admin queries completed, proceeding with module stop\n");
654+
}
655+
656+
// ----- Common module stop actions -----
562657
glovars.reload = 2;
658+
glovars.stop_state = STOP_STATE_STOPPED;
563659

564660
// Reset Prometheus counters
565661
if (GloVars.prometheus_registry)
566662
GloVars.prometheus_registry->ResetCounters();
567663

568-
// Signal shutdown and wait for completion
664+
// Signal module stop and wait for completion
665+
proxy_info("PROXYSQL STOP: Starting thread shutdown sequence\n");
569666
__sync_bool_compare_and_swap(&glovars.shutdown, 0, 1);
667+
668+
proxy_info("PROXYSQL STOP: Signaling MySQL threads to shutdown\n");
570669
GloMTH->signal_all_threads(0);
670+
proxy_info("PROXYSQL STOP: MySQL threads signaled\n");
671+
672+
proxy_info("PROXYSQL STOP: Signaling PgSQL threads to shutdown\n");
571673
GloPTH->signal_all_threads(0);
674+
proxy_info("PROXYSQL STOP: PgSQL threads signaled\n");
572675

676+
proxy_info("PROXYSQL STOP: Entering shutdown wait loop\n");
677+
int wait_count = 0;
573678
while (__sync_fetch_and_add(&glovars.shutdown, 0) == 1) {
574679
usleep(1000);
680+
wait_count++;
681+
if (wait_count % 1000 == 0) { // Log every 1 second
682+
proxy_info("PROXYSQL STOP: Still waiting for thread shutdown, count=%d\n", wait_count);
683+
}
575684
}
685+
proxy_info("PROXYSQL STOP: Exited shutdown wait loop after %d iterations\n", wait_count);
576686

687+
proxy_info("PROXYSQL STOP: Module stop completed, all modules stopped\n");
577688
SPA->send_ok_msg_to_client(sess, NULL, 0, query_no_space);
578689

579690
return false;
@@ -2332,6 +2443,10 @@ template<typename S>
23322443
void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
23332444

23342445
ProxySQL_Admin *pa=(ProxySQL_Admin *)_pa;
2446+
2447+
// Increment admin query counter for issue 5186
2448+
// This tracks active admin queries during PROXYSQL STOP
2449+
__sync_fetch_and_add(&glovars.active_admin_queries, 1);
23352450
bool needs_vacuum = false;
23362451
char *error=NULL;
23372452
int cols;
@@ -2597,9 +2712,16 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
25972712

25982713
if (!strncasecmp(CLUSTER_QUERY_MYSQL_USERS, query_no_space, strlen(CLUSTER_QUERY_MYSQL_USERS))) {
25992714
if (sess->session_type == PROXYSQL_SESSION_ADMIN) {
2600-
pthread_mutex_lock(&users_mutex);
2601-
resultset = GloMyAuth->get_current_mysql_users();
2602-
pthread_mutex_unlock(&users_mutex);
2715+
if (glovars.stop_state == STOP_STATE_RUNNING) {
2716+
pthread_mutex_lock(&users_mutex);
2717+
resultset = GloMyAuth->get_current_mysql_users();
2718+
pthread_mutex_unlock(&users_mutex);
2719+
} else {
2720+
// Return empty resultset when modules are stopped (issue 5186)
2721+
resultset = new SQLite3_result(2);
2722+
resultset->add_column_definition(SQLITE_TEXT, "username");
2723+
resultset->add_column_definition(SQLITE_TEXT, "password");
2724+
}
26032725
if (resultset != nullptr) {
26042726
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
26052727
run_query=false;
@@ -2610,28 +2732,37 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
26102732

26112733
if (sess->session_type == PROXYSQL_SESSION_ADMIN) { // no stats
26122734
if (!strncasecmp(CLUSTER_QUERY_MYSQL_QUERY_RULES, query_no_space, strlen(CLUSTER_QUERY_MYSQL_QUERY_RULES))) {
2613-
GloMyQPro->wrlock();
2614-
resultset = GloMyQPro->get_current_query_rules_inner();
2615-
if (resultset == NULL) {
2616-
GloMyQPro->wrunlock(); // unlock first
2617-
resultset = GloMyQPro->get_current_query_rules();
2618-
if (resultset) {
2735+
if (glovars.stop_state == STOP_STATE_RUNNING) {
2736+
GloMyQPro->wrlock();
2737+
resultset = GloMyQPro->get_current_query_rules_inner();
2738+
if (resultset == NULL) {
2739+
GloMyQPro->wrunlock(); // unlock first
2740+
resultset = GloMyQPro->get_current_query_rules();
2741+
if (resultset) {
2742+
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
2743+
delete resultset;
2744+
run_query=false;
2745+
}
2746+
} else {
26192747
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
2620-
delete resultset;
2748+
GloMyQPro->wrunlock();
26212749
run_query=false;
2622-
goto __run_query;
26232750
}
26242751
} else {
2752+
// Return empty resultset when modules are stopped (issue 5186)
2753+
resultset = new SQLite3_result(2);
2754+
resultset->add_column_definition(SQLITE_TEXT, "rule_id");
2755+
resultset->add_column_definition(SQLITE_TEXT, "active");
26252756
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
2626-
//delete resultset; // DO NOT DELETE . This is the inner resultset of Query_Processor
2627-
GloMyQPro->wrunlock();
2757+
delete resultset;
26282758
run_query=false;
26292759
goto __run_query;
26302760
}
26312761
}
26322762
if (!strncasecmp(CLUSTER_QUERY_MYSQL_QUERY_RULES_FAST_ROUTING, query_no_space, strlen(CLUSTER_QUERY_MYSQL_QUERY_RULES_FAST_ROUTING))) {
2633-
GloMyQPro->wrlock();
2634-
resultset = GloMyQPro->get_current_query_rules_fast_routing_inner();
2763+
if (glovars.stop_state == STOP_STATE_RUNNING) {
2764+
GloMyQPro->wrlock();
2765+
resultset = GloMyQPro->get_current_query_rules_fast_routing_inner();
26352766
if (resultset == NULL) {
26362767
GloMyQPro->wrunlock(); // unlock first
26372768
resultset = GloMyQPro->get_current_query_rules_fast_routing();
@@ -2648,14 +2779,27 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
26482779
run_query=false;
26492780
goto __run_query;
26502781
}
2782+
} else {
2783+
// Return empty resultset when modules are stopped (issue 5186)
2784+
resultset = new SQLite3_result(2);
2785+
resultset->add_column_definition(SQLITE_TEXT, "rule_id");
2786+
resultset->add_column_definition(SQLITE_TEXT, "hostname");
2787+
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
2788+
delete resultset;
2789+
run_query=false;
2790+
goto __run_query;
2791+
}
26512792
}
26522793
}
26532794

26542795
// if the client simply executes:
26552796
// SELECT COUNT(*) FROM runtime_mysql_query_rules_fast_routing
26562797
// we just return the count
26572798
if (strcmp("SELECT COUNT(*) FROM runtime_mysql_query_rules_fast_routing", query_no_space)==0) {
2658-
int cnt = GloMyQPro->get_current_query_rules_fast_routing_count();
2799+
int cnt = 0;
2800+
if (glovars.stop_state == STOP_STATE_RUNNING) {
2801+
cnt = GloMyQPro->get_current_query_rules_fast_routing_count();
2802+
}
26592803
l_free(query_length,query);
26602804
char buf[256];
26612805
sprintf(buf,"SELECT %d AS 'COUNT(*)'", cnt);
@@ -2861,11 +3005,46 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
28613005
goto __run_query;
28623006
}
28633007
}
3008+
#if 0
28643009
{
3010+
// this block seems unnecessary, as we have enough fencing
28653011
ProxySQL_Admin *SPA=(ProxySQL_Admin *)pa;
3012+
// Check if this is a dangerous query that should be blocked during STOP states (issue 5186)
3013+
if (glovars.stop_state != STOP_STATE_RUNNING && sess->session_type == PROXYSQL_SESSION_ADMIN) {
3014+
// Block dangerous runtime_* queries that access destroyed modules
3015+
if (!strncasecmp(query_no_space, "SELECT COUNT(*) FROM runtime_mysql_query_rules", strlen("SELECT COUNT(*) FROM runtime_mysql_query_rules")) ||
3016+
!strncasecmp(query_no_space, "SELECT COUNT(*) FROM runtime_mysql_query_rules_fast_routing", strlen("SELECT COUNT(*) FROM runtime_mysql_query_rules_fast_routing")) ||
3017+
!strncasecmp(query_no_space, "SELECT COUNT(*) FROM runtime_mysql_users", strlen("SELECT COUNT(*) FROM runtime_mysql_users")) ||
3018+
!strncasecmp(query_no_space, "SELECT COUNT(*) FROM stats_mysql_query_digest", strlen("SELECT COUNT(*) FROM stats_mysql_query_digest")) ||
3019+
!strncasecmp(query_no_space, "SELECT * FROM runtime_mysql_query_rules", strlen("SELECT * FROM runtime_mysql_query_rules")) ||
3020+
!strncasecmp(query_no_space, "SELECT * FROM runtime_mysql_query_rules_fast_routing", strlen("SELECT * FROM runtime_mysql_query_rules_fast_routing")) ||
3021+
!strncasecmp(query_no_space, "SELECT * FROM runtime_mysql_users", strlen("SELECT * FROM runtime_mysql_users")) ||
3022+
!strncasecmp(query_no_space, "SELECT * FROM stats_mysql_query_digest", strlen("SELECT * FROM stats_mysql_query_digest"))) {
3023+
3024+
//l_free(query_length, query); // ASAN correctly reports a double free
3025+
3026+
// Return empty resultset instead of crashing
3027+
SQLite3_result *resultset = new SQLite3_result(1);
3028+
resultset->add_column_definition(SQLITE_TEXT, "COUNT(*)");
3029+
3030+
// Add a single row with 0 for COUNT(*) queries
3031+
SQLite3_row *row = new SQLite3_row(1);
3032+
char *field_val = strdup("0");
3033+
row->fields[0] = field_val;
3034+
resultset->add_row(row);
3035+
3036+
sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot);
3037+
delete resultset;
3038+
delete row;
3039+
free(field_val);
3040+
3041+
run_query = false;
3042+
goto __run_query;
3043+
}
3044+
}
28663045
needs_vacuum = SPA->GenericRefreshStatistics(query_no_space,query_no_space_length, ( sess->session_type == PROXYSQL_SESSION_ADMIN ? true : false ) );
28673046
}
2868-
3047+
#endif // 0
28693048

28703049
if (!strncasecmp("SHOW GLOBAL VARIABLES LIKE 'read_only'", query_no_space, strlen("SHOW GLOBAL VARIABLES LIKE 'read_only'"))) {
28713050
l_free(query_length,query);
@@ -4137,6 +4316,12 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) {
41374316
pthread_mutex_unlock(&pa->sql_query_global_mutex);
41384317
}
41394318
}
4319+
4320+
__exit_cleanup:
4321+
// Decrement admin query counter for issue 5186
4322+
// This tracks active admin queries during PROXYSQL STOP
4323+
__sync_fetch_and_sub(&glovars.active_admin_queries, 1);
4324+
41404325
l_free(pkt->size-sizeof(mysql_hdr),query_no_space); // it is always freed here
41414326
l_free(query_length,query);
41424327
}

0 commit comments

Comments
 (0)