From 1794318ae07cb61e92d01f119f88adebd49f10d7 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Fri, 9 Apr 2021 23:31:13 -0500 Subject: [PATCH] Bugfix: thread exiting issues and related. My attempt to add locks to make helgrind ended up backfired on me. It seems that when an interrupt is received, the cancel is being sent and it happens at any point in time. Which includes when locks are opened. When an interrupt cancels a thread with an open lock, that lock is never closed. Then, using the same locks to handle cleanup of threads resulted in an occasional deadlock. Remove all locking in the main thread. The logic should be safe as the only cases where there might be a conflict, a f_thread_join() is in protected between them. The cancel function should avoid locks to, where possible. I am somewhat more nervous about this case, I need to review and confirm if main->thread->processs.size does not change. I previously updated the force cancel exit thread to use waits and locks. It later dawned on me that I could just get rid of the force cancel exit thread entirely and implement the same functionality in the cancel thread. --- level_3/controller/c/private-common.c | 3 + level_3/controller/c/private-common.h | 8 +- level_3/controller/c/private-thread.c | 250 ++++++++++------------------------ level_3/controller/c/private-thread.h | 16 --- 4 files changed, 76 insertions(+), 201 deletions(-) diff --git a/level_3/controller/c/private-common.c b/level_3/controller/c/private-common.c index 12cf932..08edc83 100644 --- a/level_3/controller/c/private-common.c +++ b/level_3/controller/c/private-common.c @@ -179,7 +179,10 @@ extern "C" { void controller_process_delete_simple(controller_process_t *process) { if (process->id_thread) { + f_thread_signal(process->id_thread, F_signal_kill); f_thread_join(process->id_thread, 0); + + process->id_thread = 0; } f_thread_condition_signal_all(&process->wait); diff --git a/level_3/controller/c/private-common.h b/level_3/controller/c/private-common.h index 9f1037a..83278b5 100644 --- a/level_3/controller/c/private-common.h +++ b/level_3/controller/c/private-common.h @@ -1075,10 +1075,8 @@ extern "C" { #ifndef _di_controller_thread_t_ #define controller_thread_cleanup_interval_long 3600 // 1 hour in seconds. #define controller_thread_cleanup_interval_short 180 // 3 minutes in seconds. - //#define controller_thread_exit_force_timeout 60 // 1 minute in seconds. - #define controller_thread_exit_process_force_wait 30000000 // 0.03 seconds in nanoseconds. - #define controller_thread_exit_process_force_total 200 // 6 seconds in multiples of wait. - #define controller_thread_exit_main_force_timeout 100000 // 0.1 seconds in microseconds. + #define controller_thread_exit_process_cancel_wait 60000000 // 0.06 seconds in nanoseconds. + #define controller_thread_exit_process_cancel_total 150 // 9 seconds in multiples of wait. #define controller_thread_simulation_timeout 200000 // 0.2 seconds in microseconds. #define controller_thread_wait_timeout_seconds 10 #define controller_thread_wait_timeout_nanoseconds 0 @@ -1090,7 +1088,6 @@ extern "C" { f_thread_id_t id_cleanup; f_thread_id_t id_control; - f_thread_id_t id_exit; f_thread_id_t id_rule; f_thread_id_t id_signal; @@ -1107,7 +1104,6 @@ extern "C" { f_thread_id_t_initialize, \ f_thread_id_t_initialize, \ f_thread_id_t_initialize, \ - f_thread_id_t_initialize, \ controller_lock_t_initialize, \ controller_processs_t_initialize, \ controller_cache_t_initialize, \ diff --git a/level_3/controller/c/private-thread.c b/level_3/controller/c/private-thread.c index a4bb24a..fb0b5bf 100644 --- a/level_3/controller/c/private-thread.c +++ b/level_3/controller/c/private-thread.c @@ -104,118 +104,6 @@ extern "C" { } #endif // _di_controller_thread_control_ -#ifndef _di_controller_thread_exit_force_ - void * controller_thread_exit_force(void *arguments) { - - controller_main_t *main = (controller_main_t *) arguments; - - f_thread_lock_read(&main->thread->lock.process); - - if (main->thread->processs.size) { - f_thread_unlock(&main->thread->lock.process); - - f_thread_lock_read(&main->thread->lock.process); - - pid_t *process_pids[main->thread->processs.used]; - f_thread_id_t *process_threads[main->thread->processs.used]; - - memset(process_pids, 0, sizeof(pid_t *) * main->thread->processs.used); - memset(process_threads, 0, sizeof(f_thread_id_t *) * main->thread->processs.used); - - f_array_length_t i = 0; - f_array_length_t used = 0; - - for (; i < main->thread->processs.size; ++i) { - if (main->thread->processs.array[i]->child > 0) { - - process_pids[used] = &main->thread->processs.array[i]->child; - - if (main->thread->processs.array[i]->id_thread) { - process_threads[used] = &main->thread->processs.array[i]->id_thread; - } - - ++used; - } - else if (main->thread->processs.array[i]->id_thread) { - process_threads[used] = &main->thread->processs.array[i]->id_thread; - - ++used; - } - } // for - - if (used) { - f_status_t status = F_none; - f_array_length_t spent = 0; - - struct timespec wait; - wait.tv_sec = 0; - wait.tv_nsec = controller_thread_exit_process_force_wait; - - for (i = 0; i < used && spent < controller_thread_exit_process_force_total; ++i) { - - do { - - status = f_thread_join_timed(*process_threads[i], wait, 0); - - if (status == F_none) { - if (process_pids[i]) *process_pids[i] = 0; - if (process_threads[i]) *process_threads[i] = 0; - - process_pids[i] = 0; - process_threads[i] = 0; - } - - spent++; - - } while (status == F_time && spent < controller_thread_exit_process_force_total); - - } // for - - for (i = 0; i < used; ++i) { - - if (process_pids[i] && *process_pids[i]) { - f_signal_send(F_signal_kill, *process_pids[i]); - - *process_pids[i] = 0; - process_pids[i] = 0; - } - - if (process_pids[i] && *process_pids[i]) { - f_thread_signal(*process_pids[i], F_signal_kill); - - f_thread_join(*process_pids[i], 0); - - *process_threads[i] = 0; - process_threads[i] = 0; - } - } // for - } - } - - f_thread_unlock(&main->thread->lock.process); - - usleep(controller_thread_exit_main_force_timeout); - - f_thread_lock_read(&main->thread->lock.process); - - if (main->thread->id_cleanup) { - f_thread_signal(main->thread->id_cleanup, F_signal_kill); - } - - if (main->thread->id_control) { - f_thread_signal(main->thread->id_control, F_signal_kill); - } - - if (main->thread->id_rule) { - f_thread_signal(main->thread->id_rule, F_signal_kill); - } - - f_thread_unlock(&main->thread->lock.process); - - return 0; - } -#endif // _di_controller_thread_exit_force_ - #ifndef _di_controller_thread_main_ f_status_t controller_thread_main(const f_string_static_t entry_name, controller_data_t *data, controller_setting_t *setting) { @@ -241,11 +129,7 @@ extern "C" { } if (F_status_is_error_not(status)) { - f_thread_lock_write(&main.thread->lock.process); - status = f_thread_create(0, &thread.id_signal, &controller_thread_signal, (void *) &main); - - f_thread_unlock(&main.thread->lock.process); } if (F_status_is_error(status)) { @@ -279,13 +163,9 @@ extern "C" { else { const controller_main_entry_t entry = controller_macro_main_entry_t_initialize(&entry_name, &main, setting); - f_thread_lock_write(&main.thread->lock.process); - // the entry processing runs using the rule thread. status = f_thread_create(0, &thread.id_rule, &controller_thread_entry, (void *) &entry); - f_thread_unlock(&main.thread->lock.process); - if (F_status_is_error(status)) { if (data->error.verbosity != f_console_verbosity_quiet) { controller_error_print(data->error, F_status_set_fine(status), "f_thread_create", F_true, &thread); @@ -328,32 +208,20 @@ extern "C" { } if (thread.enabled) { - f_thread_lock_write(&main.thread->lock.process); - status = f_thread_create(0, &thread.id_rule, &controller_thread_rule, (void *) &main); - f_thread_unlock(&main.thread->lock.process); - if (F_status_is_error(status)) { thread.id_rule = 0; } else { - f_thread_lock_write(&main.thread->lock.process); - status = f_thread_create(0, &thread.id_control, &controller_thread_control, (void *) &main); - - f_thread_unlock(&main.thread->lock.process); } if (F_status_is_error(status)) { thread.id_control = 0; } else { - f_thread_lock_write(&main.thread->lock.process); - status = f_thread_create(0, &thread.id_cleanup, &controller_thread_cleanup, (void *) &main); - - f_thread_unlock(&main.thread->lock.process); } if (F_status_is_error(status)) { @@ -378,49 +246,30 @@ extern "C" { if (status != F_signal && thread.id_signal) { f_thread_cancel(thread.id_signal); } - - controller_thread_process_cancel(&main); } - - if (data->parameters[controller_parameter_validate].result == f_console_result_none) { - f_thread_lock_read(&main.thread->lock.process); + else if (data->parameters[controller_parameter_validate].result == f_console_result_none) { if (thread.id_signal) { - f_thread_unlock(&main.thread->lock.process); - f_thread_join(thread.id_signal, 0); thread.id_signal = 0; } - else { - f_thread_unlock(&main.thread->lock.process); - } - - controller_thread_process_cancel(&main); + } + else { + f_thread_cancel(thread.id_signal); } - f_thread_lock_read(&main.thread->lock.process); - - if (thread.id_signal) f_thread_cancel(thread.id_signal); - if (thread.id_cleanup) f_thread_cancel(thread.id_cleanup); - if (thread.id_control) f_thread_cancel(thread.id_control); - if (thread.id_rule) f_thread_cancel(thread.id_rule); + controller_thread_process_cancel(&main); if (thread.id_signal) f_thread_join(thread.id_signal, 0); if (thread.id_cleanup) f_thread_join(thread.id_cleanup, 0); if (thread.id_control) f_thread_join(thread.id_control, 0); if (thread.id_rule) f_thread_join(thread.id_rule, 0); - f_thread_unlock(&main.thread->lock.process); - - // wait for exit thread to finish any cleanup. - if (thread.id_exit) f_thread_join(thread.id_exit, 0); - thread.id_cleanup = 0; thread.id_control = 0; thread.id_rule = 0; thread.id_signal = 0; - thread.id_exit = 0; controller_thread_delete_simple(&thread); @@ -469,37 +318,46 @@ extern "C" { #ifndef _di_controller_thread_process_cancel_ void controller_thread_process_cancel(controller_main_t *main) { - f_thread_lock_read(&main->thread->lock.process); - // only cancel when enabled. - if (!main->thread->enabled || main->thread->id_exit) { - f_thread_unlock(&main->thread->lock.process); - + if (!main->thread->enabled) { return; } // this must be set, regardless of lock state and only this function changes this. main->thread->enabled = F_false; - f_thread_create(0, &main->thread->id_exit, &controller_thread_exit_force, (void *) main); + f_status_t status = F_none; + f_array_length_t spent = 0; + + struct timespec wait; + wait.tv_sec = 0; + wait.tv_nsec = controller_thread_exit_process_cancel_wait; controller_process_t *process = 0; f_array_length_t i = 0; + if (main->thread->id_cleanup) { + f_thread_cancel(main->thread->id_cleanup); + } + + if (main->thread->id_control) { + f_thread_cancel(main->thread->id_control); + } + + if (main->thread->id_rule) { + f_thread_cancel(main->thread->id_rule); + } + for (; i < main->thread->processs.used; ++i) { if (!main->thread->processs.array[i]) continue; process = main->thread->processs.array[i]; - f_thread_lock_read(&process->active); - if (process->child > 0) { f_signal_send(F_signal_termination, process->child); } - - f_thread_unlock(&process->active); } // for for (i = 0; i < main->thread->processs.used; ++i) { @@ -508,38 +366,72 @@ extern "C" { process = main->thread->processs.array[i]; - f_thread_lock_read(&process->active); - if (process->id_thread) { - f_thread_cancel(process->id_thread); + status = f_thread_join_timed(process->id_thread, wait, 0); + + if (status == F_none) { + process->child = 0; + process->id_thread = 0; + } + else { + f_thread_cancel(process->id_thread); + } } + } // for + + for (i = 0; i < main->thread->processs.size && spent < controller_thread_exit_process_cancel_total; ++i) { + + if (!main->thread->processs.array[i]) continue; - f_thread_unlock(&process->active); + process = main->thread->processs.array[i]; + + do { + status = f_thread_join_timed(process->id_thread, wait, 0); + + if (status == F_none) { + process->child = 0; + process->id_thread = 0; + } + + spent++; + + } while (status == F_time && spent < controller_thread_exit_process_cancel_total); } // for for (i = 0; i < main->thread->processs.size; ++i) { - if (!main->thread->processs.array[i]) continue; process = main->thread->processs.array[i]; if (process->id_thread) { - f_thread_join(process->id_thread, 0); - f_thread_lock_read(&process->active); + if (process->child > 0) { + f_signal_send(F_signal_kill, process->child); - process->id_thread = 0; + nanosleep(&wait, 0); + } + + f_thread_signal(process->id_thread, F_signal_kill); + + f_thread_join(process->id_thread, 0); - f_thread_unlock(&process->active); + process->child = 0; + process->id_thread = 0; } } // for - f_thread_unlock(&main->thread->lock.process); - f_thread_lock_write(&main->thread->lock.process); + // guarantee these threads are terminated. + if (main->thread->id_cleanup) { + f_thread_signal(main->thread->id_cleanup, F_signal_kill); + } - main->thread->processs.used = 0; + if (main->thread->id_control) { + f_thread_signal(main->thread->id_control, F_signal_kill); + } - f_thread_unlock(&main->thread->lock.process); + if (main->thread->id_rule) { + f_thread_signal(main->thread->id_rule, F_signal_kill); + } } #endif // _di_controller_thread_process_cancel_ diff --git a/level_3/controller/c/private-thread.h b/level_3/controller/c/private-thread.h index 6a861ca..6dbd0d0 100644 --- a/level_3/controller/c/private-thread.h +++ b/level_3/controller/c/private-thread.h @@ -41,22 +41,6 @@ extern "C" { #endif // _di_controller_thread_control_ /** - * Thread that gets started on exit to force other threads to exit. - * - * This will sleep for some time and then send termination signals to all threads and child processes. - * - * @param arguments - * The thread arguments. - * Must be of type controller_main_t. - * - * @return - * 0, always. - */ -#ifndef _di_controller_thread_exit_force_ - extern void * controller_thread_exit_force(void *arguments) f_gcc_attribute_visibility_internal; -#endif // _di_controller_thread_exit_force_ - -/** * Start all threads, wait on threads, and handle requests. * * @param entry_name -- 1.8.3.1