]> Kevux Git Server - fll/commitdiff
Bugfix: thread exiting issues and related.
authorKevin Day <thekevinday@gmail.com>
Sat, 10 Apr 2021 04:31:13 +0000 (23:31 -0500)
committerKevin Day <thekevinday@gmail.com>
Sat, 10 Apr 2021 04:46:01 +0000 (23:46 -0500)
My attempt to add locks to make helgrind ended up backfired on me.
It seems that when an interrupt is received, the cancel is being sent and it happens at any point in time.
Which includes when locks are opened.
When an interrupt cancels a thread with an open lock, that lock is never closed.
Then, using the same locks to handle cleanup of threads resulted in an occasional deadlock.
Remove all locking in the main thread.
The logic should be safe as the only cases where there might be a conflict, a f_thread_join() is in protected between them.
The cancel function should avoid locks to, where possible.
I am somewhat more nervous about this case, I need to review and confirm if main->thread->processs.size does not change.

I previously updated the force cancel exit thread to use waits and locks.
It later dawned on me that I could just get rid of the force cancel exit thread entirely and implement the same functionality in the cancel thread.

level_3/controller/c/private-common.c
level_3/controller/c/private-common.h
level_3/controller/c/private-thread.c
level_3/controller/c/private-thread.h

index 12cf932632b3cad377ff8ad6d7a52aae22c57510..08edc83e44e0e9b769373ec6a87e3d2f5eb784c7 100644 (file)
@@ -179,7 +179,10 @@ extern "C" {
   void controller_process_delete_simple(controller_process_t *process) {
 
     if (process->id_thread) {
+      f_thread_signal(process->id_thread, F_signal_kill);
       f_thread_join(process->id_thread, 0);
+
+      process->id_thread = 0;
     }
 
     f_thread_condition_signal_all(&process->wait);
index 9f1037a5a8948c2228783e809da3d0994d6d3eb4..83278b50e5d12db6f035311fa8061d7518c417d6 100644 (file)
@@ -1075,10 +1075,8 @@ extern "C" {
 #ifndef _di_controller_thread_t_
   #define controller_thread_cleanup_interval_long  3600 // 1 hour in seconds.
   #define controller_thread_cleanup_interval_short 180  // 3 minutes in seconds.
-  //#define controller_thread_exit_force_timeout 60  // 1 minute in seconds.
-  #define controller_thread_exit_process_force_wait 30000000 // 0.03 seconds in nanoseconds.
-  #define controller_thread_exit_process_force_total 200 // 6 seconds in multiples of wait.
-  #define controller_thread_exit_main_force_timeout 100000 // 0.1 seconds in microseconds.
+  #define controller_thread_exit_process_cancel_wait 60000000 // 0.06 seconds in nanoseconds.
+  #define controller_thread_exit_process_cancel_total 150 // 9 seconds in multiples of wait.
   #define controller_thread_simulation_timeout 200000 // 0.2 seconds in microseconds.
   #define controller_thread_wait_timeout_seconds 10
   #define controller_thread_wait_timeout_nanoseconds 0
@@ -1090,7 +1088,6 @@ extern "C" {
 
     f_thread_id_t id_cleanup;
     f_thread_id_t id_control;
-    f_thread_id_t id_exit;
     f_thread_id_t id_rule;
     f_thread_id_t id_signal;
 
@@ -1107,7 +1104,6 @@ extern "C" {
     f_thread_id_t_initialize, \
     f_thread_id_t_initialize, \
     f_thread_id_t_initialize, \
-    f_thread_id_t_initialize, \
     controller_lock_t_initialize, \
     controller_processs_t_initialize, \
     controller_cache_t_initialize, \
index a4bb24a560e68f12380aede520643dad42d72850..fb0b5bf13c3d00e77a0cf6823846280b82719e9e 100644 (file)
@@ -104,118 +104,6 @@ extern "C" {
   }
 #endif // _di_controller_thread_control_
 
-#ifndef _di_controller_thread_exit_force_
-  void * controller_thread_exit_force(void *arguments) {
-
-    controller_main_t *main = (controller_main_t *) arguments;
-
-    f_thread_lock_read(&main->thread->lock.process);
-
-    if (main->thread->processs.size) {
-      f_thread_unlock(&main->thread->lock.process);
-
-      f_thread_lock_read(&main->thread->lock.process);
-
-      pid_t *process_pids[main->thread->processs.used];
-      f_thread_id_t *process_threads[main->thread->processs.used];
-
-      memset(process_pids, 0, sizeof(pid_t *) * main->thread->processs.used);
-      memset(process_threads, 0, sizeof(f_thread_id_t *) * main->thread->processs.used);
-
-      f_array_length_t i = 0;
-      f_array_length_t used = 0;
-
-      for (; i < main->thread->processs.size; ++i) {
-        if (main->thread->processs.array[i]->child > 0) {
-
-          process_pids[used] = &main->thread->processs.array[i]->child;
-
-          if (main->thread->processs.array[i]->id_thread) {
-            process_threads[used] = &main->thread->processs.array[i]->id_thread;
-          }
-
-          ++used;
-        }
-        else if (main->thread->processs.array[i]->id_thread) {
-          process_threads[used] = &main->thread->processs.array[i]->id_thread;
-
-          ++used;
-        }
-      } // for
-
-      if (used) {
-        f_status_t status = F_none;
-        f_array_length_t spent = 0;
-
-        struct timespec wait;
-        wait.tv_sec = 0;
-        wait.tv_nsec = controller_thread_exit_process_force_wait;
-
-        for (i = 0; i < used && spent < controller_thread_exit_process_force_total; ++i) {
-
-          do {
-
-            status = f_thread_join_timed(*process_threads[i], wait, 0);
-
-            if (status == F_none) {
-              if (process_pids[i]) *process_pids[i] = 0;
-              if (process_threads[i]) *process_threads[i] = 0;
-
-              process_pids[i] = 0;
-              process_threads[i] = 0;
-            }
-
-            spent++;
-
-          } while (status == F_time && spent < controller_thread_exit_process_force_total);
-
-        } // for
-
-        for (i = 0; i < used; ++i) {
-
-          if (process_pids[i] && *process_pids[i]) {
-            f_signal_send(F_signal_kill, *process_pids[i]);
-
-            *process_pids[i] = 0;
-            process_pids[i] = 0;
-          }
-
-          if (process_pids[i] && *process_pids[i]) {
-            f_thread_signal(*process_pids[i], F_signal_kill);
-
-            f_thread_join(*process_pids[i], 0);
-
-            *process_threads[i] = 0;
-            process_threads[i] = 0;
-          }
-        } // for
-      }
-    }
-
-    f_thread_unlock(&main->thread->lock.process);
-
-    usleep(controller_thread_exit_main_force_timeout);
-
-    f_thread_lock_read(&main->thread->lock.process);
-
-    if (main->thread->id_cleanup) {
-      f_thread_signal(main->thread->id_cleanup, F_signal_kill);
-    }
-
-    if (main->thread->id_control) {
-      f_thread_signal(main->thread->id_control, F_signal_kill);
-    }
-
-    if (main->thread->id_rule) {
-      f_thread_signal(main->thread->id_rule, F_signal_kill);
-    }
-
-    f_thread_unlock(&main->thread->lock.process);
-
-    return 0;
-  }
-#endif // _di_controller_thread_exit_force_
-
 #ifndef _di_controller_thread_main_
   f_status_t controller_thread_main(const f_string_static_t entry_name, controller_data_t *data, controller_setting_t *setting) {
 
@@ -241,11 +129,7 @@ extern "C" {
     }
 
     if (F_status_is_error_not(status)) {
-      f_thread_lock_write(&main.thread->lock.process);
-
       status = f_thread_create(0, &thread.id_signal, &controller_thread_signal, (void *) &main);
-
-      f_thread_unlock(&main.thread->lock.process);
     }
 
     if (F_status_is_error(status)) {
@@ -279,13 +163,9 @@ extern "C" {
       else {
         const controller_main_entry_t entry = controller_macro_main_entry_t_initialize(&entry_name, &main, setting);
 
-        f_thread_lock_write(&main.thread->lock.process);
-
         // the entry processing runs using the rule thread.
         status = f_thread_create(0, &thread.id_rule, &controller_thread_entry, (void *) &entry);
 
-        f_thread_unlock(&main.thread->lock.process);
-
         if (F_status_is_error(status)) {
           if (data->error.verbosity != f_console_verbosity_quiet) {
             controller_error_print(data->error, F_status_set_fine(status), "f_thread_create", F_true, &thread);
@@ -328,32 +208,20 @@ extern "C" {
         }
 
         if (thread.enabled) {
-          f_thread_lock_write(&main.thread->lock.process);
-
           status = f_thread_create(0, &thread.id_rule, &controller_thread_rule, (void *) &main);
 
-          f_thread_unlock(&main.thread->lock.process);
-
           if (F_status_is_error(status)) {
             thread.id_rule = 0;
           }
           else {
-            f_thread_lock_write(&main.thread->lock.process);
-
             status = f_thread_create(0, &thread.id_control, &controller_thread_control, (void *) &main);
-
-            f_thread_unlock(&main.thread->lock.process);
           }
 
           if (F_status_is_error(status)) {
             thread.id_control = 0;
           }
           else {
-            f_thread_lock_write(&main.thread->lock.process);
-
             status = f_thread_create(0, &thread.id_cleanup, &controller_thread_cleanup, (void *) &main);
-
-            f_thread_unlock(&main.thread->lock.process);
           }
 
           if (F_status_is_error(status)) {
@@ -378,49 +246,30 @@ extern "C" {
       if (status != F_signal && thread.id_signal) {
         f_thread_cancel(thread.id_signal);
       }
-
-      controller_thread_process_cancel(&main);
     }
-
-    if (data->parameters[controller_parameter_validate].result == f_console_result_none) {
-      f_thread_lock_read(&main.thread->lock.process);
+    else if (data->parameters[controller_parameter_validate].result == f_console_result_none) {
 
       if (thread.id_signal) {
-        f_thread_unlock(&main.thread->lock.process);
-
         f_thread_join(thread.id_signal, 0);
 
         thread.id_signal = 0;
       }
-      else {
-        f_thread_unlock(&main.thread->lock.process);
-      }
-
-      controller_thread_process_cancel(&main);
+    }
+    else {
+      f_thread_cancel(thread.id_signal);
     }
 
-    f_thread_lock_read(&main.thread->lock.process);
-
-    if (thread.id_signal) f_thread_cancel(thread.id_signal);
-    if (thread.id_cleanup) f_thread_cancel(thread.id_cleanup);
-    if (thread.id_control) f_thread_cancel(thread.id_control);
-    if (thread.id_rule) f_thread_cancel(thread.id_rule);
+    controller_thread_process_cancel(&main);
 
     if (thread.id_signal) f_thread_join(thread.id_signal, 0);
     if (thread.id_cleanup) f_thread_join(thread.id_cleanup, 0);
     if (thread.id_control) f_thread_join(thread.id_control, 0);
     if (thread.id_rule) f_thread_join(thread.id_rule, 0);
 
-    f_thread_unlock(&main.thread->lock.process);
-
-    // wait for exit thread to finish any cleanup.
-    if (thread.id_exit) f_thread_join(thread.id_exit, 0);
-
     thread.id_cleanup = 0;
     thread.id_control = 0;
     thread.id_rule = 0;
     thread.id_signal = 0;
-    thread.id_exit = 0;
 
     controller_thread_delete_simple(&thread);
 
@@ -469,37 +318,46 @@ extern "C" {
 #ifndef _di_controller_thread_process_cancel_
   void controller_thread_process_cancel(controller_main_t *main) {
 
-    f_thread_lock_read(&main->thread->lock.process);
-
     // only cancel when enabled.
-    if (!main->thread->enabled || main->thread->id_exit) {
-      f_thread_unlock(&main->thread->lock.process);
-
+    if (!main->thread->enabled) {
       return;
     }
 
     // this must be set, regardless of lock state and only this function changes this.
     main->thread->enabled = F_false;
 
-    f_thread_create(0, &main->thread->id_exit, &controller_thread_exit_force, (void *) main);
+    f_status_t status = F_none;
+    f_array_length_t spent = 0;
+
+    struct timespec wait;
+    wait.tv_sec = 0;
+    wait.tv_nsec = controller_thread_exit_process_cancel_wait;
 
     controller_process_t *process = 0;
 
     f_array_length_t i = 0;
 
+    if (main->thread->id_cleanup) {
+      f_thread_cancel(main->thread->id_cleanup);
+    }
+
+    if (main->thread->id_control) {
+      f_thread_cancel(main->thread->id_control);
+    }
+
+    if (main->thread->id_rule) {
+      f_thread_cancel(main->thread->id_rule);
+    }
+
     for (; i < main->thread->processs.used; ++i) {
 
       if (!main->thread->processs.array[i]) continue;
 
       process = main->thread->processs.array[i];
 
-      f_thread_lock_read(&process->active);
-
       if (process->child > 0) {
         f_signal_send(F_signal_termination, process->child);
       }
-
-      f_thread_unlock(&process->active);
     } // for
 
     for (i = 0; i < main->thread->processs.used; ++i) {
@@ -508,38 +366,72 @@ extern "C" {
 
       process = main->thread->processs.array[i];
 
-      f_thread_lock_read(&process->active);
-
       if (process->id_thread) {
-        f_thread_cancel(process->id_thread);
+        status = f_thread_join_timed(process->id_thread, wait, 0);
+
+        if (status == F_none) {
+          process->child = 0;
+          process->id_thread = 0;
+        }
+        else {
+          f_thread_cancel(process->id_thread);
+        }
       }
+    } // for
+
+    for (i = 0; i < main->thread->processs.size && spent < controller_thread_exit_process_cancel_total; ++i) {
+
+      if (!main->thread->processs.array[i]) continue;
 
-      f_thread_unlock(&process->active);
+      process = main->thread->processs.array[i];
+
+      do {
+        status = f_thread_join_timed(process->id_thread, wait, 0);
+
+        if (status == F_none) {
+          process->child = 0;
+          process->id_thread = 0;
+        }
+
+        spent++;
+
+      } while (status == F_time && spent < controller_thread_exit_process_cancel_total);
     } // for
 
     for (i = 0; i < main->thread->processs.size; ++i) {
-
       if (!main->thread->processs.array[i]) continue;
 
       process = main->thread->processs.array[i];
 
       if (process->id_thread) {
-        f_thread_join(process->id_thread, 0);
 
-        f_thread_lock_read(&process->active);
+        if (process->child > 0) {
+          f_signal_send(F_signal_kill, process->child);
 
-        process->id_thread = 0;
+          nanosleep(&wait, 0);
+        }
+
+        f_thread_signal(process->id_thread, F_signal_kill);
+
+        f_thread_join(process->id_thread, 0);
 
-        f_thread_unlock(&process->active);
+        process->child = 0;
+        process->id_thread = 0;
       }
     } // for
 
-    f_thread_unlock(&main->thread->lock.process);
-    f_thread_lock_write(&main->thread->lock.process);
+    // guarantee these threads are terminated.
+    if (main->thread->id_cleanup) {
+      f_thread_signal(main->thread->id_cleanup, F_signal_kill);
+    }
 
-    main->thread->processs.used = 0;
+    if (main->thread->id_control) {
+      f_thread_signal(main->thread->id_control, F_signal_kill);
+    }
 
-    f_thread_unlock(&main->thread->lock.process);
+    if (main->thread->id_rule) {
+      f_thread_signal(main->thread->id_rule, F_signal_kill);
+    }
   }
 #endif // _di_controller_thread_process_cancel_
 
index 6a861caf87fb81b5aaa3d9c3d7276a2245100e4a..6dbd0d0f1c5316f4e5d624b37d949ba1c8552a22 100644 (file)
@@ -41,22 +41,6 @@ extern "C" {
 #endif // _di_controller_thread_control_
 
 /**
- * Thread that gets started on exit to force other threads to exit.
- *
- * This will sleep for some time and then send termination signals to all threads and child processes.
- *
- * @param arguments
- *   The thread arguments.
- *   Must be of type controller_main_t.
- *
- * @return
- *   0, always.
- */
-#ifndef _di_controller_thread_exit_force_
-  extern void * controller_thread_exit_force(void *arguments) f_gcc_attribute_visibility_internal;
-#endif // _di_controller_thread_exit_force_
-
-/**
  * Start all threads, wait on threads, and handle requests.
  *
  * @param entry_name