diff --git a/src/cuda_engine.rs b/src/cuda_engine.rs
index 5f6666b..cf078d1 100644
--- a/src/cuda_engine.rs
+++ b/src/cuda_engine.rs
@@ -103,6 +103,7 @@ impl EngineImpl for CudaEngine {
         block_size: u32,
         grid_size: u32,
     ) -> Result<(Option<u64>, u32, u64), Error> {
+        // println!("CUDA: start mining");
         info!(target: LOG_TARGET, "CUDA: start mining");
         let output = vec![0u64; 5];
         let mut output_buf = output.as_slice().as_dbuf()?;
diff --git a/src/main.rs b/src/main.rs
index 689f3bf..db61134 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,9 @@ use std::{
     convert::TryInto,
     env::current_dir,
     fs,
+    fs::File,
+    io::Write,
+    panic,
     path::PathBuf,
     process,
     str::FromStr,
@@ -84,6 +87,28 @@ const LOG_TARGET: &str = "tari::gpuminer";
 
 #[tokio::main]
 async fn main() {
+    // Set a custom panic hook
+    panic::set_hook(Box::new(|panic_info| {
+        let location = panic_info
+            .location()
+            .map(|loc| format!("file: '{}', line: {}", loc.file(), loc.line()))
+            .unwrap_or_else(|| "unknown location".to_string());
+
+        let message = if let Some(s) = panic_info.payload().downcast_ref::<&str>() {
+            s.to_string()
+        } else if let Some(s) = panic_info.payload().downcast_ref::<String>() {
+            s.clone()
+        } else {
+            "Unknown panic message".to_string()
+        };
+
+        error!(target: "tari::p2pool::main", "Panic occurred at {}: {}", location, message);
+
+        // Optionally, write a custom message directly to the file
+        let mut file = File::create("panic.log").unwrap();
+        file.write_all(format!("Panic at {}: {}", location, message).as_bytes())
+            .unwrap();
+    }));
     match main_inner().await {
         Ok(()) => {
             info!(target: LOG_TARGET, "Gpu miner startup process completed successfully");
@@ -106,6 +131,9 @@ struct Cli {
     #[arg(short, long)]
     benchmark: bool,
 
+    #[arg(short, long)]
+    find_optimal: bool,
+
     /// (Optional) Tari wallet address to send rewards to
     #[arg(short = 'a', long)]
     tari_address: Option<String>,
@@ -305,6 +333,118 @@ async fn main_inner() -> Result<(), anyhow::Error> {
         if let Some(template_timeout) = cli.template_timeout_secs {
             config.template_timeout_secs = template_timeout;
         }
+        // create a list of devices (by index) to use
+        let devices_to_use: Vec<u32> = (0..num_devices)
+            .filter(|x| {
+                if let Some(use_devices) = &cli.use_devices {
+                    use_devices.contains(x)
+                } else {
+                    true
+                }
+            })
+            .filter(|x| {
+                if let Some(excluded_devices) = &cli.exclude_devices {
+                    !excluded_devices.contains(x)
+                } else {
+                    true
+                }
+            })
+            .collect();
+
+        info!(target: LOG_TARGET, "Device indexes to use: {:?} from the total number of devices: {:?}", devices_to_use, num_devices);
+
+        println!(
+            "Device indexes to use: {:?} from the total number of devices: {:?}",
+            devices_to_use, num_devices
+        );
+
+        if cli.find_optimal {
+            let mut best_hashrate = 0;
+            let mut best_grid_size = 1;
+            let mut current_grid_size = 32;
+            let mut is_doubling_stage = true;
+            let mut last_grid_size_increase = 0;
+            let mut prev_hashrate = 0;
+
+            while true {
+                dbg!("here");
+                let mut config = config.clone();
+                config.single_grid_size = current_grid_size;
+                // config.block_size = ;
+                let mut threads = vec![];
+                let (tx, rx) = tokio::sync::broadcast::channel(100);
+                for i in 0..num_devices {
+                    if !devices_to_use.contains(&i) {
+                        continue;
+                    }
+                    let c = config.clone();
+                    let gpu = gpu_engine.clone();
+                    let x = tx.clone();
+                    threads.push(thread::spawn(move || {
+                        run_thread(gpu, num_devices as u64, i as u32, c, true, x)
+                    }));
+                }
+                let thread_len = threads.len();
+                let mut thread_hashrate = Vec::with_capacity(thread_len);
+                for t in threads {
+                    match t.join() {
+                        Ok(res) => match res {
+                            Ok(hashrate) => {
+                                info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en));
+                                thread_hashrate.push(hashrate);
+                            },
+                            Err(err) => {
+                                eprintln!("Thread join succeeded but result failed: {:?}", err);
+                                error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err);
+                            },
+                        },
+                        Err(err) => {
+                            eprintln!("Thread join failed: {:?}", err);
+                            error!(target: LOG_TARGET, "Thread join failed: {:?}", err);
+                        },
+                    }
+                }
+                let total_hashrate: u64 = thread_hashrate.iter().sum();
+                if total_hashrate > best_hashrate {
+                    best_hashrate = total_hashrate;
+                    best_grid_size = current_grid_size;
+                    // best_grid_size = config.single_grid_size;
+                    // best_block_size = config.block_size;
+                    println!(
+                        "Best hashrate: {} grid_size: {}, current_grid: {} block_size: {} Prev Hash {}",
+                        best_hashrate, best_grid_size, current_grid_size, config.block_size, prev_hashrate
+                    );
+                }
+                // if total_hashrate < prev_hashrate {
+                //     println!("total decreased, breaking");
+                //     break;
+                // }
+                if is_doubling_stage {
+                    if total_hashrate > prev_hashrate {
+                        last_grid_size_increase = current_grid_size;
+                        current_grid_size = current_grid_size * 2;
+                    } else {
+                        is_doubling_stage = false;
+                        last_grid_size_increase = last_grid_size_increase / 2;
+                        current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase);
+                    }
+                } else {
+                    // Bisecting stage
+                    if last_grid_size_increase < 2 {
+                        break;
+                    }
+                    if total_hashrate > prev_hashrate {
+                        last_grid_size_increase = last_grid_size_increase / 2;
+                        current_grid_size += last_grid_size_increase;
+                    } else {
+                        last_grid_size_increase = last_grid_size_increase / 2;
+                        current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase);
+                    }
+                }
+                prev_hashrate = total_hashrate;
+            }
+            return Ok(());
+        }
 
         let (stats_tx, stats_rx) = tokio::sync::broadcast::channel(100);
         if config.http_server_enabled {
@@ -328,29 +468,12 @@ async fn main_inner() -> Result<(), anyhow::Error> {
                 }
             });
         }
-        // create a list of devices (by index) to use
-        let devices_to_use: Vec<u32> = (0..num_devices)
-            .filter(|x| {
-                if let Some(use_devices) = &cli.use_devices {
-                    use_devices.contains(x)
-                } else {
-                    true
-                }
-            })
-            .filter(|x| {
-                if let Some(excluded_devices) = &cli.exclude_devices {
-                    !excluded_devices.contains(x)
-                } else {
-                    true
-                }
-            })
-            .collect();
-
-        info!(target: LOG_TARGET, "Device indexes to use: {:?} from the total number of devices: {:?}", devices_to_use, num_devices);
 
         let mut threads = vec![];
         for i in 0..num_devices {
+            println!("Device index: {}", i);
             if devices_to_use.contains(&i) {
+                println!("Starting thread for device index: {}", i);
                 let c = config.clone();
                 let gpu = gpu_engine.clone();
                 let curr_stats_tx = stats_tx.clone();
@@ -542,6 +665,7 @@ fn run_thread<T: EngineImpl>(
                 },
                 Err(e) => {
                     error!(target: LOG_TARGET, "Mining failed: {}", e);
+                    eprintln!("Mining failed: {}", e);
                     return Err(e.into());
                 },
             };