diff --git a/packages/napthaai/customs/prediction_request_rag/component.yaml b/packages/napthaai/customs/prediction_request_rag/component.yaml
index 0eeb14fe..5f668bb3 100644
--- a/packages/napthaai/customs/prediction_request_rag/component.yaml
+++ b/packages/napthaai/customs/prediction_request_rag/component.yaml
@@ -7,10 +7,12 @@ license: Apache-2.0
 aea_version: '>=1.0.0, <2.0.0'
 fingerprint:
   __init__.py: bafybeibt7f7crtwvmkg7spy3jhscmlqltvyblzp32g6gj44v7tlo5lycuq
-  prediction_request_rag.py: bafybeie4rafsx7m7mfkdgd2n2ovfm3gwk5ybjr5cz6s7r3oz545ag2wwuy
+  prediction_request_rag.py: bafybeibtvuddvbhjlyd4sbk7rwz4mcsr4hiigfgrpdhzwa6vn6bhb6fboy
 fingerprint_ignore_patterns: []
 entry_point: prediction_request_rag.py
 callable: run
+params:
+  default_model: claude-3-sonnet-20240229
 dependencies:
   google-api-python-client:
     version: ==2.95.0
diff --git a/packages/napthaai/customs/prediction_request_rag/prediction_request_rag.py b/packages/napthaai/customs/prediction_request_rag/prediction_request_rag.py
index 3c384a87..3abe8283 100644
--- a/packages/napthaai/customs/prediction_request_rag/prediction_request_rag.py
+++ b/packages/napthaai/customs/prediction_request_rag/prediction_request_rag.py
@@ -222,8 +222,6 @@ def embeddings(self, model, input):
     "prediction-request-rag",
 ]
 ALLOWED_MODELS = list(LLM_SETTINGS.keys())
-DEFAULT_MODEL = "claude-3-haiku-20240307"
-TOOL_TO_ENGINE = {tool: DEFAULT_MODEL for tool in ALLOWED_TOOLS}
 DEFAULT_NUM_URLS = defaultdict(lambda: 3)
 DEFAULT_NUM_QUERIES = defaultdict(lambda: 3)
 NUM_URLS_PER_QUERY = 5
@@ -299,11 +297,11 @@ def count_tokens(text: str, model: str) -> int:

 def multi_queries(
     prompt: str,
-    engine: str,
+    model: str,
     num_queries: int,
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["gpt-4-0125-preview"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["gpt-4-0125-preview"]["default_max_tokens"],
 ) -> List[str]:
     """Generate multiple queries for fetching information from the web."""
     url_query_prompt = URL_QUERY_PROMPT.format(
@@ -316,7 +314,7 @@ def multi_queries(
     ]

     response = client.completions(
-        model=engine,
+        model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -325,7 +323,7 @@ def multi_queries(
         counter_callback(
             input_tokens=response.usage.prompt_tokens,
             output_tokens=response.usage.completion_tokens,
-            model=engine,
+            model=model,
             token_counter=count_tokens,
         )
     queries = parser_query_response(response.content, num_queries=num_queries)
@@ -539,15 +537,15 @@ def recursive_character_text_splitter(text, max_tokens, overlap):

 def fetch_additional_information(
     prompt: str,
-    engine: str,
+    model: str,
     google_api_key: Optional[str],
     google_engine_id: Optional[str],
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
     source_links: Optional[List[str]] = None,
     num_urls: Optional[int] = DEFAULT_NUM_URLS,
     num_queries: Optional[int] = DEFAULT_NUM_QUERIES,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["gpt-4-0125-preview"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["gpt-4-0125-preview"]["default_max_tokens"],
 ) -> Tuple[str, Callable[[int, int, str], None]]:
     """Fetch additional information to help answer the user prompt."""
prompt.""" @@ -556,7 +554,7 @@ def fetch_additional_information( try: queries, counter_callback = multi_queries( prompt=prompt, - engine=engine, + model=model, num_queries=num_queries, counter_callback=counter_callback, temperature=temperature, @@ -674,14 +672,13 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]: kwargs["api_keys"], kwargs["llm_provider"], embedding_provider="openai" ): tool = kwargs["tool"] - model = kwargs.get("model", TOOL_TO_ENGINE[tool]) prompt = extract_question(kwargs["prompt"]) - engine = kwargs.get("model", TOOL_TO_ENGINE[tool]) - print(f"ENGINE: {engine}") + model = kwargs.get("model") + print(f"MODEL: {model}") max_tokens = kwargs.get( - "max_tokens", LLM_SETTINGS[engine]["default_max_tokens"] + "max_tokens", LLM_SETTINGS[model]["default_max_tokens"] ) - temperature = kwargs.get("temperature", LLM_SETTINGS[engine]["temperature"]) + temperature = kwargs.get("temperature", LLM_SETTINGS[model]["temperature"]) num_urls = kwargs.get("num_urls", DEFAULT_NUM_URLS[tool]) num_queries = kwargs.get("num_queries", DEFAULT_NUM_QUERIES[tool]) counter_callback = kwargs.get("counter_callback", None) @@ -699,7 +696,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]: additional_information, counter_callback = fetch_additional_information( prompt=prompt, - engine=engine, + model=model, google_api_key=google_api_key, google_engine_id=google_engine_id, counter_callback=counter_callback, @@ -723,7 +720,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]: ] response = client.completions( - model=engine, + model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, @@ -733,7 +730,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]: counter_callback( input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, - model=engine, + model=model, token_counter=count_tokens, ) diff --git a/packages/napthaai/customs/prediction_request_rag_cohere/component.yaml b/packages/napthaai/customs/prediction_request_rag_cohere/component.yaml index cd637d4f..4de55b62 100644 --- a/packages/napthaai/customs/prediction_request_rag_cohere/component.yaml +++ b/packages/napthaai/customs/prediction_request_rag_cohere/component.yaml @@ -8,10 +8,12 @@ license: Apache-2.0 aea_version: '>=1.0.0, <2.0.0' fingerprint: __init__.py: bafybeiekjzoy2haayvkiwhb2u2epflpqxticud34mma3gdhfzgu36lxwiq - prediction_request_rag_cohere.py: bafybeidt7vvlrapi2y4b4ytqqi2s2ro5oi5xfc62a2rftkwbat2fmw5hme + prediction_request_rag_cohere.py: bafybeig4oq3tdjuz2la2pz232u5m7347q7gplu5pw4vebbxteuiqw6hh3u fingerprint_ignore_patterns: [] entry_point: prediction_request_rag_cohere.py callable: run +params: + default_model: cohere/command-r-plus dependencies: google-api-python-client: version: ==2.95.0 diff --git a/packages/napthaai/customs/prediction_request_rag_cohere/prediction_request_rag_cohere.py b/packages/napthaai/customs/prediction_request_rag_cohere/prediction_request_rag_cohere.py index 99641f12..71ebc974 100644 --- a/packages/napthaai/customs/prediction_request_rag_cohere/prediction_request_rag_cohere.py +++ b/packages/napthaai/customs/prediction_request_rag_cohere/prediction_request_rag_cohere.py @@ -192,8 +192,6 @@ def embeddings(self, model, input): "prediction-request-rag-cohere", ] ALLOWED_MODELS = list(LLM_SETTINGS.keys()) -DEFAULT_MODEL = "cohere/command-r-plus" -TOOL_TO_ENGINE = {tool: DEFAULT_MODEL for tool in ALLOWED_TOOLS} DEFAULT_NUM_URLS = 
 DEFAULT_NUM_QUERIES = defaultdict(lambda: 3)
 NUM_URLS_PER_QUERY = 5
@@ -274,11 +272,11 @@ def count_tokens(text: str, model: str) -> int:

 def multi_queries(
     prompt: str,
-    engine: str,
+    model: str,
     num_queries: int,
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["cohere/command-r-plus"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["cohere/command-r-plus"]["default_max_tokens"],
 ) -> List[str]:
     """Generate multiple queries for fetching information from the web."""
     url_query_prompt = URL_QUERY_PROMPT.format(
@@ -291,7 +289,7 @@ def multi_queries(
     ]

     response = client.completions(
-        model=engine,
+        model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -300,7 +298,7 @@ def multi_queries(
         counter_callback(
             input_tokens=response.usage.prompt_tokens,
             output_tokens=response.usage.completion_tokens,
-            model=engine,
+            model=model,
             token_counter=count_tokens,
         )
     queries = parser_query_response(response.content, num_queries=num_queries)
@@ -514,15 +512,15 @@ def recursive_character_text_splitter(text, max_tokens, overlap):

 def fetch_additional_information(
     prompt: str,
-    engine: str,
+    model: str,
     google_api_key: Optional[str],
     google_engine_id: Optional[str],
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
     source_links: Optional[List[str]] = None,
     num_urls: Optional[int] = DEFAULT_NUM_URLS,
     num_queries: Optional[int] = DEFAULT_NUM_QUERIES,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["cohere/command-r-plus"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["cohere/command-r-plus"]["default_max_tokens"],
 ) -> Tuple[str, Callable[[int, int, str], None]]:
     """Fetch additional information to help answer the user prompt."""

@@ -531,7 +529,7 @@ def fetch_additional_information(
     try:
         queries, counter_callback = multi_queries(
             prompt=prompt,
-            engine=engine,
+            model=model,
             num_queries=num_queries,
             counter_callback=counter_callback,
             temperature=temperature,
@@ -648,14 +646,13 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
         kwargs["api_keys"], kwargs["llm_provider"], embedding_provider="openai"
     ):
         tool = kwargs["tool"]
-        model = kwargs.get("model", TOOL_TO_ENGINE[tool])
+        model = kwargs.get("model")
         prompt = extract_question(kwargs["prompt"])
-        engine = kwargs.get("model", TOOL_TO_ENGINE[tool])
-        print(f"ENGINE: {engine}")
+        print(f"MODEL: {model}")
         max_tokens = kwargs.get(
-            "max_tokens", LLM_SETTINGS[engine]["default_max_tokens"]
+            "max_tokens", LLM_SETTINGS[model]["default_max_tokens"]
         )
-        temperature = kwargs.get("temperature", LLM_SETTINGS[engine]["temperature"])
+        temperature = kwargs.get("temperature", LLM_SETTINGS[model]["temperature"])
         num_urls = kwargs.get("num_urls", DEFAULT_NUM_URLS[tool])
         num_queries = kwargs.get("num_queries", DEFAULT_NUM_QUERIES[tool])
         counter_callback = kwargs.get("counter_callback", None)
@@ -673,7 +670,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:

         additional_information, counter_callback = fetch_additional_information(
             prompt=prompt,
-            engine=engine,
+            model=model,
             google_api_key=google_api_key,
             google_engine_id=google_engine_id,
             counter_callback=counter_callback,
@@ -697,7 +694,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
         ]

         response = client.completions(
-            model=engine,
+            model=model,
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -707,7 +704,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
             counter_callback(
                 input_tokens=response.usage.prompt_tokens,
                 output_tokens=response.usage.completion_tokens,
-                model=engine,
+                model=model,
                 token_counter=count_tokens,
             )

diff --git a/packages/napthaai/customs/prediction_request_reasoning/component.yaml b/packages/napthaai/customs/prediction_request_reasoning/component.yaml
index 9402cf8b..6ca0a6a6 100644
--- a/packages/napthaai/customs/prediction_request_reasoning/component.yaml
+++ b/packages/napthaai/customs/prediction_request_reasoning/component.yaml
@@ -7,10 +7,12 @@ license: Apache-2.0
 aea_version: '>=1.0.0, <2.0.0'
 fingerprint:
   __init__.py: bafybeib36ew6vbztldut5xayk5553rylrq7yv4cpqyhwc5ktvd4cx67vwu
-  prediction_request_reasoning.py: bafybeibyncgeeyrlcqhdbsdzzama7aah44jzedqo3zcplvuc45goykwjli
+  prediction_request_reasoning.py: bafybeidiabgnlc453spgrdn7rhhl2xc3aa6zqeukkw2bthndbugtjf6bya
 fingerprint_ignore_patterns: []
 entry_point: prediction_request_reasoning.py
 callable: run
+params:
+  default_model: claude-3-sonnet-20240229
 dependencies:
   google-api-python-client:
     version: ==2.95.0
diff --git a/packages/napthaai/customs/prediction_request_reasoning/prediction_request_reasoning.py b/packages/napthaai/customs/prediction_request_reasoning/prediction_request_reasoning.py
index 056aab3b..4ec69ef4 100644
--- a/packages/napthaai/customs/prediction_request_reasoning/prediction_request_reasoning.py
+++ b/packages/napthaai/customs/prediction_request_reasoning/prediction_request_reasoning.py
@@ -225,8 +225,6 @@ def embeddings(self, model, input):
     "prediction-request-reasoning",
 ]
 ALLOWED_MODELS = list(LLM_SETTINGS.keys())
-DEFAULT_MODEL = "claude-3-haiku-20240307"
-TOOL_TO_ENGINE = {tool: DEFAULT_MODEL for tool in ALLOWED_TOOLS}
 DEFAULT_NUM_URLS = defaultdict(lambda: 3)
 DEFAULT_NUM_QUERIES = defaultdict(lambda: 3)
 SPLITTER_CHUNK_SIZE = 300
@@ -392,11 +390,11 @@ def parser_prediction_response(response: str) -> str:

 def multi_queries(
     prompt: str,
-    engine: str,
+    model: str,
     num_queries: int,
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["gpt-4-0125-preview"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["gpt-4-0125-preview"]["default_max_tokens"],
 ) -> List[str]:
     """Generate multiple queries for fetching information from the web."""
     url_query_prompt = URL_QUERY_PROMPT.format(
@@ -409,7 +407,7 @@ def multi_queries(
     ]

     response = client.completions(
-        model=engine,
+        model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -418,7 +416,7 @@ def multi_queries(
         counter_callback(
             input_tokens=response.usage.prompt_tokens,
             output_tokens=response.usage.completion_tokens,
-            model=engine,
+            model=model,
             token_counter=count_tokens,
         )
     queries = parser_query_response(response.content, num_queries=num_queries)
@@ -608,9 +606,9 @@ def find_similar_chunks(

 def multi_questions_response(
     prompt: str,
-    engine: str,
-    temperature: float = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: int = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    model: str,
+    temperature: float = LLM_SETTINGS["gpt-4-0125-preview"]["temperature"],
+    max_tokens: int = LLM_SETTINGS["gpt-4-0125-preview"]["default_max_tokens"],
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
 ) -> List[str]:
     """Generate multiple questions for fetching information from the web."""
@@ -622,7 +620,7 @@ def multi_questions_response(
     ]

     response = client.completions(
-        model=engine,
+        model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -632,7 +630,7 @@ def multi_questions_response(
         counter_callback(
             input_tokens=response.usage.prompt_tokens,
             output_tokens=response.usage.completion_tokens,
-            model=engine,
+            model=model,
             token_counter=count_tokens,
         )

@@ -711,15 +709,15 @@ def count_tokens(text: str, model: str) -> int:

 def fetch_additional_information(
     prompt: str,
-    engine: str,
+    model: str,
     google_api_key: Optional[str],
     google_engine_id: Optional[str],
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
     source_links: Optional[List[str]] = None,
     num_urls: Optional[int] = DEFAULT_NUM_URLS,
     num_queries: Optional[int] = DEFAULT_NUM_QUERIES,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["gpt-4-0125-preview"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["gpt-4-0125-preview"]["default_max_tokens"],
 ) -> Tuple[str, List[str], Optional[Callable[[int, int, str], None]]]:
     """Fetch additional information from the web."""

@@ -727,7 +725,7 @@ def fetch_additional_information(
     try:
         queries, counter_callback = multi_queries(
             prompt=prompt,
-            engine=engine,
+            model=model,
             num_queries=num_queries,
             counter_callback=counter_callback,
             temperature=temperature,
@@ -790,7 +788,7 @@ def fetch_additional_information(
     # multi questions prompt
     questions, counter_callback = multi_questions_response(
         prompt=prompt,
-        engine=engine,
+        model=model,
         counter_callback=counter_callback,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -840,14 +838,13 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
         kwargs["api_keys"], kwargs["llm_provider"], embedding_provider="openai"
     ):
         tool = kwargs["tool"]
-        model = kwargs.get("model", TOOL_TO_ENGINE[tool])
+        model = kwargs.get("model")
         prompt = extract_question(kwargs["prompt"])
-        engine = kwargs.get("model", TOOL_TO_ENGINE[tool])
-        print(f"ENGINE: {engine}")
+        print(f"MODEL: {model}")
         max_tokens = kwargs.get(
-            "max_tokens", LLM_SETTINGS[engine]["default_max_tokens"]
+            "max_tokens", LLM_SETTINGS[model]["default_max_tokens"]
         )
-        temperature = kwargs.get("temperature", LLM_SETTINGS[engine]["temperature"])
+        temperature = kwargs.get("temperature", LLM_SETTINGS[model]["temperature"])
         num_urls = kwargs.get("num_urls", DEFAULT_NUM_URLS[tool])
         num_queries = kwargs.get("num_queries", DEFAULT_NUM_QUERIES[tool])
         counter_callback = kwargs.get("counter_callback", None)
@@ -869,7 +866,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             counter_callback,
         ) = fetch_additional_information(
             prompt=prompt,
-            engine=engine,
+            model=model,
             google_api_key=google_api_key,
             google_engine_id=google_engine_id,
             counter_callback=counter_callback,
@@ -891,7 +888,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             {"role": "user", "content": reasoning_prompt},
         ]
         reasoning, counter_callback = do_reasoning_with_retry(
-            model=engine,
+            model=model,
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -912,7 +909,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
         ]

         response_prediction = client.completions(
-            model=engine,
+            model=model,
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -923,7 +920,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             counter_callback(
                 input_tokens=response_prediction.usage.prompt_tokens,
                 output_tokens=response_prediction.usage.completion_tokens,
-                model=engine,
+                model=model,
                 token_counter=count_tokens,
             )

diff --git a/packages/napthaai/customs/prediction_url_cot/component.yaml b/packages/napthaai/customs/prediction_url_cot/component.yaml
index 20faa6e6..7791b826 100644
--- a/packages/napthaai/customs/prediction_url_cot/component.yaml
+++ b/packages/napthaai/customs/prediction_url_cot/component.yaml
@@ -7,10 +7,12 @@ license: Apache-2.0
 aea_version: '>=1.0.0, <2.0.0'
 fingerprint:
   __init__.py: bafybeiflni5dkn5fqe7fnu4lgbqxzfrgochhqfbgzwz3vlf5grijp3nkpm
-  prediction_url_cot.py: bafybeihyp5zsjsungb6kzqg3wo6fz3mx6h7u5u2xwbk73w7ly2frh7soga
+  prediction_url_cot.py: bafybeifrkisrrphzyhqnvjqtwxynue7xlsmlqhggm5vcuceod2sl4td7ei
 fingerprint_ignore_patterns: []
 entry_point: prediction_url_cot.py
 callable: run
+params:
+  default_model: claude-3-sonnet-20240229
 dependencies:
   google-api-python-client:
     version: ==2.95.0
diff --git a/packages/napthaai/customs/prediction_url_cot/prediction_url_cot.py b/packages/napthaai/customs/prediction_url_cot/prediction_url_cot.py
index dc334ba3..74333f1f 100644
--- a/packages/napthaai/customs/prediction_url_cot/prediction_url_cot.py
+++ b/packages/napthaai/customs/prediction_url_cot/prediction_url_cot.py
@@ -149,7 +149,7 @@ def completions(
     def embeddings(self, model, input):
         if self.llm_provider == "openai" or self.llm_provider == "openrouter":
             response = self.client.embeddings.create(
-                model=EMBEDDING_MODEL,
+                model=model,
                 input=input,
             )
             return response
@@ -182,8 +182,6 @@ def embeddings(self, model, input):
     "prediction-url-cot",
 ]
 ALLOWED_MODELS = list(LLM_SETTINGS.keys())
-DEFAULT_MODEL = "claude-3-haiku-20240307"
-TOOL_TO_ENGINE = {tool: DEFAULT_MODEL for tool in ALLOWED_TOOLS}
 NUM_QUERIES = 5
 NUM_URLS_PER_QUERY = 3
 HTTP_TIMEOUT = 20
@@ -253,11 +251,11 @@ def count_tokens(text: str, model: str) -> int:

 def multi_queries(
     prompt: str,
-    engine: str,
+    model: str,
     num_queries: int,
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["claude-3-sonnet-20240229"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["claude-3-sonnet-20240229"]["default_max_tokens"],
 ) -> List[str]:
     """Generate multiple queries for fetching information from the web."""
     url_query_prompt = URL_QUERY_PROMPT.format(
@@ -270,7 +268,7 @@ def multi_queries(
     ]

     response = client.completions(
-        model=engine,
+        model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -279,7 +277,7 @@ def multi_queries(
         counter_callback(
             input_tokens=response.usage.prompt_tokens,
             output_tokens=response.usage.completion_tokens,
-            model=engine,
+            model=model,
             token_counter=count_tokens,
         )
     queries = parser_query_response(response.content, num_queries=num_queries)
@@ -493,15 +491,15 @@ def select_docs(

 def fetch_additional_information(
     prompt: str,
-    engine: str,
+    model: str,
     google_api_key: Optional[str],
     google_engine_id: Optional[str],
     counter_callback: Optional[Callable[[int, int, str], None]] = None,
     source_links: Optional[List[str]] = None,
     num_urls: Optional[int] = NUM_URLS_PER_QUERY,
     num_queries: Optional[int] = NUM_QUERIES,
-    temperature: Optional[float] = LLM_SETTINGS[DEFAULT_MODEL]["temperature"],
-    max_tokens: Optional[int] = LLM_SETTINGS[DEFAULT_MODEL]["default_max_tokens"],
+    temperature: Optional[float] = LLM_SETTINGS["claude-3-sonnet-20240229"]["temperature"],
+    max_tokens: Optional[int] = LLM_SETTINGS["claude-3-sonnet-20240229"]["default_max_tokens"],
     n_docs: int = N_DOCS,
 ) -> Tuple[str, Callable[[int, int, str], None]]:
     """Fetch additional information from the web."""
@@ -510,7 +508,7 @@ def fetch_additional_information(
     try:
         queries, counter_callback = multi_queries(
             prompt=prompt,
-            engine=engine,
+            model=model,
             num_queries=num_queries,
             counter_callback=counter_callback,
             temperature=temperature,
@@ -587,14 +585,13 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
     """Run the task"""
     with LLMClientManager(kwargs["api_keys"], kwargs["llm_provider"]):
         tool = kwargs["tool"]
-        model = kwargs.get("model", TOOL_TO_ENGINE[tool])
+        model = kwargs.get("model")
         prompt = extract_question(kwargs["prompt"])
-        engine = kwargs.get("model", TOOL_TO_ENGINE[tool])
-        print(f"ENGINE: {engine}")
+        print(f"MODEL: {model}")
         max_tokens = kwargs.get(
-            "max_tokens", LLM_SETTINGS[engine]["default_max_tokens"]
+            "max_tokens", LLM_SETTINGS[model]["default_max_tokens"]
         )
-        temperature = kwargs.get("temperature", LLM_SETTINGS[engine]["temperature"])
+        temperature = kwargs.get("temperature", LLM_SETTINGS[model]["temperature"])
         num_urls = kwargs.get("num_urls", NUM_URLS_PER_QUERY)
         num_queries = kwargs.get("num_queries", NUM_QUERIES)
         n_docs = kwargs.get("n_docs", N_DOCS)
@@ -613,7 +610,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:

         additional_information, counter_callback = fetch_additional_information(
             prompt=prompt,
-            engine=engine,
+            model=model,
             google_api_key=google_api_key,
             google_engine_id=google_engine_id,
             counter_callback=counter_callback,
@@ -638,7 +635,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
         ]

         response = client.completions(
-            model=engine,
+            model=model,
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -648,7 +645,7 @@ def run(**kwargs) -> Tuple[Optional[str], Any, Optional[Dict[str, Any]], Any]:
             counter_callback(
                 input_tokens=response.usage.prompt_tokens,
                 output_tokens=response.usage.completion_tokens,
-                model=engine,
+                model=model,
                 token_counter=count_tokens,
             )

diff --git a/packages/nickcom007/customs/prediction_request_sme/component.yaml b/packages/nickcom007/customs/prediction_request_sme/component.yaml
index d8396c57..5b3940a8 100644
--- a/packages/nickcom007/customs/prediction_request_sme/component.yaml
+++ b/packages/nickcom007/customs/prediction_request_sme/component.yaml
@@ -12,6 +12,8 @@ fingerprint:
 fingerprint_ignore_patterns: []
 entry_point: prediction_request_sme.py
 callable: run
+params:
+  default_model: gpt-4-0125-preview
 dependencies:
   requests: {}
   google-api-python-client:
diff --git a/packages/packages.json b/packages/packages.json
index 636facc2..09274e0f 100644
--- a/packages/packages.json
+++ b/packages/packages.json
@@ -5,23 +5,23 @@
         "custom/valory/openai_request/0.1.0": "bafybeihjtddwwkvwzaltk6yhtkk3xxnwnkurdtyuy6ki5tpf7h5htvuxnq",
         "custom/valory/prediction_request_embedding/0.1.0": "bafybeifnz5fzxvzyj3mmjpfsre3nzbdieuyjvnxqxuplopp5taz4qw7ys4",
"bafybeifnz5fzxvzyj3mmjpfsre3nzbdieuyjvnxqxuplopp5taz4qw7ys4", "custom/valory/resolve_market/0.1.0": "bafybeiaag2e7rsdr3bwg6mlmfyom4vctsdapohco7z45pxhzjymepz3rya", - "custom/valory/prediction_request/0.1.0": "bafybeibepdykzzljz3shboivclxz3fp3u5gxwe3f53r5b5bhznm2o4welq", + "custom/valory/prediction_request/0.1.0": "bafybeieq5lu3dtgz7svxr6eelbopyvravg3iiomvvtdv33ej5w7hgbjhja", "custom/valory/stability_ai_request/0.1.0": "bafybeicyyteycvzj4lk33p4t7mspfarc5d5ktbysu7oqkv6woo4aouxira", "custom/polywrap/prediction_with_research_report/0.1.0": "bafybeiewbcbfyjnyqyp4oou6ianxseakblwjyck22bd2doqojjk37uyxwy", "custom/jhehemann/prediction_sum_url_content/0.1.0": "bafybeiby55g53cvc4vpbgww5awrlf6x67h7q7pg5xlhwber75ejdkh4twa", "custom/psouranis/optimization_by_prompting/0.1.0": "bafybeihb3pyk5qcbj5ib7377p65tznzdsnwilyyhlkcvaj2scmfcpsh6ru", "custom/nickcom007/sme_generation_request/0.1.0": "bafybeibqv4ru4lpufy2hvcb3swqhzuq2kejjxmlyepofx6l6mxce6lhiqq", - "custom/nickcom007/prediction_request_sme/0.1.0": "bafybeicd457gt64fkhwowe4d26oaprcsqu2e67biz7miqthaktqcw52nfi", + "custom/nickcom007/prediction_request_sme/0.1.0": "bafybeigsszaat6k5m5a3ljyem7xdhjflpcm24imtcscgst3tghpwhamglu", "custom/napthaai/resolve_market_reasoning/0.1.0": "bafybeiewdqtfkee3od5kuktrhyzexy7466ea3w3to7vv6qnli6qutfrqaa", - "custom/napthaai/prediction_request_rag/0.1.0": "bafybeift5lujqkbdhm5gcq7m2zvxytlkjmayuxikgbx4bbthulxdvnxifq", - "custom/napthaai/prediction_request_reasoning/0.1.0": "bafybeihu76cyiv57t6ba3xbfutcypx7rbconedqagdjfkg2ziekmz6rkxy", + "custom/napthaai/prediction_request_rag/0.1.0": "bafybeigb7hfqcuvkvsc54526hxwhl6utfj44dnbwiyabcdbghlr5ctkwuu", + "custom/napthaai/prediction_request_reasoning/0.1.0": "bafybeiati546f5fyhtwv6yo7zaq3xwtb635p3jp3h3f546stknpbkkyhou", "custom/valory/prepare_tx/0.1.0": "bafybeibjqckeb73df724lr4xkrmeh3woqwas4mswa7au65xnwag2edad2e", "custom/valory/short_maker/0.1.0": "bafybeif63rt4lkopu3rc3l7sg6tebrrwg2lxqufjx6dx4hoda5yzax43fa", - "custom/napthaai/prediction_url_cot/0.1.0": "bafybeib7gwgy2rtrmmp6sz6rvbt2qnyp2omeqdmik65wznzags2s2ecbiu", + "custom/napthaai/prediction_url_cot/0.1.0": "bafybeidk6s4nqtow6dxmslhjtxzbbnhzpeogyy33e2zpjmqdjijtqb6rz4", "custom/napthaai/prediction_url_cot_claude/0.1.0": "bafybeicbjywni5hx5ssoiv6tnnjbqzsck6cmtsdpr6m562z6afogz5eh44", "custom/napthaai/prediction_request_reasoning_claude/0.1.0": "bafybeihtx2cejxoy42jwk2i5m4evfzz537aic5njuawxnzdzwlo63kdduq", "custom/napthaai/prediction_request_rag_claude/0.1.0": "bafybeickr32t7nmapuoymjyo3cf5rr2v2zapksxcivuqsgjr2gn6zo6y7y", - "custom/napthaai/prediction_request_rag_cohere/0.1.0": "bafybeidw3kaddisejlghxsgzzug56775vyy2s4qgglwcz5mz2gtwnsrwsu", + "custom/napthaai/prediction_request_rag_cohere/0.1.0": "bafybeig3xsmmb4bgbjong6uzvnurf4mwdisqwp3eidmeuo7hj42wkcbymm", "protocol/valory/acn_data_share/0.1.0": "bafybeih5ydonnvrwvy2ygfqgfabkr47s4yw3uqxztmwyfprulwfsoe7ipq", "protocol/valory/websocket_client/0.1.0": "bafybeifjk254sy65rna2k32kynzenutujwqndap2r222afvr3zezi27mx4", "contract/valory/agent_mech/0.1.0": "bafybeiah6b5epo2hlvzg5rr2cydgpp2waausoyrpnoarf7oa7bw33rex34", @@ -31,11 +31,11 @@ "skill/valory/contract_subscription/0.1.0": "bafybeiefuemlp75obgpxrp6iuleb3hn6vcviwh5oetk5djbuprf4xsmgjy", "skill/valory/mech_abci/0.1.0": "bafybeicyyz3nq6zs4ovns5jwlhr7nszusdivh32pxzxnpsa3i4mbxsm25i", "skill/valory/task_submission_abci/0.1.0": "bafybeiaa6fby75jusdz55ufy6x3td7bz2feh5bpwyipfy2nrnzpzrqrb3y", - "skill/valory/task_execution/0.1.0": "bafybeifi7ndwd64no5h6ceamxn46swfvb7zewzwwgpep25hujyoebe7jsa", + "skill/valory/task_execution/0.1.0": 
"bafybeiccegzjasdrwksd542jnj5e4k5nztta2yl3ssf5ijj6uqizt7d6m4", "skill/valory/websocket_client/0.1.0": "bafybeif7rrvsu6z4evqkhblxj3u6wwv2eqou576hgkyoehxuj7cntw7o2m", "skill/valory/subscription_abci/0.1.0": "bafybeig2vey5dxykpsojovlxw6f5o333f7kgmjyghmlu3yc5or5m4twqey", - "agent/valory/mech/0.1.0": "bafybeihtmqjxus44r6nen3dscfqc4zt7mkri7evklpz4aqroquf44aynv4", - "service/valory/mech/0.1.0": "bafybeifx5crgqc3rm3ur3e4kf3grphu5w2oyr2bxegtzp54qguqmw4smje" + "agent/valory/mech/0.1.0": "bafybeiahdheu25soa4bbu3qxqsbg3u5orpmqvqcrg4vsjk5wls4kpzatpa", + "service/valory/mech/0.1.0": "bafybeiegbmefda54qkfevjpjac2caqrg4y2sffxg4yhdmpbuh3b5my2tmq" }, "third_party": { "protocol/valory/default/1.0.0": "bafybeifqcqy5hfbnd7fjv4mqdjrtujh2vx3p2xhe33y67zoxa6ph7wdpaq", diff --git a/packages/valory/agents/mech/aea-config.yaml b/packages/valory/agents/mech/aea-config.yaml index 42d04c60..85b1f00a 100644 --- a/packages/valory/agents/mech/aea-config.yaml +++ b/packages/valory/agents/mech/aea-config.yaml @@ -42,7 +42,7 @@ skills: - valory/registration_abci:0.1.0:bafybeiek7zcsxbucjwzgqfftafhfrocvc7q4yxllh2q44jeemsjxg3rcfm - valory/reset_pause_abci:0.1.0:bafybeidw4mbx3os3hmv7ley7b3g3gja7ydpitr7mxbjpwzxin2mzyt5yam - valory/subscription_abci:0.1.0:bafybeig2vey5dxykpsojovlxw6f5o333f7kgmjyghmlu3yc5or5m4twqey -- valory/task_execution:0.1.0:bafybeifi7ndwd64no5h6ceamxn46swfvb7zewzwwgpep25hujyoebe7jsa +- valory/task_execution:0.1.0:bafybeiccegzjasdrwksd542jnj5e4k5nztta2yl3ssf5ijj6uqizt7d6m4 - valory/task_submission_abci:0.1.0:bafybeiaa6fby75jusdz55ufy6x3td7bz2feh5bpwyipfy2nrnzpzrqrb3y - valory/termination_abci:0.1.0:bafybeihq6qtbwt6i53ayqym63vhjexkcppy26gguzhhjqywfmiuqghvv44 - valory/transaction_settlement_abci:0.1.0:bafybeigtzlk4uakmd54rxnznorcrstsr52kta474lgrnvx5ovr546vj7sq diff --git a/packages/valory/customs/prediction_request/component.yaml b/packages/valory/customs/prediction_request/component.yaml index a420eb08..a5696269 100644 --- a/packages/valory/customs/prediction_request/component.yaml +++ b/packages/valory/customs/prediction_request/component.yaml @@ -7,10 +7,12 @@ license: Apache-2.0 aea_version: '>=1.0.0, <2.0.0' fingerprint: __init__.py: bafybeibbn67pnrrm4qm3n3kbelvbs3v7fjlrjniywmw2vbizarippidtvi - prediction_request.py: bafybeidticmtthrlms6tift6fd2xypflwqsyl4c7qjftouzgr7c3qjmtdy + prediction_request.py: bafybeif3s6wd3gotqpg6qdcs7zjszhffkyffeb554r2j5xvtmrbsxy7oca fingerprint_ignore_patterns: [] entry_point: prediction_request.py callable: run +params: + default_model: gpt-4-0125-preview dependencies: google-api-python-client: version: ==2.95.0 diff --git a/packages/valory/customs/prediction_request/prediction_request.py b/packages/valory/customs/prediction_request/prediction_request.py index c2c3c283..a2919f2d 100644 --- a/packages/valory/customs/prediction_request/prediction_request.py +++ b/packages/valory/customs/prediction_request/prediction_request.py @@ -221,8 +221,6 @@ def count_tokens(text: str, model: str) -> int: # "prediction-online-summarized-info", ] ALLOWED_MODELS = list(LLM_SETTINGS.keys()) -DEFAULT_MODEL = "gpt-4-0125-preview" -TOOL_TO_ENGINE = {tool: DEFAULT_MODEL for tool in ALLOWED_TOOLS} # the default number of URLs to fetch online information for DEFAULT_NUM_URLS = defaultdict(lambda: 3) DEFAULT_NUM_URLS["prediction-online-summarized-info"] = 7 @@ -658,7 +656,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]: with LLMClientManager(kwargs["api_keys"], kwargs["llm_provider"]): tool = kwargs["tool"] prompt = kwargs["prompt"] - engine = kwargs.get("model", 
-        engine = kwargs.get("model", TOOL_TO_ENGINE[tool])
+        engine = kwargs.get("model")
         print(f"ENGINE: {engine}")
         max_tokens = kwargs.get(
             "max_tokens", LLM_SETTINGS[engine]["default_max_tokens"]
         )
diff --git a/packages/valory/services/mech/service.yaml b/packages/valory/services/mech/service.yaml
index f43e8bb9..eeba2974 100644
--- a/packages/valory/services/mech/service.yaml
+++ b/packages/valory/services/mech/service.yaml
@@ -7,7 +7,7 @@ license: Apache-2.0
 fingerprint:
   README.md: bafybeif7ia4jdlazy6745ke2k2x5yoqlwsgwr6sbztbgqtwvs3ndm2p7ba
 fingerprint_ignore_patterns: []
-agent: valory/mech:0.1.0:bafybeihtmqjxus44r6nen3dscfqc4zt7mkri7evklpz4aqroquf44aynv4
+agent: valory/mech:0.1.0:bafybeiahdheu25soa4bbu3qxqsbg3u5orpmqvqcrg4vsjk5wls4kpzatpa
 number_of_agents: 4
 deployment:
   agent:
diff --git a/packages/valory/skills/task_execution/behaviours.py b/packages/valory/skills/task_execution/behaviours.py
index 9e510739..92106676 100644
--- a/packages/valory/skills/task_execution/behaviours.py
+++ b/packages/valory/skills/task_execution/behaviours.py
@@ -79,7 +79,7 @@ def __init__(self, **kwargs: Any):
         self._executor = ProcessPoolExecutor(max_workers=1)
         self._executing_task: Optional[Dict[str, Any]] = None
         self._tools_to_file_hash: Dict[str, str] = {}
-        self._all_tools: Dict[str, Tuple[str, str]] = {}
+        self._all_tools: Dict[str, Tuple[str, str, Dict[str, Any]]] = {}
         self._inflight_tool_req: Optional[str] = None
         self._done_task: Optional[Dict[str, Any]] = None
         self._last_polling: Optional[float] = None
@@ -189,11 +189,11 @@ def _download_tools(self) -> None:

     def _handle_get_tool(self, message: IpfsMessage, dialogue: Dialogue) -> None:
         """Handle get tool response"""
-        _component_yaml, tool_py, callable_method = ComponentPackageLoader.load(
+        component_yaml, tool_py, callable_method = ComponentPackageLoader.load(
             message.files
         )
         tool_req = cast(str, self._inflight_tool_req)
-        self._all_tools[tool_req] = tool_py, callable_method
+        self._all_tools[tool_req] = tool_py, callable_method, component_yaml
         self._inflight_tool_req = None

     def _populate_from_block(self) -> None:
@@ -286,6 +286,7 @@ def _handle_done_task(self, task_result: Any) -> None:
         mech_address = executing_task.get("contract_address", None)
         tool = executing_task.get("tool", None)
         model = executing_task.get("model", None)
+        tool_params = executing_task.get("params", None)
         response = {"requestId": req_id, "result": "Invalid response"}
         task_executor = self.context.agent_address
         self._done_task = {
@@ -304,6 +305,7 @@ def _handle_done_task(self, task_result: Any) -> None:
         metadata = {
             "model": model,
             "tool": tool,
+            "params": tool_params,
         }
         response = {
             **response,
@@ -387,7 +389,8 @@ def _submit_task(self, fn: Any, *args: Any, **kwargs: Any) -> Future:
     def _prepare_task(self, task_data: Dict[str, Any]) -> None:
         """Prepare the task."""
         tool_task = AnyToolAsTask()
-        tool_py, callable_method = self._all_tools[task_data["tool"]]
+        tool_py, callable_method, component_yaml = self._all_tools[task_data["tool"]]
+        tool_params = component_yaml.get("params", {})
         task_data["tool_py"] = tool_py
         task_data["callable_method"] = callable_method
         task_data["api_keys"] = self.params.api_keys
@@ -396,7 +399,10 @@ def _prepare_task(self, task_data: Dict[str, Any]) -> None:
         executing_task = cast(Dict[str, Any], self._executing_task)
         executing_task["timeout_deadline"] = time.time() + self.params.task_deadline
         executing_task["tool"] = task_data["tool"]
-        executing_task["model"] = task_data.get("model", None)
+        executing_task["model"] = task_data.get(
+            "model", tool_params.pop("default_model", None)
+        )
+ executing_task["params"] = tool_params self._async_result = cast(Optional[Future], future) def _build_ipfs_message( diff --git a/packages/valory/skills/task_execution/skill.yaml b/packages/valory/skills/task_execution/skill.yaml index 77a52992..b28331c8 100644 --- a/packages/valory/skills/task_execution/skill.yaml +++ b/packages/valory/skills/task_execution/skill.yaml @@ -7,7 +7,7 @@ license: Apache-2.0 aea_version: '>=1.0.0, <2.0.0' fingerprint: __init__.py: bafybeidqhvvlnthkbnmrdkdeyjyx2f2ab6z4xdgmagh7welqnh2v6wczx4 - behaviours.py: bafybeibspx4pipgcy4jqum4mse74hc5b4bfmushhiijxtbr3etw6bpx5pe + behaviours.py: bafybeibwc6asaiutsopq7flz3iis7si5karjs6tigsab4alrwsvodobm3u dialogues.py: bafybeid4zxalqdlo5mw4yfbuf34hx4jp5ay5z6chm4zviwu4cj7fudtwca handlers.py: bafybeidbt5ezj74cgfogk3w4uw4si2grlnk5g54veyumw7g5yh6gdscywu models.py: bafybeid6befxrrbiaw7nduz4zgbm5nfc246fn2eb6rfmja6v5hmq4wtcwe