diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index 3a21c0f7d0e..1e3f366e511 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -1314,7 +1314,7 @@ def _reflection_with_llm(
         else:
             raise ValueError("No OpenAIWrapper client is found.")
         response = self._generate_oai_reply_from_client(llm_client=llm_client, messages=messages, cache=cache)
-        return response
+        return self.generate_oai_reply(messages=messages, config=llm_client)
 
     def _check_chat_queue_for_sender(self, chat_queue: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
diff --git a/notebook/agentchat_groupchat_user_input_stateflow.ipynb b/notebook/agentchat_groupchat_user_input_stateflow.ipynb
new file mode 100644
index 00000000000..4c8043958d2
--- /dev/null
+++ b/notebook/agentchat_groupchat_user_input_stateflow.ipynb
@@ -0,0 +1,426 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# User Input + StateFlow GroupChat: Build Workflows through State-Oriented Actions that can intelligently include the user in the process\n",
+    "\n",
+    "AutoGen offers conversable agents powered by LLMs, tools, or humans, which can be used to perform tasks collectively via automated chat. In this notebook, we show how to use a group chat to build workflows with AutoGen agents from a state-oriented perspective while allowing for user input during the process.\n",
+    "\n",
+    "This notebook extends [StateFlow: Build Workflows through State-Oriented Actions](https://microsoft.github.io/autogen/docs/notebooks/agentchat_groupchat_stateflow/) so that agents can request user input during the process.\n",
+    "\n",
+    "\n",
+    "````{=mdx}\n",
+    ":::info Requirements\n",
+    "Install `autogen-agentchat`:\n",
+    "```bash\n",
+    "pip install autogen-agentchat~=0.2\n",
+    "```\n",
+    "\n",
+    "For more information, please refer to the [installation guide](/docs/installation/).\n",
+    ":::\n",
+    "````"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set your API Endpoint\n",
+    "\n",
+    "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import autogen\n",
+    "\n",
+    "config_list = autogen.config_list_from_json(\n",
+    "    \"OAI_CONFIG_LIST\",\n",
+    "    filter_dict={\n",
+    "        \"tags\": [\"gpt-4o-2024-05-13\"],\n",
+    "    },\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "````{=mdx}\n",
+    ":::tip\n",
+    "Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).\n",
+    ":::\n",
+    "````\n",
+    "## Workflow: Arxiv Research\n",
+    "We define the following agents:\n",
+    "- Planner: Create a plan.\n",
+    "- Admin: Human in the loop to provide optional feedback.\n",
+    "- Engineer: Retrieve papers from the internet by writing code.\n",
+    "- Executor: Execute the code.\n",
+    "- Scientist: Read the papers and write a summary.\n",
+    "\n",
+    "The pipeline is the following:\n",
+    "0. Input: a str prompt -> a research topic from Arxiv\n",
+    "1. Plan: The planner will create a plan based on the prompt and decide if it would like user input. \n",
+    "2. Optional_plan_input: If the planner does not ask for user input, the workflow continues to step 3; if it does, the Admin is asked for input and we return to step 1.\n",
+    "3. Code: The engineer will write code to retrieve papers from the internet. \n",
+    "4. Optional_code_input: If the engineer is stuck or doesn't know how to solve the task, they can ask the user for input before returning to step 3. \n",
+    "5. Execution: The code will be executed by executor. If the code execution fails, we go back to step 3. \n",
+    "6. Summary: If the code is executed successfully, the scientist will read the papers and write a summary. They can choose to ask the user for input. \n",
+    "7. Optional_summary_input: If the scientist asked for user input in the summary, the Admin is asked for input, and we go back to step 6. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gpt4_config = {\n",
+    "    \"cache_seed\": 42,  # change the cache_seed for different trials\n",
+    "    \"temperature\": 0,\n",
+    "    \"config_list\": config_list,\n",
+    "    \"timeout\": 120,\n",
+    "}\n",
+    "\n",
+    "planner = autogen.AssistantAgent(\n",
+    "    name=\"Planner\",\n",
+    "    system_message=\"\"\"You are the Planner for a research workflow. The user will provide a task, and your job is to suggest a plan. \n",
+    "The plan can involve an engineer who can write code and a scientist who doesn't write code. The engineer always acts first, and then the scientist. \n",
+    "Do not suggest recursive plans where agents need to interact multiple times. Keep it simple. \n",
+    "Explain the plan first. Be clear which step is performed by an engineer, and which step is performed by a scientist. \n",
+    "If you need to ask the user for more information, you can do so by including ´ASK_USER:´ in your response. If you do, use the answer and respond with a complete plan. \n",
+    "\"\"\",\n",
+    "    llm_config=gpt4_config,\n",
+    ")\n",
+    "\n",
+    "user_proxy = autogen.UserProxyAgent(\n",
+    "    name=\"Admin\",\n",
+    "    system_message=\"A human admin. Interacts with the planner and scientist if asked.\",\n",
+    "    code_execution_config=False,\n",
+    "    default_auto_reply=\"No user input received\"\n",
+    ")\n",
+    "\n",
+    "engineer = autogen.AssistantAgent(\n",
+    "    name=\"Engineer\",\n",
+    "    llm_config=gpt4_config,\n",
+    "    system_message=\"\"\"You are an Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.\n",
+    "Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.\n",
+    "If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n",
+    "If you can't solve the task, you can ask the user for more information by including ´ASK_USER:´ in your response. If you do, use the answer and respond with a complete code solution.\n",
+    "\"\"\",\n",
+    ")\n",
+    "scientist = autogen.AssistantAgent(\n",
+    "    name=\"Scientist\",\n",
+    "    llm_config=gpt4_config,\n",
+    "    system_message=\"\"\"You are a Scientist. You are part of a research workflow, which has been planned. \n",
+    "Follow the plan the Planner suggested, and provide a summary of the information retrieved. You don't write code. \n",
+    "If you need to ask the user for more information, you can do so by including ´ASK_USER:´ in your response. If you do, use the answer and respond with a complete summary.\"\"\",\n",
+    ")\n",
+    "\n",
+    "executor = autogen.UserProxyAgent(\n",
+    "    name=\"Executor\",\n",
+    "    system_message=\"Executor. Execute the code written by the engineer and report the result.\",\n",
+    "    human_input_mode=\"NEVER\",\n",
+    "    code_execution_config={\n",
+    "        \"last_n_messages\": 3,\n",
+    "        \"work_dir\": \"paper\",\n",
+    "        \"use_docker\": False,\n",
+    "    },  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n",
+    ")\n",
+    "\n",
+    "from typing import Dict, List\n",
+    "from autogen import Agent\n",
+    "\n",
+    "\n",
+    "def custom_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):\n",
+    "    \"\"\"Define a customized speaker selection function.\n",
+    "    A recommended way is to define a transition for each speaker in the groupchat.\n",
+    "\n",
+    "    Returns:\n",
+    "        Return an `Agent` class or a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use.\n",
+    "    \"\"\"\n",
+    "    messages = groupchat.messages\n",
+    "\n",
+    "    if len(messages) <= 1:\n",
+    "        return planner\n",
+    "    \n",
+    "    if last_speaker is user_proxy:\n",
+    "        if messages[-2][\"name\"] == \"Planner\":\n",
+    "            # If it is the planning stage, let the planner continue\n",
+    "            return planner\n",
+    "        elif messages[-2][\"name\"] == \"Scientist\":\n",
+    "            # If the admin was answering the scientist, let the scientist continue\n",
+    "            return scientist\n",
+    "        elif messages[-2][\"name\"] == \"Engineer\":\n",
+    "            # If the admin was answering the engineer, let the engineer continue\n",
+    "            return engineer\n",
+    "        \n",
+    "    elif \"ASK_USER:\" in messages[-1][\"content\"] and last_speaker is not executor:\n",
+    "        # If someone asks for user input, let the user speak\n",
+    "        return user_proxy\n",
+    "\n",
+    "    elif last_speaker is planner:\n",
+    "        # If the last message is from the planner, we continue to the engineer because they didn't ask for user input\n",
+    "        return engineer\n",
+    "\n",
+    "    elif last_speaker is engineer:\n",
+    "        if \"```python\" in messages[-1][\"content\"]:\n",
+    "            # If the last message is a python code block, let the executor speak\n",
+    "            return executor\n",
+    "        else:\n",
+    "            # Otherwise, let the engineer speak again. NOTE: the engineer gets no hint about why it was re-selected; adding a clarifying message here would be better\n",
+    "            return engineer\n",
+    "\n",
+    "    elif last_speaker is executor:\n",
+    "        if \"exitcode: 1\" in messages[-1][\"content\"]:\n",
+    "            # If the last message indicates an error, let the engineer improve the code\n",
+    "            return engineer\n",
+    "        else:\n",
+    "            # Otherwise, let the scientist speak\n",
+    "            return scientist\n",
+    "\n",
+    "    elif last_speaker is scientist:\n",
+    "        # If the last message is from the scientist, we end because they didn't request user input\n",
+    "        return None\n",
+    "    else:\n",
+    "        return \"random\"\n",
+    "\n",
+    "\n",
+    "groupchat = autogen.GroupChat(\n",
+    "    agents=[user_proxy, engineer, scientist, planner, executor],\n",
+    "    messages=[],\n",
+    "    max_round=5,\n",
+    "    speaker_selection_method=custom_speaker_selection_func,\n",
+    ")\n",
+    "manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=gpt4_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mAdmin\u001b[0m (to chat_manager):\n",
+      "\n",
+      "Find a latest paper about gpt-4 on arxiv and find its potential applications in software.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mPlanner\u001b[0m (to chat_manager):\n",
+      "\n",
+      "### Plan\n",
+      "\n",
+      "1. **Engineer Step:**\n",
+      "   - The engineer will write a script to search for the latest papers about GPT-4 on arXiv. This script will use the arXiv API to fetch the most recent papers related to GPT-4.\n",
+      "   - The engineer will then extract the relevant information from the fetched papers, such as the title, authors, abstract, and publication date.\n",
+      "   - The engineer will provide a summary of the latest paper found, including a link to the full paper.\n",
+      "\n",
+      "2. **Scientist Step:**\n",
+      "   - The scientist will read the summary and the full paper provided by the engineer.\n",
+      "   - The scientist will analyze the paper to identify potential applications of GPT-4 in software.\n",
+      "   - The scientist will document these potential applications in a detailed report.\n",
+      "\n",
+      "### Execution\n",
+      "\n",
+      "1. **Engineer Step:**\n",
+      "   - Write a Python script to interact with the arXiv API and search for the latest papers about GPT-4.\n",
+      "   - Extract and summarize the relevant information from the latest paper found.\n",
+      "\n",
+      "2. **Scientist Step:**\n",
+      "   - Read and analyze the summary and full paper.\n",
+      "   - Identify and document potential applications of GPT-4 in software.\n",
+      "\n",
+      "Would you like to proceed with this plan, or do you have any additional requirements or constraints?\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mEngineer\u001b[0m (to chat_manager):\n",
+      "\n",
+      "Let's proceed with the plan. I'll start by writing a Python script to search for the latest papers about GPT-4 on arXiv and extract relevant information.\n",
+      "\n",
+      "```python\n",
+      "import requests\n",
+      "import xml.etree.ElementTree as ET\n",
+      "\n",
+      "def search_arxiv(query, max_results=1):\n",
+      "    url = f'http://export.arxiv.org/api/query?search_query={query}&start=0&max_results={max_results}'\n",
+      "    response = requests.get(url)\n",
+      "    if response.status_code == 200:\n",
+      "        return response.text\n",
+      "    else:\n",
+      "        return None\n",
+      "\n",
+      "def parse_arxiv_response(response):\n",
+      "    root = ET.fromstring(response)\n",
+      "    ns = {'arxiv': 'http://www.w3.org/2005/Atom'}\n",
+      "    entries = root.findall('arxiv:entry', ns)\n",
+      "    papers = []\n",
+      "    for entry in entries:\n",
+      "        title = entry.find('arxiv:title', ns).text\n",
+      "        authors = [author.find('arxiv:name', ns).text for author in entry.findall('arxiv:author', ns)]\n",
+      "        summary = entry.find('arxiv:summary', ns).text\n",
+      "        published = entry.find('arxiv:published', ns).text\n",
+      "        link = entry.find('arxiv:id', ns).text\n",
+      "        papers.append({\n",
+      "            'title': title,\n",
+      "            'authors': authors,\n",
+      "            'summary': summary,\n",
+      "            'published': published,\n",
+      "            'link': link\n",
+      "        })\n",
+      "    return papers\n",
+      "\n",
+      "def main():\n",
+      "    query = 'all: \"GPT-4\"'\n",
+      "    response = search_arxiv(query)\n",
+      "    if response:\n",
+      "        papers = parse_arxiv_response(response)\n",
+      "        if papers:\n",
+      "            latest_paper = papers[0]\n",
+      "            print(\"Title:\", latest_paper['title'])\n",
+      "            print(\"Authors:\", \", \".join(latest_paper['authors']))\n",
+      "            print(\"Published:\", latest_paper['published'])\n",
+      "            print(\"Summary:\", latest_paper['summary'])\n",
+      "            print(\"Link:\", latest_paper['link'])\n",
+      "        else:\n",
+      "            print(\"No papers found.\")\n",
+      "    else:\n",
+      "        print(\"Failed to fetch data from arXiv.\")\n",
+      "\n",
+      "if __name__ == \"__main__\":\n",
+      "    main()\n",
+      "```\n",
+      "\n",
+      "This script will search for the latest paper about GPT-4 on arXiv, extract relevant information, and print it out. Let's execute this script to find the latest paper and its potential applications in software.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[31m\n",
+      ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n",
+      "\u001b[33mExecutor\u001b[0m (to chat_manager):\n",
+      "\n",
+      "exitcode: 0 (execution succeeded)\n",
+      "Code output: \n",
+      "Title: Can LLMs like GPT-4 outperform traditional AI tools in dementia\n",
+      "  diagnosis? Maybe, but not today\n",
+      "Authors: Zhuo Wang, Rongzhen Li, Bowen Dong, Jie Wang, Xiuxing Li, Ning Liu, Chenhui Mao, Wei Zhang, Liling Dong, Jing Gao, Jianyong Wang\n",
+      "Published: 2023-06-02T12:47:45Z\n",
+      "Summary:   Recent investigations show that large language models (LLMs), specifically\n",
+      "GPT-4, not only have remarkable capabilities in common Natural Language\n",
+      "Processing (NLP) tasks but also exhibit human-level performance on various\n",
+      "professional and academic benchmarks. However, whether GPT-4 can be directly\n",
+      "used in practical applications and replace traditional artificial intelligence\n",
+      "(AI) tools in specialized domains requires further experimental validation. In\n",
+      "this paper, we explore the potential of LLMs such as GPT-4 to outperform\n",
+      "traditional AI tools in dementia diagnosis. Comprehensive comparisons between\n",
+      "GPT-4 and traditional AI tools are conducted to examine their diagnostic\n",
+      "accuracy in a clinical setting. Experimental results on two real clinical\n",
+      "datasets show that, although LLMs like GPT-4 demonstrate potential for future\n",
+      "advancements in dementia diagnosis, they currently do not surpass the\n",
+      "performance of traditional AI tools. The interpretability and faithfulness of\n",
+      "GPT-4 are also evaluated by comparison with real doctors. We discuss the\n",
+      "limitations of GPT-4 in its current state and propose future research\n",
+      "directions to enhance GPT-4 in dementia diagnosis.\n",
+      "\n",
+      "Link: http://arxiv.org/abs/2306.01499v1\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mScientist\u001b[0m (to chat_manager):\n",
+      "\n",
+      "### Summary of the Latest Paper on GPT-4\n",
+      "\n",
+      "**Title:** Can LLMs like GPT-4 outperform traditional AI tools in dementia diagnosis? Maybe, but not today\n",
+      "\n",
+      "**Authors:** Zhuo Wang, Rongzhen Li, Bowen Dong, Jie Wang, Xiuxing Li, Ning Liu, Chenhui Mao, Wei Zhang, Liling Dong, Jing Gao, Jianyong Wang\n",
+      "\n",
+      "**Published Date:** 2023-06-02\n",
+      "\n",
+      "**Summary:**\n",
+      "The paper investigates the potential of large language models (LLMs), specifically GPT-4, in outperforming traditional AI tools in the domain of dementia diagnosis. While GPT-4 has shown remarkable capabilities in various Natural Language Processing (NLP) tasks and has achieved human-level performance on several professional and academic benchmarks, its effectiveness in specialized domains like dementia diagnosis remains uncertain.\n",
+      "\n",
+      "The authors conducted comprehensive comparisons between GPT-4 and traditional AI tools to evaluate their diagnostic accuracy in clinical settings. The experimental results, based on two real clinical datasets, indicate that although GPT-4 shows promise for future advancements in dementia diagnosis, it currently does not surpass the performance of traditional AI tools. Additionally, the paper evaluates the interpretability and faithfulness of GPT-4 by comparing its outputs with real doctors' assessments. The authors discuss the limitations of GPT-4 in its current state and propose future research directions to enhance its capabilities in dementia diagnosis.\n",
+      "\n",
+      "**Link to Full Paper:** [arXiv:2306.01499v1](http://arxiv.org/abs/2306.01499v1)\n",
+      "\n",
+      "### Potential Applications of GPT-4 in Software\n",
+      "\n",
+      "Based on the findings and discussions in the paper, here are some potential applications of GPT-4 in software:\n",
+      "\n",
+      "1. **Medical Diagnosis Assistance:**\n",
+      "   - **Clinical Decision Support Systems:** GPT-4 can be integrated into clinical decision support systems to assist healthcare professionals by providing preliminary diagnostic suggestions and insights based on patient data.\n",
+      "   - **Symptom Checker Tools:** GPT-4 can be used to develop advanced symptom checker tools that help patients understand potential health issues and recommend when to seek medical attention.\n",
+      "\n",
+      "2. **Healthcare Data Analysis:**\n",
+      "   - **Medical Record Summarization:** GPT-4 can be employed to summarize patient medical records, making it easier for healthcare providers to quickly review patient histories and make informed decisions.\n",
+      "   - **Natural Language Processing of Medical Texts:** GPT-4 can be used to analyze and extract valuable information from large volumes of medical texts, such as research papers, clinical notes, and patient feedback.\n",
+      "\n",
+      "3. **Patient Interaction and Support:**\n",
+      "   - **Virtual Health Assistants:** GPT-4 can power virtual health assistants that provide patients with information about their conditions, treatment options, and medication instructions.\n",
+      "   - **Mental Health Support:** GPT-4 can be used to develop chatbots and virtual therapists that offer mental health support and counseling to patients.\n",
+      "\n",
+      "4. **Research and Development:**\n",
+      "   - **Medical Research Assistance:** GPT-4 can assist researchers by analyzing and summarizing scientific literature, identifying trends, and generating hypotheses for further investigation.\n",
+      "   - **Clinical Trial Matching:** GPT-4 can help match patients with suitable clinical trials based on their medical history and condition, improving the efficiency of clinical research.\n",
+      "\n",
+      "5. **Education and Training:**\n",
+      "   - **Medical Education Tools:** GPT-4 can be used to create interactive educational tools and simulations for medical students and professionals, enhancing their learning experience.\n",
+      "   - **Continuing Medical Education:** GPT-4 can provide up-to-date information and training materials to healthcare professionals, helping them stay current with the latest advancements in their field.\n",
+      "\n",
+      "While GPT-4 shows potential in these applications, it is important to note that its current limitations must be addressed through further research and development to ensure its reliability and effectiveness in specialized domains like healthcare.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "chat_result = user_proxy.initiate_chat(\n",
+    "    manager, \n",
+    "    message=\"Find a latest paper about gpt-4 on arxiv and find its potential applications in software.\",\n",
+    "    max_turns=20,\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "front_matter": {
+   "description": "User Input + StateFlow: Build Workflows through State-Oriented Actions that can include the user in the process",
+   "tags": [
+    "orchestration",
+    "group chat",
+    "research"
+   ]
+  },
+  "kernelspec": {
+   "display_name": "flaml",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py
index 0cb652ce54b..202724230b5 100755
--- a/test/agentchat/test_groupchat.py
+++ b/test/agentchat/test_groupchat.py
@@ -2178,6 +2178,8 @@ def test_select_speaker_transform_messages():
     # test_speaker_selection_auto_process_result()
     # test_speaker_selection_validate_speaker_name()
     # test_select_speaker_auto_messages()
+    # test_speaker_selection_auto_process_result()
+    # test_speaker_selection_validate_speaker_name()
     # test_select_speaker_auto_messages()
     # test_manager_messages_to_string()
     # test_manager_messages_from_string()