HazyResearch · alexwang0317 · Mar 23, 2025 · Mar 23, 2025
diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,8 @@ wheels/
 .installed.cfg
 *.egg
 
+test.py
+
 # Virtual Environment
 venv/
 env/
@@ -44,4 +46,5 @@ data/
 .DS_Store
 .coverage
 htmlcov/
-.pytest_cache/ 
+.pytest_cache/ 
+minions_logs/
diff --git a/app.py b/app.py
@@ -22,6 +22,7 @@
 from streamlit_theme import st_theme
 from dotenv import load_dotenv
 import base64
+from minions.minion_notebooklm import MinionNotebookLM
 
 
 # Load environment variables from .env file
@@ -554,6 +555,12 @@ def initialize_clients(
             mcp_server_name=mcp_server_name,
             callback=message_callback,
         )
+    elif protocol == "Minions-NotebookLM":
+        st.session_state.method = MinionNotebookLM(
+            st.session_state.local_client,
+            st.session_state.remote_client,
+            callback=message_callback,
+        )
     else:  # Minion protocol
         st.session_state.method = Minion(
             st.session_state.local_client,
@@ -669,6 +676,13 @@ def run_protocol(
                 max_rounds=5,
                 use_bm25=use_bm25,
             )
+        elif protocol == "Minions-NotebookLM":
+            output = st.session_state.method(
+                task=task,
+                doc_metadata=doc_metadata,
+                context=[context],
+                max_rounds=5,
+            )
         else:
             output = st.session_state.method(
                 task=task,
@@ -916,7 +930,7 @@ def validate_azure_openai_key(api_key):
         "OpenRouter",
         "DeepSeek",
     ]:  # Added AzureOpenAI to the list
-        protocol_options = ["Minion", "Minions", "Minions-MCP"]
+        protocol_options = ["Minion", "Minions", "Minions-MCP", "Minions-NotebookLM"]
         protocol = st.segmented_control(
             "Communication protocol", options=protocol_options, default="Minion"
         )
@@ -1548,5 +1562,68 @@ def validate_azure_openai_key(api_key):
                             f"Remote messages: {len(round_meta['remote']['messages'])}"
                         )
 
+            # Turn the final answer into a two-host transcript for podcast audio generation.
+            def format_as_podcast_transcript(text):
+                """Return *text* as a transcript whose lines alternate HOST 1 / HOST 2.
+
+                Non-blank lines become turns; text with at most one such line is
+                split into sentences instead so that both hosts get to speak.
+                """
+                import re
+
+                turns = [line.strip() for line in text.split('\n') if line.strip()]
+                if len(turns) <= 1:
+                    # A single block of text has no paragraph breaks to alternate on.
+                    pieces = re.split(r'(?<=[.!?])\s+', text)
+                    turns = [piece.strip() for piece in pieces if piece.strip()]
+
+                # Header line first, then one "HOST n: ..." line per turn, n cycling 1, 2.
+                lines = ["<PODCAST TRANSCRIPT>"]
+                lines += [f"HOST {i % 2 + 1}: {turn}" for i, turn in enumerate(turns)]
+                return "\n".join(lines) + "\n"
+
+            # Show the final answer and, when a voice generator is available, a podcast rendition of it.
+            if "final_answer" in output and output["final_answer"]:
+                final_answer = output["final_answer"]
+
+                # Display the text answer
+                st.markdown(final_answer)
+
+                # Audio is optional: only attempt it when a voice generator's `csm_available` flag is set
+                if 'voice_generator' in st.session_state and st.session_state.voice_generator.csm_available:
+                    with st.spinner("Generating podcast audio..."):
+                        # Format the answer as a podcast transcript
+                        transcript = format_as_podcast_transcript(final_answer)
+
+                        # Name the file by epoch second inside a single output directory
+                        import time
+                        import os
+                        audio_dir = "minion_logs"
+                        audio_filename = f"{audio_dir}/podcast_{int(time.time())}.wav"
+                        os.makedirs(audio_dir, exist_ok=True)
+
+                        # Cache one PodcastGenerator in session state so it is built only once per session
+                        from minions.podcast_generator import PodcastGenerator
+                        if 'podcast_gen' not in st.session_state:
+                            st.session_state.podcast_gen = PodcastGenerator()
+
+                        st.session_state.podcast_gen.generate_from_transcript(
+                            transcript,
+                            output_filename=audio_filename,
+                        )
+
+                        # Display audio player in the UI
+                        st.subheader("Listen to this answer as a podcast:")
+                        st.audio(audio_filename)
+
+                        # Offer the same file for download; the button's return value is not needed
+                        with open(audio_filename, "rb") as file:
+                            st.download_button(
+                                label="Download Podcast Audio",
+                                data=file,
+                                file_name=os.path.basename(audio_filename),
+                                mime="audio/wav",
+                            )
+
         except Exception as e:
             st.error(f"An error occurred: {str(e)}")
diff --git a/minion_logs/20250319_091708_Explain_the_dee.json b/minion_logs/20250319_091708_Explain_the_dee.json
diff --git a/minion_logs/20250319_161845_Where_has_he_pr.json b/minion_logs/20250319_161845_Where_has_he_pr.json
@@ -0,0 +1,27 @@
+{
+  "task": "Where has he previously worked?",
+  "context": "I am a Postdoctoral Scholar at Stanford University, co-advised by Scott Linderman and Christopher Ré. I am also an academic partner with Databricks Mosaic AI, where I previously interned. I recently graduated from a PhD at Columbia’s Center for Theoretical Neuroscience, advised by John Cunningham and working closely with Liam Paninski.\n\nI build resource-efficient AI systems for science – in vision, timeseries, and text domains, fusing approaches from statistical ML and CS systems. Most notably, I worked on deep learning systems for tracking animal movement in videos - the Lightning Pose package (Nature Methods, 2024), scalability of Gaussian processes (ICML, 2021), and learning-forgetting tradeoffs in LLM finetuning for math and code generation (TMLR, 2024 (Featured Certification))\n\nThroughout my PhD, I collaborated closely with Lightning AI, named a Lightning Ambassador, and a featured developer in their first DevCon, June 2022.",
+  "conversation": [
+    {
+      "user": "remote",
+      "prompt": "We need to perform the following task.\n\n### Task\nWhere has he previously worked?\n\n### Instructions\nYou will not have direct access to the context, but can chat with a small language model which has read the entire thing.\n\nFeel free to think step-by-step, but eventually you must provide an output in the format below:\n\n```json\n{\n    \"message\": \"<your message to the small language model. If you are asking model to do a task, make sure it is a single task!>\"\n}\n```\n",
+      "output": "\n\n\n\n{\"message\": \"List all the places where he has previously worked.\"}"
+    },
+    {
+      "user": "local",
+      "prompt": "List all the places where he has previously worked.",
+      "output": "Based on the context provided, here are the places where he has previously worked:\n\n1. Stanford University (as a Postdoctoral Scholar)\n2. Databricks Mosaic AI (as an academic partner)\n3. Columbia's Center for Theoretical Neuroscience (during his PhD studies)"
+    },
+    {
+      "user": "remote",
+      "prompt": "\nHere is the response from the small language model:\n\n### Response\nBased on the context provided, here are the places where he has previously worked:\n\n1. Stanford University (as a Postdoctoral Scholar)\n2. Databricks Mosaic AI (as an academic partner)\n3. Columbia's Center for Theoretical Neuroscience (during his PhD studies)\n\n\n### Instructions\nAnalyze the response and think-step-by-step to determine if you have enough information to answer the question.\n\nThink about:\n1. What information we have gathered\n2. Whether it is sufficient to answer the question\n3. If not sufficient, what specific information is missing\n4. If sufficient, how we would calculate or derive the answer\n\n",
+      "output": "1. **Information Gathered:**\n   - He has worked at Stanford University as a Postdoctoral Scholar.\n   - He has been an academic partner at Databricks Mosaic AI.\n   - He was involved with Columbia's Center for Theoretical Neuroscience during his PhD studies.\n\n2. **Sufficiency of Information:**\n   - The information provided by the small language model seems sufficient to answer the question about where he has previously worked.\n\n3. **Missing Information:**\n   - There doesn't appear to be any missing information regarding the places he has worked, based on the response provided.\n\n4. **Deriving the Answer:**\n   - The answer to the question \"Where has he previously worked?\" can be directly derived from the list provided by the small language model.\n\nGiven this analysis, we have enough information to answer the question. The places where he has previously worked are Stanford University, Databricks Mosaic AI, and Columbia's Center for Theoretical Neuroscience."
+    },
+    {
+      "user": "remote",
+      "prompt": "Here is the response after step-by-step thinking.\n\n### Response\n1. **Information Gathered:**\n   - He has worked at Stanford University as a Postdoctoral Scholar.\n   - He has been an academic partner at Databricks Mosaic AI.\n   - He was involved with Columbia's Center for Theoretical Neuroscience during his PhD studies.\n\n2. **Sufficiency of Information:**\n   - The information provided by the small language model seems sufficient to answer the question about where he has previously worked.\n\n3. **Missing Information:**\n   - There doesn't appear to be any missing information regarding the places he has worked, based on the response provided.\n\n4. **Deriving the Answer:**\n   - The answer to the question \"Where has he previously worked?\" can be directly derived from the list provided by the small language model.\n\nGiven this analysis, we have enough information to answer the question. The places where he has previously worked are Stanford University, Databricks Mosaic AI, and Columbia's Center for Theoretical Neuroscience.\n\n### Instructions\nIf you have enough information or if the task is complete, write a final answer to fullfills the task. \n\n```json\n{\n    \"decision\": \"provide_final_answer\", \n    \"answer\": \"<your answer>\"\n}\n```\n\nOtherwise, if the task is not complete, request the small language model to do additional work, by outputting the following:\n\n```json\n{\n    \"decision\": \"request_additional_info\",\n    \"message\": \"<your message to the small language model>\"\n}\n```\n\n",
+      "output": "\n{\n    \"decision\": \"provide_final_answer\",\n    \"answer\": \"He has previously worked at Stanford University as a Postdoctoral Scholar, Databricks Mosaic AI as an academic partner, and Columbia's Center for Theoretical Neuroscience during his PhD studies.\"\n}"
+    }
+  ],
+  "generated_final_answer": "He has previously worked at Stanford University as a Postdoctoral Scholar, Databricks Mosaic AI as an academic partner, and Columbia's Center for Theoretical Neuroscience during his PhD studies."
+}
diff --git a/minion_logs/20250319_164316_Where_has_this_.json b/minion_logs/20250319_164316_Where_has_this_.json
@@ -0,0 +1,27 @@
+{
+  "task": "Where has this person worked?",
+  "context": "I am a Postdoctoral Scholar at Stanford University, co-advised by Scott Linderman and Christopher Ré. I am also an academic partner with Databricks Mosaic AI, where I previously interned. I recently graduated from a PhD at Columbia’s Center for Theoretical Neuroscience, advised by John Cunningham and working closely with Liam Paninski.\n\nI build resource-efficient AI systems for science – in vision, timeseries, and text domains, fusing approaches from statistical ML and CS systems. Most notably, I worked on deep learning systems for tracking animal movement in videos - the Lightning Pose package (Nature Methods, 2024), scalability of Gaussian processes (ICML, 2021), and learning-forgetting tradeoffs in LLM finetuning for math and code generation (TMLR, 2024 (Featured Certification))\n\nThroughout my PhD, I collaborated closely with Lightning AI, named a Lightning Ambassador, and a featured developer in their first DevCon, June 2022.",
+  "conversation": [
+    {
+      "user": "remote",
+      "prompt": "We need to perform the following task.\n\n### Task\nWhere has this person worked?\n\n### Instructions\nYou will not have direct access to the context, but can chat with a small language model which has read the entire thing.\n\nFeel free to think step-by-step, but eventually you must provide an output in the format below:\n\n```json\n{\n    \"message\": \"<your message to the small language model. If you are asking model to do a task, make sure it is a single task!>\"\n}\n```\n",
+      "output": "\n\n\n\n{\n    \"message\": \"List all the places where the person has worked.\"\n}"
+    },
+    {
+      "user": "local",
+      "prompt": "List all the places where the person has worked.",
+      "output": "Based on the context provided, here are the places where the person has worked:\n\n1. Stanford University (as a Postdoctoral Scholar)\n2. Databricks Mosaic AI (as an academic partner)\n3. Columbia's Center for Theoretical Neuroscience (during PhD studies)\n\nNote that the text also mentions collaborations and involvement with Lightning AI, but it does not explicitly state that they have \"worked\" there in the classical sense."
+    },
+    {
+      "user": "remote",
+      "prompt": "\nHere is the response from the small language model:\n\n### Response\nBased on the context provided, here are the places where the person has worked:\n\n1. Stanford University (as a Postdoctoral Scholar)\n2. Databricks Mosaic AI (as an academic partner)\n3. Columbia's Center for Theoretical Neuroscience (during PhD studies)\n\nNote that the text also mentions collaborations and involvement with Lightning AI, but it does not explicitly state that they have \"worked\" there in the classical sense.\n\n\n### Instructions\nAnalyze the response and think-step-by-step to determine if you have enough information to answer the question.\n\nThink about:\n1. What information we have gathered\n2. Whether it is sufficient to answer the question\n3. If not sufficient, what specific information is missing\n4. If sufficient, how we would calculate or derive the answer\n\n",
+      "output": "1. **Information Gathered:**\n   - The person has worked at:\n     - Stanford University as a Postdoctoral Scholar\n     - Databricks Mosaic AI as an academic partner\n     - Columbia's Center for Theoretical Neuroscience during PhD studies\n   - There is mention of collaborations with Lightning AI, but it is not clear if this constitutes formal employment.\n\n2. **Sufficiency of Information:**\n   - The information provided seems sufficient to answer the question of where the person has worked. The response lists specific institutions and roles, which directly address the task.\n\n3. **Missing Information:**\n   - There is no explicit mention of formal employment with Lightning AI, so if the task requires a comprehensive list of all professional engagements, including collaborations, this might be a gap. However, based on the typical interpretation of \"worked,\" the information seems adequate.\n\n4. **Deriving the Answer:**\n   - The answer can be derived directly from the response by listing the institutions and roles mentioned. The task is to identify places of work, and the response provides a clear list of such places.\n\nGiven this analysis, the information provided is sufficient to answer the question about where the person has worked. The response from the small language model directly addresses the task by listing the relevant institutions and roles."
+    },
+    {
+      "user": "remote",
+      "prompt": "Here is the response after step-by-step thinking.\n\n### Response\n1. **Information Gathered:**\n   - The person has worked at:\n     - Stanford University as a Postdoctoral Scholar\n     - Databricks Mosaic AI as an academic partner\n     - Columbia's Center for Theoretical Neuroscience during PhD studies\n   - There is mention of collaborations with Lightning AI, but it is not clear if this constitutes formal employment.\n\n2. **Sufficiency of Information:**\n   - The information provided seems sufficient to answer the question of where the person has worked. The response lists specific institutions and roles, which directly address the task.\n\n3. **Missing Information:**\n   - There is no explicit mention of formal employment with Lightning AI, so if the task requires a comprehensive list of all professional engagements, including collaborations, this might be a gap. However, based on the typical interpretation of \"worked,\" the information seems adequate.\n\n4. **Deriving the Answer:**\n   - The answer can be derived directly from the response by listing the institutions and roles mentioned. The task is to identify places of work, and the response provides a clear list of such places.\n\nGiven this analysis, the information provided is sufficient to answer the question about where the person has worked. The response from the small language model directly addresses the task by listing the relevant institutions and roles.\n\n### Instructions\nIf you have enough information or if the task is complete, write a final answer to fullfills the task. \n\n```json\n{\n    \"decision\": \"provide_final_answer\", \n    \"answer\": \"<your answer>\"\n}\n```\n\nOtherwise, if the task is not complete, request the small language model to do additional work, by outputting the following:\n\n```json\n{\n    \"decision\": \"request_additional_info\",\n    \"message\": \"<your message to the small language model>\"\n}\n```\n\n",
+      "output": "\n{\n    \"decision\": \"provide_final_answer\",\n    \"answer\": \"The person has worked at the following places: Stanford University as a Postdoctoral Scholar, Databricks Mosaic AI as an academic partner, and Columbia's Center for Theoretical Neuroscience during PhD studies.\"\n}"
+    }
+  ],
+  "generated_final_answer": "The person has worked at the following places: Stanford University as a Postdoctoral Scholar, Databricks Mosaic AI as an academic partner, and Columbia's Center for Theoretical Neuroscience during PhD studies."
+}