perplexityai · rbuchmayer-pplx · Jun 11, 2026 · Jun 11, 2026
diff --git a/docs/articles/langchain-vc-memo-agent/README.md b/docs/articles/langchain-vc-memo-agent/README.md
@@ -0,0 +1,13 @@
+# VC Investment Memo Agent with LangGraph
+
+Runnable code for the cookbook guide at
+https://docs.perplexity.ai/docs/cookbook/articles/langchain-vc-memo-agent/README
+
+```bash
+cd scripts
+pip install -r requirements.txt
+export PPLX_API_KEY="pplx-..."
+export LANGSMITH_API_KEY="ls__..."
+export LANGSMITH_TRACING="true"
+python -m memo --company "Anthropic"
+```
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/.env.example b/docs/articles/langchain-vc-memo-agent/scripts/.env.example
@@ -0,0 +1,11 @@
+# ChatPerplexity reads PPLX_API_KEY.
+PPLX_API_KEY=pplx-...
+
+# LangSmith — tracing is on from the start so every node's tool calls and
+# token usage are captured end-to-end.
+LANGSMITH_API_KEY=ls__...
+LANGSMITH_TRACING=true
+
+# Only needed for the provider comparison (python -m memo.compare).
+PARALLEL_API_KEY=...
+EXA_API_KEY=...
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/__init__.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/__init__.py
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/__main__.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+# Package entry point: this guard runs the CLI when the package is executed
+# with `python -m memo`.
+if __name__ == "__main__":
+    main()
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/compare.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/compare.py
@@ -0,0 +1,48 @@
+import asyncio
+from langsmith import Client
+from langsmith.evaluation import evaluate
+
+from .eval_dataset import EVAL_COMPANIES
+from .evaluators import primary_source_rate, financial_concept_coverage
+from .profiles import PROFILES, ProviderProfile
+
+# Name of the LangSmith dataset that holds one example per evaluation company.
+DATASET_NAME = "vc-memo-eval-v2"
+# Module-level LangSmith client, created once at import time and reused by
+# upload_dataset().
+client = Client()
+
+
+def upload_dataset() -> None:
+    """Create the LangSmith eval dataset from EVAL_COMPANIES if it doesn't already exist."""
+    if any(d.name == DATASET_NAME for d in client.list_datasets()):
+        return
+    dataset = client.create_dataset(DATASET_NAME, description="VC memo evaluation")
+    for company in EVAL_COMPANIES:
+        client.create_example(
+            inputs={"company": company},
+            outputs={},
+            dataset_id=dataset.id,
+        )
+
+
+async def _run_one(company: str, profile: ProviderProfile) -> dict:
+    """Run the memo graph for one company under the given provider profile."""
+    graph = profile.build_graph()
+    final = await graph.ainvoke({"company": company, "research_output": {}, "memo": ""})
+    return {"memo_md": final["memo"]}
+
+
+def main() -> None:
+    """Upload the dataset and run a LangSmith evaluation for each provider profile."""
+    upload_dataset()
+    for name in ("perplexity", "parallel", "exa"):
+        profile = PROFILES[name]
+        evaluate(
+            lambda inputs, profile=profile: asyncio.run(_run_one(inputs["company"], profile)),
+            data=DATASET_NAME,
+            evaluators=[primary_source_rate, financial_concept_coverage],
+            experiment_prefix=f"memo-{name}",
+            max_concurrency=2,
+        )
+
+
+# Run the provider comparison when this module is executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/eval_dataset.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/eval_dataset.py
@@ -0,0 +1,7 @@
+# A mix of public and private companies across sectors. The evaluators (see
+# evaluators.py) are search-quality metrics — no per-company ground truth is
+# required, so extend this list with your own targets freely.
+EVAL_COMPANIES = [
+    "Anduril", "Arm Holdings", "Cohere", "CrowdStrike", "Klaviyo",
+    "Mistral AI", "Palantir", "Perplexity", "Reddit", "xAI",
+]
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/evaluators.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/evaluators.py
@@ -0,0 +1,61 @@
+import re
+from urllib.parse import urlparse
+from langsmith.evaluation import EvaluationResult, run_evaluator
+
+# Matches an http(s) URL up to the next whitespace character; callers strip
+# trailing punctuation themselves.
+URL_RE = re.compile(r"https?://\S+")
+
+# Matches hosts that contain a label such as "investors.", "ir.", "press." or
+# "newsroom." at the start of the host or directly after a dot.
+PRIMARY_HOST_RE = re.compile(
+    r"(^|\.)(investors?|ir|investorrelations?|press|news|newsroom|press-?releases?|media)\.",
+    re.IGNORECASE,
+)
+# Hosts that _classify treats as primary sources (regulator and press-wire sites).
+PRIMARY_DOMAINS = {"sec.gov", "edgar.sec.gov", "businesswire.com",
+                   "prnewswire.com", "globenewswire.com"}
+# Hosts that _classify treats as aggregators rather than primary sources.
+AGGREGATOR_DOMAINS = {"en.wikipedia.org", "crunchbase.com", "pitchbook.com",
+                      "simplywall.st", "stockanalysis.com", "finance.yahoo.com",
+                      "reddit.com", "medium.com", "macrotrends.net"}
+
+
+def _classify(url: str, company: str) -> str:
+    """Classify a cited URL as primary, aggregator, or neutral source."""
+    host = urlparse(url).netloc.lower()
+    if host in PRIMARY_DOMAINS or PRIMARY_HOST_RE.search(host):
+        return "primary"
+    co_words = [w.lower() for w in company.split() if len(w) > 3]
+    if any(w in host for w in co_words):
+        return "primary"        # Company's own domain counts as primary
+    if host in AGGREGATOR_DOMAINS:
+        return "aggregator"
+    return "neutral"
+
+
+@run_evaluator
+def primary_source_rate(run, example) -> EvaluationResult:
+    """Share of citations from primary sources (IR pages, SEC, official press)
+    rather than aggregators (Wikipedia, Crunchbase). Neutral domains are
+    excluded from the ratio."""
+    memo = run.outputs["memo_md"]
+    company = example.inputs["company"]
+    urls = [u.rstrip(".,)") for u in URL_RE.findall(memo)]
+    primary = sum(1 for u in urls if _classify(u, company) == "primary")
+    aggregator = sum(1 for u in urls if _classify(u, company) == "aggregator")
+    denom = primary + aggregator
+    score = primary / denom if denom else None
+    return EvaluationResult(key="primary_source_rate", score=score)
+
+
+@run_evaluator
+def financial_concept_coverage(run, example) -> EvaluationResult:
+    """Of four financial concepts (valuation, revenue/ARR, funding, operating
+    metrics), how many appear in the Financials section?"""
+    memo = run.outputs["memo_md"]
+    m = re.search(r"##.*Financials.*?\n(.*?)(?=\n##\s|\Z)", memo, re.DOTALL | re.IGNORECASE)
+    if not m:
+        return EvaluationResult(key="financial_concept_coverage", score=0.0)
+    body = m.group(1).lower()
+    hits = sum([
+        bool(re.search(r"valuation|valued at|market cap|post-money|pre-money", body)),
+        bool(re.search(r"revenue|arr |annual recurring|run.?rate", body)),
+        bool(re.search(r"raised|series [a-h]|funding round|total funding", body)),
+        bool(re.search(r"gross margin|operating margin|cash position|growth|customers|headcount|employees", body)),
+    ])
+    return EvaluationResult(key="financial_concept_coverage", score=hits / 4)
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/graph.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/graph.py
@@ -0,0 +1,196 @@
+# NOTE: The inline code blocks in the cookbook guide
+# (docs/cookbook/articles/langchain-vc-memo-agent/README.mdx in ppl-ai/api-docs)
+# are verbatim slices of this file. When editing either, update both in the
+# same change.
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Annotated, Any, TypedDict
+
+from langchain_core.messages import AIMessage
+from langchain_perplexity import ChatPerplexity
+from langgraph.graph import END, START, StateGraph
+
+
+def merge_research_output(left: dict[str, str], right: dict[str, str]) -> dict[str, str]:
+    """Each research node returns {"<section>": "..."}; merge into one dict."""
+    return {**(left or {}), **(right or {})}
+
+
+class MemoState(TypedDict):
+    """Graph state shared by the research nodes and the synthesizer."""
+
+    # Name of the company the memo is written about.
+    company: str
+    # Research text keyed by section name; updates returned by the research
+    # nodes are combined with merge_research_output.
+    research_output: Annotated[dict[str, str], merge_research_output]
+    # Final memo text, written by the synthesizer node.
+    memo: str
+
+
+# Model identifiers for the research nodes and for the synthesizer. They are
+# currently the same model but are configured separately.
+SUBNODE_MODEL_NAME = "openai/gpt-5.5"
+SYNTHESIZER_MODEL_NAME = "openai/gpt-5.5"
+
+
+def _agent_model(model: str) -> ChatPerplexity:
+    """Build a ChatPerplexity client wired to the Responses API."""
+    # The Responses (Agent) API ignores sampling params like temperature, so we omit it.
+    return ChatPerplexity(model=model, use_responses_api=True)
+
+
+# Shared model clients, built once at import time: SUBNODE_MODEL serves every
+# research node and SYNTHESIZER_MODEL serves the synthesizer node.
+SUBNODE_MODEL = _agent_model(SUBNODE_MODEL_NAME)
+SYNTHESIZER_MODEL = _agent_model(SYNTHESIZER_MODEL_NAME)
+
+
+# Per-research-node tool specs. Each list is passed as `tools=` to the model
+# call of the matching research node.
+# Team research uses web search with a recency filter of "year"
+# (presumably limits results to the past year — confirm against the API docs).
+TEAM_TOOLS = [{
+    "type": "web_search",
+    "filters": {"search_recency_filter": "year"},
+}]
+
+# Product research: plain web search, no filters.
+PRODUCT_TOOLS = [{"type": "web_search"}]
+
+# Market research: plain web search, no filters.
+MARKET_TOOLS = [{"type": "web_search"}]
+
+# Financials research gets both finance_search and web_search.
+FINANCIALS_TOOLS = [{"type": "finance_search"}, {"type": "web_search"}]
+
+
+# Per-research-node max_steps caps the Perplexity Agent API's internal search loop.
+# The value is sent to the API as extra_body={"max_steps": ...}; financials is
+# given the largest budget.
+RESEARCH_MAX_STEPS = {
+    "team": 2, "financials": 5, "product": 2, "market": 2,
+}
+
+
+# System-prompt template shared by all research nodes. It is filled with
+# str.format using three fields: {section}, {company} and {guidance}.
+RESEARCH_PROMPT = """You are a VC analyst writing the {section} section of the research output for {company}.
+
+{guidance}
+
+Return a markdown section, then end the document with a "### Citations" header \
+followed by a markdown list of:
+
+  - <url> — one-sentence evidence quoted from the source
+
+Cite only URLs that came back from your tool calls; never fabricate URLs. \
+Keep the section focused — 250-400 words is appropriate for the body."""
+
+
+GUIDANCE = {
+    "team": (
+        "Search for the founders, CEO, and other named executives. Capture each "
+        "leader's prior roles and education. Prioritize the company's own About/Team "
+        "page and professional-network sources."
+    ),
+    "financials": (
+        "If the company is public, use finance_search for revenue, margins, and analyst "
+        "estimates. If private, use web_search for funding rounds, valuation, and "
+        "disclosed revenue. Cross-check structured data against recent news."
+    ),
+    "product": (
+        "Describe the company's flagship product, recent launches, and technical "
+        "differentiators. Cite the company's own product or engineering pages where "
+        "possible, plus tech-press coverage for context."
+    ),
+    "market": (
+        "Map the competitive landscape, name direct competitors, and surface market "
+        "sizing. Your web_search is scoped to analyst and trade-press sources."
+    ),
+}
+
+
+def _run_research(
+    state: MemoState,
+    *,
+    section: str,
+    tools: list[dict[str, Any]],
+    max_steps: int,
+) -> dict[str, dict[str, str]]:
+    """Run one research section with the given tools and return its output."""
+    msg: AIMessage = SUBNODE_MODEL.invoke(
+        [
+            {"role": "system", "content": RESEARCH_PROMPT.format(
+                section=section, company=state["company"], guidance=GUIDANCE[section],
+            )},
+            {"role": "user", "content": f"Research the {section} of {state['company']}."},
+        ],
+        tools=tools,
+        extra_body={"max_steps": max_steps},
+    )
+    return {"research_output": {section: msg.content}}
+
+
+def team_node(state):
+    """Research the founders and leadership team."""
+    return _run_research(state, section="team",
+        tools=TEAM_TOOLS, max_steps=RESEARCH_MAX_STEPS["team"])
+
+def financials_node(state):
+    """Research revenue, funding, and financial metrics."""
+    return _run_research(state, section="financials",
+        tools=FINANCIALS_TOOLS, max_steps=RESEARCH_MAX_STEPS["financials"])
+
+def product_node(state):
+    """Research the product, launches, and technical differentiators."""
+    return _run_research(state, section="product",
+        tools=PRODUCT_TOOLS, max_steps=RESEARCH_MAX_STEPS["product"])
+
+def market_node(state):
+    """Research the competitive landscape and market sizing."""
+    return _run_research(state, section="market",
+        tools=MARKET_TOOLS, max_steps=RESEARCH_MAX_STEPS["market"])
+
+
+# System-prompt template for the synthesizer node. It is filled with
+# str.format and {company} is its only field. The prompt fixes the memo's seven
+# sections and their H2 heading format, which the prompt itself notes the
+# evaluator relies on.
+SYNTH_PROMPT = """You are a senior VC partner writing the final memo for {company}.
+
+You may only cite evidence that appears in the research outputs below. You have no \
+tools; do not browse or fabricate sources.
+
+Produce a markdown memo with these seven sections, in order:
+
+  1. Snapshot — what the company is, founded, valuation, positioning (3-4 sentences)
+  2. Team — founders, leadership, recent senior hires
+  3. Financials — revenue, growth, funding history, comparables
+  4. Product — what they sell, technology, distribution
+  5. Market — TAM, direct competitors, category dynamics
+  6. Risks — top 3-5 risks with brief reasoning
+  7. Thesis — 1-2 paragraphs of analysis, ending with a single line:
+     "Recommendation: <PASS | TRACK | ADVANCE | LEAD>"
+
+Each section's H2 heading must be exactly `## <N> · <Section Name>` \
+(e.g. `## 1 · Snapshot`), using a middle-dot separator — the evaluator depends \
+on this format.
+
+Each of sections 1-6 must end with a `### Citations` subsection listing the \
+<url> — <evidence> pairs drawn from the research outputs. Section 7 (Thesis) does \
+not need its own citations.
+
+If a research output lacks evidence for a section, write "Insufficient evidence in \
+research outputs." in that section's body instead of guessing."""
+
+
+def synthesizer_node(state: MemoState) -> dict[str, str]:
+    """Combine all research outputs into the final memo. No tools attached."""
+    research_output_block = "\n\n".join(
+        f"## Research output: {name}\n\n{body}"
+        for name, body in sorted(state["research_output"].items())
+    )
+    msg: AIMessage = SYNTHESIZER_MODEL.invoke([
+        {"role": "system", "content": SYNTH_PROMPT.format(company=state["company"])},
+        {"role": "user", "content": (
+            f"Company: {state['company']}\n"
+            f"As-of: {datetime.now(timezone.utc).isoformat(timespec='seconds')}\n\n"
+            f"Research outputs:\n\n{research_output_block}"
+        )},
+    ])
+    return {"memo": msg.content}
+
+
+def build_graph():
+    """Wire the four research nodes in parallel from START into the synthesizer, then END."""
+    g = StateGraph(MemoState)
+    g.add_node("team", team_node)
+    g.add_node("financials", financials_node)
+    g.add_node("product", product_node)
+    g.add_node("market", market_node)
+    g.add_node("synthesizer", synthesizer_node)
+
+    for section in ("team", "financials", "product", "market"):
+        g.add_edge(START, section)
+        g.add_edge(section, "synthesizer")
+
+    g.add_edge("synthesizer", END)
+    return g.compile()
diff --git a/docs/articles/langchain-vc-memo-agent/scripts/memo/main.py b/docs/articles/langchain-vc-memo-agent/scripts/memo/main.py
@@ -0,0 +1,23 @@
+import argparse
+import asyncio
+
+from .graph import build_graph
+
+
+async def run_memo(company: str) -> str:
+    """Run the full memo agent for one company and return the final markdown memo."""
+    graph = build_graph()
+    final = await graph.ainvoke({"company": company, "research_output": {}, "memo": ""})
+    return final["memo"]
+
+
+def main() -> None:
+    """CLI entrypoint: parse `--company` and print the generated memo."""
+    parser = argparse.ArgumentParser(description="VC investment memo agent.")
+    parser.add_argument("--company", required=True)
+    args = parser.parse_args()
+    print(asyncio.run(run_memo(args.company)))
+
+
+# Run the CLI when this module is executed directly.
+if __name__ == "__main__":
+    main()