# ---------- Azure OpenAI ----------
# Your Azure OpenAI resource endpoint (include trailing slash)
AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com/
# Your Azure OpenAI API key
AZURE_OPENAI_API_KEY=<your-azure-openai-key>
# The API version compatible with your deployment
AZURE_OPENAI_API_VERSION=2024-10-21
# Your deployment name (e.g., gpt-4o-mini, gpt-4o-mini-2024-07-18, etc.)
AZURE_OPENAI_DEPLOYMENT=gpt-4o-mini
# ---------- Datadog LLM Observability (Agentless, optional) ----------
# Your Datadog API key
DD_API_KEY=<your-datadog-api-key>
# Your Datadog site (e.g., datadoghq.com, us3.datadoghq.com, datadoghq.eu)
DD_SITE=datadoghq.com
# Application name to group traces/metrics in Datadog
DD_LLMOBS_ML_APP=quickstart-app
# ---------- Notes ----------
# 1) Copy this file to `.env` and fill in the placeholders.
# 2) The notebook auto-loads `.env` if present (python-dotenv).
# 3) To launch with ddtrace-run instead of embedded init:
#    DD_LLMOBS_ENABLED=1 DD_LLMOBS_ML_APP=$DD_LLMOBS_ML_APP DD_API_KEY=$DD_API_KEY ddtrace-run jupyter lab
{
"cells": [
{
"cell_type": "markdown",
"id": "f1247c76",
"metadata": {},
"source": [
"\n",
"# LangGraph + Azure OpenAI + Microsoft Learn MCP (FastMCP) + Datadog LLM Observability\n",
"\n",
"A **self-contained** demo notebook that:\n",
"1. Picks a random Azure product and asks: _\"What is <product>?\"_\n",
"2. Uses **Azure OpenAI** to decide which **Microsoft Learn MCP** tool to call (via **FastMCP**): search first, then fetch.\n",
"3. Returns markdown from the top doc result.\n",
"4. Instruments **Datadog LLM Observability** (agentless).\n",
"\n",
"> MCP tools exposed by the Learn server:\n",
">\n",
"> - `microsoft_docs_search(query: string)` — semantic search over Microsoft docs\n",
"> - `microsoft_docs_fetch(url: string)` — fetch & convert a docs page to markdown\n",
">\n",
"> A standalone smoke test of these tools follows below.\n"
]
},
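{
"cell_type": "markdown",
"id": "a0c1e2d3",
"metadata": {},
"source": [
"\n",
"## (Optional) Smoke-test the Learn MCP tools\n",
"\n",
"A minimal sketch that connects with the same FastMCP `Client` the graph uses later, lists the server's tools, and runs one `microsoft_docs_search` call. Result payload shapes can vary, so it only prints a summary; it relies on Jupyter's support for top-level `await`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4d5f6a7",
"metadata": {},
"outputs": [],
"source": [
"\n",
"from fastmcp import Client as FastMCPClient\n",
"\n",
"async def _mcp_smoke_test(query: str = \"What is Azure Functions?\"):\n",
"    # Connect to the Microsoft Learn MCP server, list its tools, and run one search.\n",
"    client = FastMCPClient(\"https://learn.microsoft.com/api/mcp\")\n",
"    async with client:\n",
"        tools = await client.list_tools()\n",
"        print(\"Tools:\", [getattr(t, \"name\", str(t)) for t in tools])\n",
"        result = await client.call_tool(\"microsoft_docs_search\", {\"query\": query})\n",
"        print(\"Search payload type:\", type(result).__name__)\n",
"        return result\n",
"\n",
"# Top-level await works in Jupyter; outside a notebook use asyncio.run(_mcp_smoke_test()).\n",
"_smoke = await _mcp_smoke_test()\n"
]
},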
{
"cell_type": "markdown",
"id": "df70611d",
"metadata": {},
"source": [
"\n",
"## (Optional) Create `.env` interactively\n",
"\n",
"If you haven't prepared a `.env` yet, run the cell below to create one **safely** from prompts.\n",
"- Keys are collected via `getpass` to avoid echoing them in the notebook.\n",
"- If `.env` already exists, this cell will **skip** creation (to avoid overwriting).\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71fe572e",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os, sys\n",
"from pathlib import Path\n",
"from getpass import getpass\n",
"\n",
"ENV_PATH = Path(\".env\")\n",
"if ENV_PATH.exists():\n",
"    print(\"`.env` already exists — skipping interactive creation. Edit it manually if needed.\")\n",
"else:\n",
"    print(\"Let's create a `.env` file with your settings. Leave blank to accept defaults shown in [brackets].\")\n",
"    # Azure\n",
"    azure_endpoint = input(\"AZURE_OPENAI_ENDPOINT [https://<your-resource>.openai.azure.com/]: \").strip() or \"https://<your-resource>.openai.azure.com/\"\n",
"    azure_api_key = getpass(\"AZURE_OPENAI_API_KEY: \").strip()\n",
"    azure_api_version = input(\"AZURE_OPENAI_API_VERSION [2024-10-21]: \").strip() or \"2024-10-21\"\n",
"    azure_deployment = input(\"AZURE_OPENAI_DEPLOYMENT [gpt-4o-mini]: \").strip() or \"gpt-4o-mini\"\n",
"\n",
"    # Datadog (optional)\n",
"    dd_use = input(\"Configure Datadog LLM Observability? [y/N]: \").strip().lower().startswith(\"y\")\n",
"    dd_api_key = \"\"\n",
"    dd_site = \"datadoghq.com\"\n",
"    dd_app = \"quickstart-app\"\n",
"    if dd_use:\n",
"        dd_api_key = getpass(\"DD_API_KEY (leave blank to skip): \").strip()\n",
"        dd_site_in = input(\"DD_SITE [datadoghq.com]: \").strip()\n",
"        dd_app_in = input(\"DD_LLMOBS_ML_APP [quickstart-app]: \").strip()\n",
"        if dd_site_in:\n",
"            dd_site = dd_site_in\n",
"        if dd_app_in:\n",
"            dd_app = dd_app_in\n",
"\n",
"    lines = []\n",
"    lines.append(f\"AZURE_OPENAI_ENDPOINT={azure_endpoint}\")\n",
"    lines.append(f\"AZURE_OPENAI_API_KEY={azure_api_key}\")\n",
"    lines.append(f\"AZURE_OPENAI_API_VERSION={azure_api_version}\")\n",
"    lines.append(f\"AZURE_OPENAI_DEPLOYMENT={azure_deployment}\")\n",
"    if dd_use:\n",
"        if dd_api_key:\n",
"            lines.append(f\"DD_API_KEY={dd_api_key}\")\n",
"        lines.append(f\"DD_SITE={dd_site}\")\n",
"        lines.append(f\"DD_LLMOBS_ML_APP={dd_app}\")\n",
"\n",
"    ENV_PATH.write_text(\"\\n\".join(lines) + \"\\n\", encoding=\"utf-8\")\n",
"    print(\"✅ Wrote .env (secrets not printed).\")\n"
]
},
{
"cell_type": "markdown",
"id": "4f747f56",
"metadata": {},
"source": [
"\n",
"## Setup\n",
"Install these if needed (restart the kernel after installing if imports fail):\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59e9030e",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# !pip install --upgrade pip\n",
"# !pip install fastmcp langgraph openai python-dotenv ddtrace\n"
]
},
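{
"cell_type": "markdown",
"id": "c8e9d0f1",
"metadata": {},
"source": [
"\n",
"A quick, optional check that the packages above are importable and which versions are installed (the distribution names are assumed to match the pip names used above).\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2a3b4c5",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Optional: report installed versions of the packages this notebook relies on.\n",
"import importlib.metadata as importlib_metadata\n",
"\n",
"for dist in (\"fastmcp\", \"langgraph\", \"openai\", \"python-dotenv\", \"ddtrace\"):\n",
"    try:\n",
"        print(f\"{dist}: {importlib_metadata.version(dist)}\")\n",
"    except importlib_metadata.PackageNotFoundError:\n",
"        print(f\"{dist}: not installed\")\n"
]
},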
{
"cell_type": "markdown",
"id": "673bde4b",
"metadata": {},
"source": [
"\n",
"## Load environment\n",
"This cell loads `.env` (if present), checks the required Azure variables, and reports whether the optional Datadog variables are set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91b74617",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"try:\n",
"    from dotenv import load_dotenv\n",
"    load_dotenv()\n",
"except Exception:\n",
"    pass\n",
"\n",
"required = [\"AZURE_OPENAI_ENDPOINT\",\"AZURE_OPENAI_API_KEY\",\"AZURE_OPENAI_API_VERSION\",\"AZURE_OPENAI_DEPLOYMENT\"]\n",
"missing = [k for k in required if not os.getenv(k)]\n",
"print(\"✅ Azure env looks good.\" if not missing else f\"⚠️ Missing Azure env: {missing}\")\n",
"\n",
"dd_missing = [k for k in [\"DD_API_KEY\",\"DD_SITE\",\"DD_LLMOBS_ML_APP\"] if not os.getenv(k)]\n",
"print(\"ℹ️ Datadog env present.\" if not dd_missing else f\"ℹ️ Datadog optional env missing: {dd_missing}\")\n"
]
},
{
"cell_type": "markdown",
"id": "8a87fee8",
"metadata": {},
"source": [
"\n",
"## Datadog LLM Observability (agentless, embedded)\n",
"\n",
"You can alternatively launch Python with `ddtrace-run` (see the `.env` notes), but the embedded approach below enables LLM Observability directly inside the notebook.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66238bde",
"metadata": {},
"outputs": [],
"source": [
"\n",
"try:\n",
"    from ddtrace.llmobs import LLMObs\n",
"    import os\n",
"    LLMObs.enable(\n",
"        ml_app=os.getenv(\"DD_LLMOBS_ML_APP\",\"quickstart-app\"),\n",
"        api_key=os.getenv(\"DD_API_KEY\"),\n",
"        site=os.getenv(\"DD_SITE\",\"datadoghq.com\"),\n",
"        agentless_enabled=True,\n",
"    )\n",
"    print(\"✅ Datadog LLMObs agentless enabled (embedded).\")\n",
"except Exception as e:\n",
"    print(\"ℹ️ Datadog LLMObs not enabled (install ddtrace and set DD_API_KEY/DD_SITE). Error:\", str(e))\n"
]
},
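{
"cell_type": "markdown",
"id": "e5f6a7b8",
"metadata": {},
"source": [
"\n",
"Optionally, you can also tag your own functions as LLM Observability spans. The sketch below assumes the `ddtrace.llmobs.decorators` module from recent ddtrace releases and is wrapped in a try/except so the notebook still runs if it is missing or LLMObs is disabled.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9a0b1c2",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Optional: mark a function as a custom LLMObs workflow span (skipped if decorators are unavailable).\n",
"try:\n",
"    from ddtrace.llmobs.decorators import workflow\n",
"except Exception:\n",
"    workflow = None\n",
"\n",
"if workflow is not None:\n",
"    @workflow\n",
"    def docs_lookup_demo(question: str) -> str:\n",
"        # Placeholder body; the real routing/fetching happens in the LangGraph nodes below.\n",
"        return f\"routed: {question}\"\n",
"\n",
"    print(docs_lookup_demo(\"What is Azure Key Vault?\"))\n",
"else:\n",
"    print(\"ℹ️ ddtrace.llmobs.decorators not available — skipping custom span demo.\")\n"
]
},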
{
"cell_type": "markdown",
"id": "b058b7b3",
"metadata": {},
"source": [
"\n",
"## Build the tiny graph (seed → route → MCP → done)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8bfea12",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"import random\n",
"import asyncio\n",
"from dataclasses import dataclass, field\n",
"from typing import Any\n",
"\n",
"# Azure OpenAI client (requires openai>=1.0)\n",
"try:\n",
"    from openai import AzureOpenAI\n",
"except ImportError as e:\n",
"    raise ImportError(\"AzureOpenAI requires the openai package >= 1.0; run the install cell above.\") from e\n",
"\n",
"def _azure_client():\n",
"    return AzureOpenAI(\n",
"        azure_endpoint=os.environ.get(\"AZURE_OPENAI_ENDPOINT\"),\n",
"        api_key=os.environ.get(\"AZURE_OPENAI_API_KEY\"),\n",
"        api_version=os.environ.get(\"AZURE_OPENAI_API_VERSION\"),\n",
"    )\n",
"\n",
"AZURE_PRODUCTS = [\n",
"    \"Azure Kubernetes Service\",\n",
"    \"Azure Functions\",\n",
"    \"Azure Cosmos DB\",\n",
"    \"Azure Blob Storage\",\n",
"    \"Azure Event Hubs\",\n",
"    \"Azure App Service\",\n",
"    \"Azure Virtual Network\",\n",
"    \"Azure Key Vault\",\n",
"    \"Azure Monitor\",\n",
"    \"Azure AI Search\",\n",
"]\n",
"\n",
"@dataclass\n",
"class AgentState:\n",
"    product: str = \"\"\n",
"    question: str = \"\"\n",
"    tool_choice: str = \"\"  # 'microsoft_docs_search' or 'microsoft_docs_fetch'\n",
"    search_payload: Any = None\n",
"    fetched_markdown: str = \"\"\n",
"    final_answer: str = \"\"\n",
"    debug: dict = field(default_factory=dict)\n",
"\n",
"def seed_node(state: AgentState) -> AgentState:\n",
"    product = random.choice(AZURE_PRODUCTS)\n",
"    state.product = product\n",
"    state.question = f\"What is {product}?\"\n",
"    state.debug[\"seed\"] = {\"product\": product}\n",
"    return state\n",
"\n",
"def route_node(state: AgentState) -> AgentState:\n",
"    client = _azure_client()\n",
"    deployment = os.environ.get(\"AZURE_OPENAI_DEPLOYMENT\")\n",
"    system = (\n",
"        \"You are a router for Microsoft Learn MCP tools.\\n\"\n",
"        \"Available tools:\\n\"\n",
"        \"- microsoft_docs_search(query: string)\\n\"\n",
"        \"- microsoft_docs_fetch(url: string)\\n\\n\"\n",
"        \"If the user asks 'what is <service>?', choose microsoft_docs_search.\\n\"\n",
"        \"Return ONLY one token: microsoft_docs_search or microsoft_docs_fetch.\"\n",
"    )\n",
"    resp = client.chat.completions.create(\n",
"        model=deployment,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": system},\n",
"            {\"role\": \"user\", \"content\": state.question},\n",
"        ],\n",
"        temperature=0,\n",
"    )\n",
"    content = (resp.choices[0].message.content or \"\").strip()\n",
"\n",
"    state.tool_choice = \"microsoft_docs_fetch\" if \"fetch\" in content.lower() else \"microsoft_docs_search\"\n",
"    state.debug[\"route\"] = {\"raw\": content, \"tool_choice\": state.tool_choice}\n",
"    return state\n"
]
},
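{
"cell_type": "markdown",
"id": "a7c8d9e0",
"metadata": {},
"source": [
"\n",
"Optional sanity check before wiring the graph: run `seed_node` and `route_node` once on a fresh state. This makes a single Azure OpenAI call, so it needs the Azure variables above to be set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1d2e3f4",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Optional: exercise the seed and route nodes in isolation (requires Azure OpenAI credentials).\n",
"_probe = route_node(seed_node(AgentState()))\n",
"print(_probe.question, \"->\", _probe.tool_choice)\n"
]
},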
{
"cell_type": "code",
"execution_count": null,
"id": "42e79e75",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import re\n",
"from fastmcp import Client as FastMCPClient\n",
"\n",
"MCP_URL = \"https://learn.microsoft.com/api/mcp\"\n",
"\n",
"async def mcp_call_async(state: AgentState) -> AgentState:\n",
"    client = FastMCPClient(MCP_URL)\n",
"    async with client:\n",
"        # Optional: list tools (FastMCP returns tool objects that expose a .name attribute)\n",
"        try:\n",
"            tools = await client.list_tools()\n",
"            state.debug[\"available_tools\"] = [getattr(t, \"name\", str(t)) for t in tools]\n",
"        except Exception:\n",
"            state.debug[\"available_tools\"] = []\n",
"\n",
"        # Always start with search, since we don't yet have a URL to fetch\n",
"        if state.tool_choice == \"microsoft_docs_fetch\":\n",
"            state.tool_choice = \"microsoft_docs_search\"\n",
"\n",
"        # 1) Search\n",
"        search = await client.call_tool(\"microsoft_docs_search\", {\"query\": state.question})\n",
"        state.search_payload = search\n",
"\n",
"        # Extract the first URL from nested payload shapes\n",
"        def _extract_urls(obj):\n",
"            urls = []\n",
"            if isinstance(obj, dict):\n",
"                for k in (\"url\",\"source_url\",\"href\",\"link\"):\n",
"                    v = obj.get(k)\n",
"                    if isinstance(v, str):\n",
"                        urls.append(v)\n",
"                for v in obj.values():\n",
"                    urls.extend(_extract_urls(v))\n",
"            elif isinstance(obj, list):\n",
"                for it in obj:\n",
"                    urls.extend(_extract_urls(it))\n",
"            return urls\n",
"\n",
"        urls = _extract_urls(search)\n",
"        if not urls:\n",
"            # Fallback: the payload may be a result object rather than plain JSON;\n",
"            # scan its string form for learn.microsoft.com links.\n",
"            urls = re.findall(r\"https://learn\\.microsoft\\.com[A-Za-z0-9\\-._~/%?#=&+]*\", str(search))\n",
"        top_url = urls[0] if urls else None\n",
"        if not top_url:\n",
"            state.final_answer = \"Search returned no obvious URL. Raw payload is in state.search_payload.\"\n",
"            state.debug[\"no_url\"] = True\n",
"            return state\n",
"\n",
"        # 2) Fetch markdown\n",
"        fetched = await client.call_tool(\"microsoft_docs_fetch\", {\"url\": top_url})\n",
"\n",
"        # If the client returned a result object, prefer its content blocks' text\n",
"        blocks = getattr(fetched, \"content\", None)\n",
"        if blocks is not None:\n",
"            fetched = [getattr(b, \"text\", str(b)) for b in blocks]\n",
"\n",
"        # Flatten to markdown-ish text\n",
"        def _flatten_text(x):\n",
"            parts = []\n",
"            if isinstance(x, dict):\n",
"                for key in (\"text\",\"markdown\",\"data\",\"content\"):\n",
"                    val = x.get(key)\n",
"                    if isinstance(val, str):\n",
"                        parts.append(val)\n",
"                for v in x.values():\n",
"                    parts.extend(_flatten_text(v))\n",
"            elif isinstance(x, list):\n",
"                for it in x:\n",
"                    parts.extend(_flatten_text(it))\n",
"            elif isinstance(x, str):\n",
"                parts.append(x)\n",
"            return parts\n",
"\n",
"        chunks = _flatten_text(fetched)\n",
"        md = \"\\n\\n\".join([c for c in chunks if isinstance(c, str)])\n",
"        state.fetched_markdown = md or str(fetched)\n",
"        state.final_answer = state.fetched_markdown[:4000]\n",
"        state.debug[\"fetch_url\"] = top_url\n",
"        return state\n",
"\n",
"def mcp_call_node(state: AgentState) -> AgentState:\n",
"    # Jupyter-friendly event loop handling: if a loop is already running (as in a notebook),\n",
"    # run the coroutine on a private loop in a worker thread; otherwise use asyncio.run().\n",
"    try:\n",
"        asyncio.get_running_loop()\n",
"    except RuntimeError:\n",
"        return asyncio.run(mcp_call_async(state))\n",
"    import threading, queue\n",
"    q = queue.Queue()\n",
"    def runner(q):\n",
"        loop = asyncio.new_event_loop()\n",
"        asyncio.set_event_loop(loop)\n",
"        try:\n",
"            q.put(loop.run_until_complete(mcp_call_async(state)))\n",
"        finally:\n",
"            loop.close()\n",
"    t = threading.Thread(target=runner, args=(q,), daemon=True)\n",
"    t.start(); t.join()\n",
"    return q.get()\n",
"\n",
"def done_node(state: AgentState) -> AgentState:\n",
"    if not state.final_answer:\n",
"        state.final_answer = f\"**{state.product}** — no markdown available. Debug: {state.debug}\"\n",
"    return state\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a174d9ad",
"metadata": {},
"outputs": [],
"source": [
"\n",
"from langgraph.graph import StateGraph, END\n",
"\n",
"graph = StateGraph(AgentState)\n",
"graph.add_node(\"seed\", seed_node)\n",
"graph.add_node(\"route\", route_node)\n",
"graph.add_node(\"mcp_call\", mcp_call_node)\n",
"graph.add_node(\"done\", done_node)\n",
"\n",
"graph.set_entry_point(\"seed\")\n",
"graph.add_edge(\"seed\", \"route\")\n",
"graph.add_edge(\"route\", \"mcp_call\")\n",
"graph.add_edge(\"mcp_call\", \"done\")\n",
"graph.add_edge(\"done\", END)\n",
"\n",
"app = graph.compile()\n",
"print(\"✅ Graph compiled.\")\n"
]
},
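{
"cell_type": "markdown",
"id": "c3d4e5f6",
"metadata": {},
"source": [
"\n",
"Optionally, render the compiled graph to confirm the seed → route → mcp_call → done wiring. The drawing helpers are assumed from recent LangGraph releases, so the cell falls back gracefully if they are unavailable.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7e8f9a0",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Optional: print a Mermaid description of the compiled graph (skip gracefully if unavailable).\n",
"try:\n",
"    print(app.get_graph().draw_mermaid())\n",
"except Exception as e:\n",
"    print(\"Graph rendering not available:\", e)\n"
]
},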
{
"cell_type": "markdown",
"id": "fdfec777",
"metadata": {},
"source": [
"\n",
"## Run it\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d1c2396",
"metadata": {},
"outputs": [],
"source": [
"\n",
"result = app.invoke(AgentState())\n",
"# Depending on the LangGraph version, invoke() may return the final state as a dict of values\n",
"# rather than an AgentState instance; normalize so the attribute access below works either way.\n",
"if isinstance(result, dict):\n",
"    state = AgentState(**{k: v for k, v in result.items() if k in AgentState.__dataclass_fields__})\n",
"else:\n",
"    state = result\n",
"\n",
"print(\"### QUESTION\")\n",
"print(state.question)\n",
"print(\"\\n### ANSWER (truncated)\")\n",
"print(state.final_answer[:2000])\n",
"print(\"\\n---\\nDEBUG keys:\", list(state.debug.keys()))\n",
"if \"fetch_url\" in state.debug:\n",
"    print(\"Fetched URL:\", state.debug[\"fetch_url\"])\n"
]
},
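{
"cell_type": "markdown",
"id": "e1f2a3b4",
"metadata": {},
"source": [
"\n",
"Optionally, render the fetched answer as rich Markdown instead of plain text (uses IPython's display helpers, which are available in Jupyter).\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5a6b7c8",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Optional: pretty-print the truncated answer as Markdown in the notebook.\n",
"from IPython.display import Markdown, display\n",
"\n",
"display(Markdown(state.final_answer[:2000]))\n"
]
}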
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}