{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Using LangChain to get structured outputs\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Exception reporting mode: Minimal\n" ] } ], "source": [ " %xmode minimal" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from langchain_core.prompts import ChatPromptTemplate\n", "from langchain_anthropic import ChatAnthropic\n", "from langchain_ollama import ChatOllama\n", "from langchain_core.output_parsers import JsonOutputParser\n", "import streamlit as st" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# 1 - Claude\n", "# 2 - Ollama, Llama 3.2\n", "# 3 - Ollama, Llama 3.2 with JSON mode\n", "# 4 - Ollama, Gemma2\n", "# 5 - Ollama, Gemma2 with JSON mode\n", "USE_LLM = 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "claude_api_key = \"\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "if USE_LLM == 1:\n", " llm_model = ChatAnthropic(model=\"claude-3-haiku-20240307\", api_key=claude_api_key)\n", "elif USE_LLM == 2:\n", " llm_model = ChatOllama(model=\"llama3.2\", temperature=1)\n", "elif USE_LLM == 3:\n", " llm_model = ChatOllama(model=\"llama3.2\", format=\"json\", temperature=1)\n", "elif USE_LLM == 4:\n", " llm_model = ChatOllama(model=\"gemma2\", temperature=1)\n", "elif USE_LLM == 5:\n", " llm_model = ChatOllama(model=\"gemma2\", format=\"json\", temperature=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Method 1: Structured output using the tool-calling API under the hood\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can define a Pydantic model and the output will be returned as a Pydantic object with validation\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!', rating=8)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from typing import Optional\n", "from pydantic import BaseModel, Field\n", "\n", "\n", "class Joke(BaseModel):\n", " \"\"\"Joke to tell user.\"\"\"\n", "\n", " setup: str = Field(description=\"The setup of the joke\")\n", " punchline: str = Field(description=\"The punchline to the joke\")\n", " rating: int = Field(description=\"How funny the joke is, from 1 to 10\")\n", "\n", "\n", "structured_llm = llm_model.with_structured_output(Joke)\n", "structured_llm.invoke(\"Tell me a joke about cats\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Defining the schema using a TypedDict parses the JSON output into a Python dict not a Pydantic object so there's no schema validation\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'punchline': 'Why did the cat join a band?',\n", " 'rating': '8',\n", " 'setup': 'Because it wanted to be the purr-cussionist!'}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from typing_extensions import Annotated, TypedDict\n", "\n", "\n", "class JokeTD(TypedDict):\n", " \"\"\"Joke to tell user.\"\"\"\n", "\n", " setup: Annotated[str, ..., \"The setup of the joke\"]\n", " punchline: Annotated[str, ..., \"The punchline of the joke\"]\n", " 
rating: Annotated[Optional[int], ..., \"How funny the joke is, from 1 to 10\"]\n", "\n", "\n", "structured_llm = llm_model.with_structured_output(JokeTD)\n", "structured_llm.invoke(\"Tell me a joke about cats\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Or just extract the JSON Schema object\n" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'punchline': 'Why did the cat join a band?',\n", " 'rating': 8,\n", " 'setup': 'Because it wanted to be a purr-cussionist.'}" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "structured_llm = llm_model.with_structured_output(Joke.model_json_schema())\n", "structured_llm.invoke(\"Tell me a joke about cats\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's try a more complicated structure with nested types\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "ValidationError", "evalue": "1 validation error for ArticleResponse\nhistorical_timeline\n Input should be a valid list [type=list_type, input_value='[\"1528: Spanish explorer...he American Civil War\"]', input_type=str]\n For further information visit https://errors.pydantic.dev/2.9/v/list_type", "output_type": "error", "traceback": [ "\u001b[0;31mValidationError\u001b[0m\u001b[0;31m:\u001b[0m 1 validation error for ArticleResponse\nhistorical_timeline\n Input should be a valid list [type=list_type, input_value='[\"1528: Spanish explorer...he American Civil War\"]', input_type=str]\n For further information visit https://errors.pydantic.dev/2.9/v/list_type\n" ] } ], "source": [ "class ArticleResponse(BaseModel):\n", " \"\"\"A clear and concise answer to the users question.\"\"\"\n", "\n", " title: str = Field(description=\"Title of the article\")\n", " context: str = Field(\n", " description=\"Provide a brief historical context to answer the question.\"\n", " )\n", " historical_timeline: list[str] = Field(\n", " description=\"Provide a list of historical events relevant to the question\"\n", " )\n", "\n", "\n", "structured_llm = llm_model.with_structured_output(ArticleResponse)\n", "structured_llm.invoke(\"Tell me the history of the state of Texas in America\")" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ValidationError: 1 validation error for ArticleResponse\n", "historical_timeline\n", " Input should be a valid list [type=list_type, input_value='[\"1763: Texas becomes pa...ate States of America\"]', input_type=str]\n", " For further information visit https://errors.pydantic.dev/2.9/v/list_type\n", "\n", "Raw output:\n", "{'context': 'Texas has a rich and diverse history that spans over 300 years, from its early days as a Mexican territory to its current status as the second-largest state in the US.', 'historical_timeline': '[\"1763: Texas becomes part of Spain after Mexico gains independence from Spain\", \"1821: The Texas Revolution begins with the Battle of Gonzales and the famous \"Come and Take It\" declaration\", \"1836: Texas declares independence from Mexico and establishes the Republic of Texas\", \"1845: The US purchases Texas from Mexico for $15 million and it becomes a state\", \"1860s: Texas secedes from the Union during the American Civil War and joins the Confederate States of America\"]', 'title': 'The History of Texas'}\n" ] } ], "source": [ "structured_llm = 
llm_model.with_structured_output(ArticleResponse, include_raw=True)\n", "results = structured_llm.invoke(\"Tell me the history of the state of Texas in America\")\n", "raw_output = results[\"raw\"].response_metadata[\"message\"][\"tool_calls\"][0][\"function\"][\n", " \"arguments\"\n", "]\n", "\n", "try:\n", " ArticleResponse(**raw_output)\n", "except Exception as e:\n", " print(f\"{type(e).__name__}: {str(e)}\")\n", " print(f\"\\nRaw output:\\n{raw_output}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can generate the JSON schema dict directly from the Pydantic model and pass that instead, in which case we get the raw dict output without Pydantic validation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'context': 'The state of Texas is located in the south-central region of the United States. It is bordered by Mexico to the south, New Mexico and Oklahoma to the west, Arkansas and Louisiana to the east, and the Gulf of Mexico to the southeast.',\n", " 'historical_timeline': '[\"Texas declared independence from Mexico on March 2, 1836\", \"The Republic of Texas existed as a separate nation for nearly a decade before being annexed by the United States in 1845\", \"The Texas Civil War broke out in 1861 over secession from the Union and was fought between pro-Union forces and Confederate states rights advocates\", \"After the end of the American Civil War, Reconstruction efforts took place in Texas during the late 19th century\", \"The early 20th century saw significant industrialization and urbanization in Texas\", \"In 1948, the federal government established several national parks within Texas to preserve its unique natural environment\"]',\n", " 'title': 'A Brief History of the State of Texas'}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "structured_llm_js = llm_model.with_structured_output(\n", " ArticleResponse.model_json_schema()\n", ")\n", "structured_llm_js.invoke(\"Tell me the history of the state of Texas in America\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Under the hood: How Pydantic models are converted to JSON Schema\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The JSON schema representation is quite straightforward\n" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'description': 'Joke to tell user.',\n", " 'properties': {'setup': {'description': 'The setup of the joke',\n", " 'title': 'Setup',\n", " 'type': 'string'},\n", " 'punchline': {'description': 'The punchline to the joke',\n", " 'title': 'Punchline',\n", " 'type': 'string'},\n", " 'rating': {'description': 'How funny the joke is, from 1 to 10',\n", " 'title': 'Rating',\n", " 'type': 'integer'}},\n", " 'required': ['setup', 'punchline', 'rating'],\n", " 'title': 'Joke',\n", " 'type': 'object'}" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Joke.model_json_schema()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the same schema is contained in the format instructions, except for the top-level 'title' and 'type'\n" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n", "\n", "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": 
\"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n", "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n", "\n", "Here is the output schema:\n", "```\n", "{\"description\": \"Joke to tell user.\", \"properties\": {\"setup\": {\"description\": \"The setup of the joke\", \"title\": \"Setup\", \"type\": \"string\"}, \"punchline\": {\"description\": \"The punchline to the joke\", \"title\": \"Punchline\", \"type\": \"string\"}, \"rating\": {\"description\": \"How funny the joke is, from 1 to 10\", \"title\": \"Rating\", \"type\": \"integer\"}}, \"required\": [\"setup\", \"punchline\", \"rating\"]}\n", "```\n" ] } ], "source": [ "from langchain_core.output_parsers import PydanticOutputParser\n", "\n", "output_parser = PydanticOutputParser(pydantic_object=Joke)\n", "print(output_parser.get_format_instructions())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Method 2: JSON formating instructions\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using the PydanticOutputParser allows us to specify JSON outputs for other models that don't support tool calling.\n" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [ { "ename": "OutputParserException", "evalue": "Failed to parse Joke from completion {\"message\": \"Why did the cat join a band? Because it wanted to be the purr-cussionist!\"}. Got: 3 validation errors for Joke\nsetup\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\npunchline\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\nrating\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE", "output_type": "error", "traceback": [ "\u001b[0;31mOutputParserException\u001b[0m\u001b[0;31m:\u001b[0m Failed to parse Joke from completion {\"message\": \"Why did the cat join a band? Because it wanted to be the purr-cussionist!\"}. 
Got: 3 validation errors for Joke\nsetup\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\npunchline\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\nrating\n Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/missing\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE\n" ] } ], "source": [ "from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser\n", "from langchain_core.exceptions import OutputParserException\n", "\n", "parser = PydanticOutputParser(pydantic_object=Joke)\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", " (\n", " \"system\",\n", " \"Answer the user query. Wrap the output in `json` tags\\n{format_instructions}\",\n", " ),\n", " (\"human\", \"{query}\"),\n", " ]\n", ").partial(format_instructions=parser.get_format_instructions())\n", "\n", "chain_llm = prompt | llm_model | parser\n", "chain_llm.invoke(\"Tell me a joke about cats\")" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"properties\": {\"setup\": {\"title\": \"Why did the cat join a band?\", \"description\": \"Because it wanted to be the purr-cussionist.\", \"type\": \"string\"}, \"punchline\": {\"title\": \"Purr-cussionist joke\", \"description\": \"a play on words\", \"type\": \"string\"}, \"rating\": {\"title\": \"Funny rating out of 10\", \"description\": \"How funny is this cat joke?\", \"type\": \"integer\"}}, \"required\": [\"setup\", \"punchline\", \"rating\"]}\n" ] } ], "source": [ "prompt_user_format = ChatPromptTemplate.from_template(\n", " \"{input} \\n{format_instructions}\"\n", ").partial(format_instructions=parser.get_format_instructions())\n", "\n", "structured_llm = prompt_user_format | llm_model | StrOutputParser()\n", "print(structured_llm.invoke(\"Tell me a joke about cats\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Structured output with Pydantic validation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "llm_model = ChatOllama(model=\"llama3.2\", temperature=1)\n", "\n", "chain = prompt_direct | llm_model.with_structured_output(schema=ArticleResponse1)\n", "\n", "try:\n", " output = chain.invoke(dict(question=questions[0]))\n", "except Exception as e:\n", " print(f\"{type(e).__name__}: {str(e)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Structured output without Pydantic validation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] }, { "data": { "text/plain": [ "{'answer': 'The oldest recorded fossil dates back to around 3.5 billion years ago, during a time known as the Eoarchean era of the Precambrian period. 
This ancient relic is called Strelley Pool fossil, found in Western Australia.',\n", " 'number': 3400000000,\n", " 'title': 'Uncovering the Ancient Past: The Oldest Recorded Fossil'}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "llm_model = ChatOllama(model=\"llama3.2\", temperature=1)\n", "\n", "chain = prompt_direct | llm_model.with_structured_output(\n", " schema=ArticleResponse1.model_json_schema()\n", ")\n", "output = chain.invoke(dict(question=questions[0]))\n", "print(type(output))\n", "output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Structured output using output parsers\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using a system prompt seems much less reliable than just inserting the format instructions into a user prompt. Why is this?\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OutputParserException: Invalid json output: {\"title\": \"Uncovering the Oldest Record of Life on Earth\", \"answer\": \"The oldest recorded fossil is believed to be Strome Canyon chert, which dates back approximately 3.46 billion years. This ancient relic was discovered in Western Australia and provides a glimpse into the earliest life forms on our planet. The fossils were found embedded in a rock formation that has been dated using various geological methods, including uranium-lead dating. This incredible find has sparked significant interest in the scientific community, shedding light on the origins of life on Earth.\", \"number\": 3,430,000,000}\n", "For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE\n" ] } ], "source": [ "from langchain_core.output_parsers import PydanticOutputParser, JsonOutputParser\n", "from langchain.output_parsers.fix import OutputFixingParser\n", "\n", "parser = JsonOutputParser(pydantic_object=ArticleResponse1)\n", "prompt = prompt_user_format.partial(\n", " format_instructions=parser.get_format_instructions()\n", ")\n", "\n", "structured_llm = prompt | llm_model | parser\n", "\n", "try:\n", " output = structured_llm.invoke(dict(question=questions[0]))\n", " print(output)\n", "\n", "except Exception as e:\n", " print(f\"{type(e).__name__}: {str(e)}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OutputParserException: Invalid json output: **Uncovering the Oldest Fossil: A Window into Earth's Ancient Past**\n", "\n", "The search for ancient secrets in the earth's crust has led us to discover one of the most significant fossils in human history. Meet Archeopteryx, a 150-million-year-old bird-like creature that sheds light on the evolution of life on our planet.\n", "\n", "Discovered in 1861 by German paleontologist Hermann von Meyer, Archeopteryx was initially thought to be a mere hybrid between dinosaurs and birds. However, further analysis revealed its unique characteristics, including feathers, wings, and claws. This remarkable fossil has been extensively studied, providing insights into the transition from non-flying reptiles to birds.\n", "\n", "Other contenders for the oldest recorded fossil include Tiktaalik, an ancient fish-like creature with limb-like fins, and Hallucigenia, a bizarre worm-like animal with spines on its back. 
However, Archeopteryx remains one of the most significant discoveries in paleontology, offering a glimpse into the mysteries of life's diversification during the Jurassic period. This ancient bird-like creature serves as a poignant reminder of our shared history with the natural world.\n", "For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE\n" ] } ], "source": [ "parser = JsonOutputParser(pydantic_object=ArticleResponse1)\n", "prompt = prompt_system_format.partial(\n", " format_instructions=parser.get_format_instructions()\n", ")\n", "\n", "structured_llm = prompt | llm_model | parser\n", "try:\n", " output = structured_llm.invoke(dict(question=questions[0]))\n", " print(output)\n", "except Exception as e:\n", " print(f\"{type(e).__name__}: {str(e)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fixing the output with `OutputFixingParser`. Would it be better to use a separate model with a lower temperature for the fixing step, rather than the original model?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OutputParserException: Failed to parse ArticleResponse2 from completion {\"$defs\": {\"HistoricalEvent\": {\"description\": \"The year and explanation of a historical event.\", \"properties\": {\"year\": {\"description\": \"The year of the historical event\", \"title\": \"Year\", \"type\": \"integer\"}, \"description\": {\"description\": \"A clear description of what happened in this event\", \"title\": \"Description\", \"type\": \"string\"}}, \"required\": [\"year\", \"description\"], \"title\": \"HistoricalEvent\", \"type\": \"object\"}}, \"description\": \"Structured article for publication answering a reader's question.\", \"properties\": {\"title\": {\"description\": \"Title of the article\", \"title\": \"Title\", \"type\": \"string\"}, \"historical_event_1\": {\"$ref\": \"#/$defs/HistoricalEvent\", \"description\": \"The oldest recorded fossil is that of Dickinsonia, a species of ancient animal that lived over 600 million years ago. Discovered in 1909 by Russian paleontologist Raup in what is now present-day Australia, this fossil provides a glimpse into the evolution of life on Earth.\", \"year\": 375000000}, \"historical_event_2\": {\"$ref\": \"#/$defs/HistoricalEvent\", \"description\": \"Although not the oldest recorded fossil, the discovery of Tiktaalik in 2004 has helped scientists better understand the transition from fish to tetrapods during the Devonian period.\", \"year\": 30000000}}, \"required\": [\"title\", \"historical_event_1\", \"historical_event_2\"]}. 
Got: 3 validation errors for ArticleResponse2\n", "title\n", " Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict]\n", " For further information visit https://errors.pydantic.dev/2.9/v/missing\n", "historical_event_1\n", " Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict]\n", " For further information visit https://errors.pydantic.dev/2.9/v/missing\n", "historical_event_2\n", " Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict]\n", " For further information visit https://errors.pydantic.dev/2.9/v/missing\n", "For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE\n" ] } ], "source": [ "parser = PydanticOutputParser(pydantic_object=ArticleResponse2)\n", "prompt = prompt_user_format.partial(\n", " format_instructions=parser.get_format_instructions()\n", ")\n", "llm_model_fix = ChatOllama(model=\"llama3.2\", temperature=0)\n", "\n", "parser_fix = OutputFixingParser.from_llm(parser=parser, llm=llm_model_fix)\n", "\n", "try:\n", " structured_llm = prompt | llm_model | parser_fix\n", " output = structured_llm.invoke(dict(question=questions[0]))\n", " print(output)\n", "except Exception as e:\n", " print(f\"{type(e).__name__}: {str(e)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Output without structure\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\"The Oldest Recorded Fossil: Uncovering Earth's Ancient Past\"\n", "\n", "For over a century, scientists have been fascinated by fossils – remnants of ancient creatures that once roamed our planet. Among these relics, one fossil stands out for its remarkable age and significance.\n", "\n", "Meet the fossilized remains of \"Hallucigenia,\" a bizarre creature discovered in 1909 in what is now western Australia. Initially dismissed as a curiosity, recent dating analysis has pushed its estimated age back by millions of years – placing it at an astonishing 500 million years old!\n", "\n", "To put that in perspective, when Hallucigenia lived, the Earth was still in its Neoproterozoic era, long before dinosaurs roamed the planet. This ancient relic provides a glimpse into the evolution of life on our planet during one of its most formative periods.\n", "\n", "The discovery of Hallucigenia serves as a reminder that fossils hold secrets to understanding the complex history of our world. 
Each new find offers insights into Earth's past, shedding light on the mysteries of how life evolved and adapted over millions of years.\n" ] } ], "source": [ "structured_llm = prompt_direct | llm_model\n", "output = structured_llm.invoke(dict(question=questions[0]))\n", "print(output.content)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which models support what?\n" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "class Joke(BaseModel):\n", " \"\"\"Joke to tell user.\"\"\"\n", "\n", " setup: str = Field(description=\"The setup of the joke\")\n", " punchline: str = Field(description=\"The punchline to the joke\")\n", " rating: int = Field(description=\"How funny the joke is, from 1 to 10\")" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "llm_models = {\n", " # \"Anthropic_Haiku\": ChatAnthropic(model=\"claude-3-haiku-20240307\", api_key=claude_api_key),\n", " \"Ollama_llama32\": ChatOllama(model=\"llama3.2\", temperature=1),\n", " \"Ollama_llama32_json\": ChatOllama(model=\"llama3.2\", format=\"json\", temperature=1),\n", " \"Ollama_gemma2\": ChatOllama(model=\"gemma2\", temperature=1),\n", " \"Ollama_gemma2_json\": ChatOllama(model=\"gemma2\", format=\"json\", temperature=1),\n", " \"Ollama_phi3\": ChatOllama(model=\"phi3\", temperature=1),\n", " \"Ollama_phi3_json\": ChatOllama(model=\"phi3\", format=\"json\", temperature=1),\n", "}" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: ChatOllama(model='llama3.2', temperature=1.0)\n", " Tool use support\n", "Model: ChatOllama(model='llama3.2', temperature=1.0, format='json')\n", " Tool use support\n", "Model: ChatOllama(model='gemma2', temperature=1.0)\n", " No tool use\n", "Model: ChatOllama(model='gemma2', temperature=1.0, format='json')\n", " No tool use\n", "Model: ChatOllama(model='phi3', temperature=1.0)\n", " No tool use\n", "Model: ChatOllama(model='phi3', temperature=1.0, format='json')\n", " No tool use\n" ] } ], "source": [ "for llm_model in llm_models.values():\n", " print(f\"Model: {llm_model.__repr__()}\")\n", " test_structured_llm = llm_model.with_structured_output(JokeTD)\n", " try:\n", " output = test_structured_llm.invoke(\"Tell me a joke about cats\")\n", " print(\" Tool use support\")\n", " except Exception as e:\n", " print(\" No tool use\")" ] } ], "metadata": { "kernelspec": { "display_name": "langchain-simple-examples-IVmI2CVJ-py3.12", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 }