{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cb10925c-cccb-420e-be3b-b5ca49ad5cf5",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Compare class with morph where morph=ADV (N1904LFT)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5e4bdf3-b108-4c6a-b99b-a4bf4723afee",
   "metadata": {},
   "source": [
    "The following script reads the xml atributes class and morph for tag w and compare them. It will analyse the words whenever morph atribute is 'adv' and the class atribute is unequal to 'adv'. It prints first a number of examples (with verse/word location) and finishes with a table showing the frequency of the cases where morph=adv is not matching class=adv."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "aa00a05d-0251-4510-87b3-94a79dbb4b9f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Comparing atributes class morph for file xml/20230628/01-matthew.xml\n",
      "\n",
      "Result:\n",
      "\n",
      "At ref=MAT 1:17!7 found class=prep and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 1:17!14 found class=prep and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 1:17!25 found class=prep and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 1:24!9 found class=conj and morph=ADV for lemma=ὡς\n",
      "At ref=MAT 1:25!5 found class=prep and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 2:8!18 found class=conj and morph=ADV for lemma=ὅπως\n",
      "At ref=MAT 2:9!18 found class=conj and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 2:9!21 found class=prep and morph=ADV for lemma=ἐπάνω\n",
      "At ref=MAT 2:13!28 found class=conj and morph=ADV for lemma=ἕως\n",
      "At ref=MAT 2:15!4 found class=prep and morph=ADV for lemma=ἕως\n",
      "╒═══════════════════════════════════════╤═════════════╕\n",
      "│ lemma, morph, class                   │   frequency │\n",
      "╞═══════════════════════════════════════╪═════════════╡\n",
      "│ lemma=ὡς, morph=ADV, class=conj       │          40 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἕως, morph=ADV, class=prep      │          35 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὅπως, morph=ADV, class=conj     │          17 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἕως, morph=ADV, class=conj      │          14 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὅτε, morph=ADV, class=conj      │          12 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὥσπερ, morph=ADV, class=conj    │          10 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἐπάνω, morph=ADV, class=prep    │           8 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὕστερος, morph=ADV, class=adj   │           7 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=μόνος, morph=ADV, class=adj     │           7 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὅπου, morph=ADV, class=conj     │           6 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὀπίσω, morph=ADV, class=prep    │           5 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=εὐθύς, morph=ADV, class=adj     │           5 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=πλήν, morph=ADV, class=conj     │           5 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=πέραν, morph=ADV, class=prep    │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ταχύς, morph=ADV, class=adj     │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἔξω, morph=ADV, class=prep      │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=χωρίς, morph=ADV, class=prep    │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὅθεν, morph=ADV, class=conj     │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=καθώς, morph=ADV, class=conj    │           3 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὡσεί, morph=ADV, class=conj     │           2 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=μέχρι, morph=ADV, class=prep    │           2 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἅμα, morph=ADV, class=prep      │           2 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=μεταξύ, morph=ADV, class=prep   │           2 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=παρεκτός, morph=ADV, class=prep │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=μακράν, morph=ADV, class=adj    │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=πῶς, morph=ADV, class=conj      │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὄπισθεν, morph=ADV, class=prep  │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=οὗ, morph=ADV, class=conj       │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὑποκάτω, morph=ADV, class=prep  │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ἄχρι, morph=ADV, class=prep     │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=καθά, morph=ADV, class=conj     │           1 │\n",
      "├───────────────────────────────────────┼─────────────┤\n",
      "│ lemma=ὀψέ, morph=ADV, class=prep      │           1 │\n",
      "╘═══════════════════════════════════════╧═════════════╛\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import xml.etree.ElementTree as ET\n",
    "from tabulate import tabulate\n",
    "\n",
    "ResultDict = {}\n",
    "\n",
    "def compare_class_and_morph(file_path):\n",
    "    ExampleNumber = 0\n",
    "    tree = ET.parse(file_path)\n",
    "    root = tree.getroot()\n",
    "\n",
    "    for w_tag in root.iter('w'):\n",
    "        # Extract attributes class and morph for the tag w\n",
    "        class_attr = w_tag.get('class')\n",
    "        morph_attr = w_tag.get('morph')\n",
    "        lemma_attr = w_tag.get('lemma')\n",
    "        ref_attr = w_tag.get('ref')\n",
    "\n",
    "        # Compare class and morph attributes\n",
    "        if morph_attr.lower()=='adv' and class_attr!='adv':\n",
    "            ExampleNumber += 1\n",
    "\n",
    "            Mapping=f\"lemma={lemma_attr}, morph={morph_attr}, class={class_attr}\"    \n",
    "            # Check if this Change already exists in ResultDict\n",
    "            if Mapping in ResultDict:\n",
    "               # If it exists, add the count to the existing value\n",
    "               ResultDict[Mapping]+=1\n",
    "            else:\n",
    "               # If it doesn't exist, initialize the count as the value\n",
    "               ResultDict[Mapping]=1\n",
    "            if ExampleNumber<=NumberExamples: \n",
    "               print(f\"At ref={ref_attr} found class={class_attr} and morph={morph_attr} for lemma={lemma_attr}\")\n",
    "\n",
    "    return \n",
    "\n",
    "# Following variable should contain the relative path and name of file to check\n",
    "InputFile=\"xml/20230628/01-matthew.xml\"\n",
    "# How many difference to show prior to table\n",
    "NumberExamples = 10\n",
    "\n",
    "# First check if the file exists, then analyze its content\n",
    "if os.path.exists(InputFile):\n",
    "    print(f\"Comparing atributes class morph for file {InputFile}\\n\\nResult:\\n\\n\", end=\"\")\n",
    "    differences = compare_class_and_morph(InputFile)\n",
    "    \n",
    "    # Convert the dictionary into a list of key-value pairs and sort it according to frequency\n",
    "    UnsortedTableData = [[key, value] for key, value in ResultDict.items()]\n",
    "    TableData= sorted(UnsortedTableData, key=lambda row: row[1], reverse=True)\n",
    "\n",
    "    # Produce the table\n",
    "    headers = [\"lemma, morph, class\",\"frequency\"]\n",
    "    print(tabulate(TableData, headers=headers, tablefmt='fancy_grid'))\n",
    "else:\n",
    "    print(f\"Could not find file {InputFile}.\")\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6095ea9-0f56-4526-a49a-547180f64b46",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}