{ "cells": [ { "cell_type": "markdown", "id": "e8b09e8e-7ffb-4fb8-86ee-16b1f2bf0f80", "metadata": {}, "source": [ "# Difference between feature 'word' and 'unicode'" ] }, { "cell_type": "markdown", "id": "1a22bdfd-2f51-4383-b2a9-a78818e969ae", "metadata": {}, "source": [ "The following small script shows the differences between feature 'word' and 'unicode'." ] }, { "cell_type": "code", "execution_count": 1, "id": "93a208bc-058f-45ae-81cf-2371e9158f4f", "metadata": {}, "outputs": [], "source": [ "# Following variables should contain the relative path and name of the two files to compare\n", "WordFile=\"../tf/0.4/word.tf\"\n", "UnicodeFile=\"../tf/0.4/unicode.tf\"\n", "# How many difference to show\n", "NumberExamples = 10" ] }, { "cell_type": "code", "execution_count": 2, "id": "b1005f2a-dd1c-45a0-ab4c-6282b9486a5a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Comparing file ../tf/0.4/word.tf with ../tf/0.4/unicode.tf \n", "\n", "Result:\n", "\n", "Line 28 differs:\n", "File ../tf/0.4/word.tf: Ἀβραάμ\n", "File ../tf/0.4/unicode.tf: Ἀβραάμ.\n", "\n", "Line 32 differs:\n", "File ../tf/0.4/word.tf: Ἰσαάκ\n", "File ../tf/0.4/unicode.tf: Ἰσαάκ,\n", "\n", "Line 37 differs:\n", "File ../tf/0.4/word.tf: Ἰακώβ\n", "File ../tf/0.4/unicode.tf: Ἰακώβ,\n", "\n", "Line 46 differs:\n", "File ../tf/0.4/word.tf: αὐτοῦ\n", "File ../tf/0.4/unicode.tf: αὐτοῦ,\n", "\n", "Line 57 differs:\n", "File ../tf/0.4/word.tf: Θάμαρ\n", "File ../tf/0.4/unicode.tf: Θάμαρ,\n", "\n", "Line 62 differs:\n", "File ../tf/0.4/word.tf: Ἐσρώμ\n", "File ../tf/0.4/unicode.tf: Ἐσρώμ,\n", "\n", "Line 67 differs:\n", "File ../tf/0.4/word.tf: Ἀράμ\n", "File ../tf/0.4/unicode.tf: Ἀράμ,\n", "\n", "Line 72 differs:\n", "File ../tf/0.4/word.tf: Ἀμιναδάβ\n", "File ../tf/0.4/unicode.tf: Ἀμιναδάβ,\n", "\n", "Line 77 differs:\n", "File ../tf/0.4/word.tf: Ναασσών\n", "File ../tf/0.4/unicode.tf: Ναασσών,\n", "\n", "Line 82 differs:\n", "File ../tf/0.4/word.tf: Σαλμών\n", "File ../tf/0.4/unicode.tf: Σαλμών,\n", "\n", "Stoped comparing after 10 differences\n", "Finished.\n" ] } ], "source": [ "import os\n", "\n", "def compare_files(file1_path, file2_path):\n", " FoundDifferences=0\n", " with open(file1_path, 'r', encoding='utf-8') as file1, open(file2_path, 'r',encoding='utf-8') as file2:\n", " lines_file1 = file1.readlines()\n", " lines_file2 = file2.readlines()\n", "\n", " # Check the number of lines in both files\n", " if len(lines_file1) != len(lines_file2):\n", " print(\"Files have different numbers of lines.\")\n", " return\n", "\n", " # Compare content line by line\n", " for line_num, (line1, line2) in enumerate(zip(lines_file1, lines_file2)):\n", " line1 = line1.strip()\n", " line2 = line2.strip()\n", "\n", " # Compare the lines and print any differences\n", " if line1 != line2:\n", " \n", " if line1.startswith(\"@\"):\n", " continue # Skip lines that start with \"@\"\n", " if line1.startswith(\"\\n\"):\n", " continue # Skip lines that start with \" \"\n", " \n", " print(f\"Line {line_num + 1} differs:\")\n", " print(f\"File {file1_path}: {line1}\")\n", " print(f\"File {file2_path}: {line2}\")\n", " print()\n", " FoundDifferences+=1\n", " if FoundDifferences==NumberExamples: \n", " print ('Stoped comparing after ', FoundDifferences,'differences')\n", " break\n", "\n", " # If no differences found, print a message\n", " print(\"Finished.\")\n", "\n", "# main part\n", "#First check if the file exist, then check its content\n", "if os.path.exists(WordFile):\n", " if os.path.exists(UnicodeFile):\n", " print (\"Comparing file \",WordFile,\" with \",UnicodeFile,\"\\n\\nResult:\\n\\n\",end=\"\") \n", " compare_files(WordFile, UnicodeFile)\n", " else:\n", " print (f\"Could not find file {UnicodeFile}.\")\n", "else:\n", " print(f\"Could not find file {WordFile}.\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "d2322619-d269-4297-8152-187abecaf376", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }