{ "cells": [
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n", "\n", "First line of text\n", "Second line of text\n", "Third line of text\n" ] } ], "source": [ "# Manual open/close. try/finally releases the handle even if a read fails;\n", "# the with-statement cell below is the idiomatic form of the same thing.\n", "# The encoding is passed explicitly so decoding does not depend on the platform default.\n", "file = open('example_text.txt', 'r', encoding='utf-8')\n", "try:\n", "    print(type(file), '\\n')\n", "    contents = file.read()\n", "    print(contents)\n", "finally:\n", "    file.close()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First line of text\n", "Second line of text\n", "Third line of text\n" ] } ], "source": [ "# Context manager: the file is closed automatically when the with-block exits.\n", "with open('example_text.txt', 'r', encoding='utf-8') as file:\n", "    contents = file.read()\n", "print(contents)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/Users/dm_fedorov'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Working directory of the kernel; relative paths given to open() are resolved against it.\n", "import os\n", "os.getcwd()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1: Александр Пушкин — Осень.\n", "I\n", "Октябрь уж наступил — уж роща отряхает\n", "Последние листы с нагих своих ветвей;\n", "Дохнул осенний хлад — дорога промерзает.\n", "Журча еще бежит за мельницу ручей,\n", "Но пруд уже застыл; сосед мой поспешает\n", "В отъезжие поля с охотою своей,\n", "И страждут озими от бешеной забавы,\n", "И будит лай собак уснувшие дубравы.\n", "II\n", "Теперь моя пора: я не люблю весны;\n", "Скучна мне оттепель; вонь, грязь — весной я болен;\n", "Кровь бродит; чувства, ум тоскою стеснены.\n", "Суровою зимой я более доволен,\n", "Люблю ее сне\n", "2: га; в присутствии луны\n", "К\n" ] } ], "source": [ "# In text mode read(n) counts characters, not bytes, and the second call\n", "# resumes exactly where the first one stopped.\n", "# The file holds Cyrillic text, so the encoding must be stated explicitly.\n", "with open('data_files/test_file.txt', 'r', encoding='utf-8') as file:\n", "    contents = file.read(500)\n", "    part_2 = file.read(24)\n", "print('1:', contents)\n", "print('2:', part_2)" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['First line of text\\n', 'Second line of text\\n', 'Third line of text']\n" ] } ], "source": [ "# readlines() returns a list of lines; each keeps its trailing newline if it had one.\n", "with open('example_text.txt', encoding='utf-8') as file:\n", "    lines = file.readlines()\n", "print(lines)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Mercury\\n', 'Venus\\n', 'Earth\\n', 'Mars\\n', 'Jupiter\\n', 'Saturn\\n', 'Uranus\\n', 'Neptune\\n'] \n", "\n", "Neptune\n", "Uranus\n", "Saturn\n", "Jupiter\n", "Mars\n", "Earth\n", "Venus\n", "Mercury\n" ] } ], "source": [ "with open('plan.txt', encoding='utf-8') as file:\n", "    planets = file.readlines()\n", "print(planets, '\\n')\n", "# strip() drops the trailing newline so print() does not emit blank lines.\n", "for planet in reversed(planets):\n", "    print(planet.strip())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7\n", "5\n", "5\n", "4\n", "7\n", "6\n", "6\n", "7\n" ] } ], "source": [ "# Iterating over the file object yields one line at a time.\n", "with open('plan.txt', encoding='utf-8') as file:\n", "    for line in file:\n", "        print(len(line.strip()))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Mode 'w' creates the file or truncates an existing one.\n", "with open(\"top.txt\", 'w', encoding='utf-8') as output_file:\n", "    output_file.write(\"Hello!\\n\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "this is string example....wow!!!\n", "th1s 1s str1ng xampl....wow!!!\n" ] } ], "source": [ "# Before/after preview: the second line is what the dict-based table\n", "# in the next code cell produces from the first line.\n", "print(\"this is string example....wow!!!\")\n", "print(\"th1s 1s str1ng xampl....wow!!!\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Dict form: keys are single characters; mapping a key to None deletes that character.\n", "trantab = str.maketrans({'i': '1', 'e': None})\n", "print(trantab)\n", "\n", "s = \"this is string example....wow!!!\"\n", "print(s.translate(trantab))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{97: 49, 101: 50, 105: 51, 111: 52, 117: 53}\n", "th3s 3s str3ng 2x1mpl2....w4w!!!\n" ] } ], "source": [ "# Two-string form: the i-th character of intab is replaced by the i-th character of outtab.\n", "# The resulting table is keyed by code point, hence the integers in the printed dict.\n", "intab = \"aeiou\"\n", "outtab = \"12345\"\n", "trantab = str.maketrans(intab, outtab)\n", "print(trantab)\n", "\n", "s = \"this is string example....wow!!!\"\n", "print(s.translate(trantab))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{115: None, 119: None}\n", "thi i tring example....o!!!\n" ] } ], "source": [ "# Three-argument form: every character of the third string is mapped to None, i.e. deleted.\n", "trantab = str.maketrans(\"\", \"\", \"sw\")\n", "print(trantab)\n", "\n", "s = \"this is string example....wow!!!\"\n", "print(s.translate(trantab))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Документация про кодировки: https://ancatmara.gitbooks.io/digital-literacy/content/seminar-2.html" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs":
[], "source": [ "# About Unicode: https://djbook.ru/examples/24/" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Always specify the encoding explicitly!" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Store Russian text as cp1251 bytes instead of UTF-8.\n", "with open(\"top_cp1251.txt\", mode='w', encoding='cp1251') as output_file:\n", "    output_file.write(\"Привет, мир!\\n\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "ename": "UnicodeDecodeError", "evalue": "'utf-8' codec can't decode byte 0xcf in position 0: invalid continuation byte", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"top_cp1251.txt\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0moutput_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/anaconda3/lib/python3.7/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0xcf in position 0: invalid continuation byte" ] } ], "source": [ "with open(\"top_cp1251.txt\") as output_file:\n", "    print(output_file.read())\n", "# Expected failure: no encoding is passed, so the default codec is used (UTF-8 in the\n", "# recorded run), and the cp1251 bytes written above are not valid UTF-8." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'😀'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A character referenced by its Unicode name.\n", "'\\N{GRINNING FACE}'" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'😀'" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The same character referenced by its code point.\n", "'\\U0001f600'" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'²'" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Another character looked up by its Unicode name.\n", "'\\N{SUPERSCRIPT TWO}'" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "b'\\xd0\\xbf\\xd1\\x80\\xd0\\xb8\\xd0\\xb2\\xd0\\xb5\\xd1\\x82'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# UTF-8 spends two bytes on each of these Cyrillic letters.\n", "'привет'.encode('utf-8')" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "b'hello'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ASCII letters take one byte each in UTF-8.\n", "'hello'.encode('utf-8')" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "b'\\xef\\xf0\\xe8\\xe2\\xe5\\xf2'" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# cp1251 uses a single byte per letter.\n", "'привет'.encode('cp1251')" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "ename": "UnicodeDecodeError", "evalue": "'utf-8' codec can't decode byte 0xef in position 0: invalid continuation byte", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34mb'\\xef\\xf0\\xe8\\xe2\\xe5\\xf2'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0xef in position 0: invalid continuation byte" ] } ], "source": [ "b'\\xef\\xf0\\xe8\\xe2\\xe5\\xf2'.decode(encoding='utf-8')\n", "# Expected failure: these are the cp1251 bytes from the previous cell, not valid UTF-8." ] },
{ "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'привет'" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Decoding with the codec that produced the bytes restores the text.\n", "b'\\xef\\xf0\\xe8\\xe2\\xe5\\xf2'.decode('cp1251')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Привет, мир!\n", "\n" ] } ], "source": [ "# Reading succeeds once open() is given the encoding the file was written with.\n", "with open(\"top_cp1251.txt\", mode='r', encoding='cp1251') as input_file:\n", "    text = input_file.read()\n", "print(text)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }