{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Введение в Pytorch\n", "\n", "Александр Дьяконов, 2020\n", "\n", "#### использованные материалы\n", "\n", "* https://github.com/MLWhiz/data_science_blogs/blob/master/pytorch_guide/Pytorch%20Guide.ipynb\n", "* https://d2l.ai/\n", "* https://atcold.github.io/pytorch-Deep-Learning/\n", "* семинары OzonMasters\n", "* https://habr.com/ru/post/334380/\n", "\n", "### Фишки Pytorch:\n", "\n", "* очень похож на numpy, но многие вещи строже\n", "* динамический граф вычислений (создаётся при работе)\n", "* можно вычислять на GPU (минимальные изменения кода)\n", "* хорошо поддерживается, есть полезные модули (например, torchvision)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# from __future__ import print_function\n", "import torch\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# helper functions: plot 2-D points and the decision regions of a model\n", "\n", "from matplotlib import pyplot as plt\n", "\n", "def plot_data(X, y, d=0, auto=False, zoom=1):\n", "    \"\"\"Scatter-plot the first two columns of X, coloured by y.\n", "\n", "    X, y -- torch tensors; they are moved to the CPU before plotting\n", "    d    -- unused, kept so that existing calls keep working\n", "    auto -- if truthy, switch the axes to 'equal' scaling\n", "    zoom -- multiplier applied to the default [-1.1, 1.1] axis limits\n", "    \"\"\"\n", "    points = X.cpu().numpy()  # converted once instead of once per coordinate\n", "    colors = y.cpu().numpy()\n", "    plt.scatter(points[:, 0], points[:, 1], c=colors, s=20, cmap=plt.cm.Spectral)\n", "    plt.axis('square')\n", "    plt.axis(np.array((-1.1, 1.1, -1.1, 1.1)) * zoom)\n", "    if auto:\n", "        plt.axis('equal')\n", "    plt.axis('off')\n", "\n", "    # thin dark guide lines through the origin, drawn beneath the points\n", "    _m, _c = 0, '.15'\n", "    plt.axvline(0, ymin=_m, color=_c, lw=1, zorder=0)\n", "    plt.axhline(0, xmin=_m, color=_c, lw=1, zorder=0)\n", "\n", "def plot_model(X, y, model):\n", "    \"\"\"Shade the class predicted by `model` on a grid over [-1.1, 1.1) x [-1.1, 1.1) and overlay the data.\n", "\n", "    The predicted class is the argmax over dimension 1 of the model output.\n", "    NOTE: `model` is moved to the CPU in place (side effect kept from the original code).\n", "    \"\"\"\n", "    model.cpu()\n", "    mesh = np.arange(-1.1, 1.1, 0.01)\n", "    xx, yy = np.meshgrid(mesh, mesh)\n", "    grid = torch.from_numpy(np.vstack((xx.reshape(-1), yy.reshape(-1))).T).float()\n", "    with torch.no_grad():  # inference only: no autograd graph, so no detach() is needed\n", "        scores = model(grid)\n", "    # explicit tensor -> numpy conversion instead of calling np.argmax on a tensor\n", "    Z = scores.argmax(dim=1).numpy().reshape(xx.shape)\n", "    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)\n", "    plot_data(X, y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Помощь, общие 
моменты" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# основная помощь\n", "torch.nn.Module?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# + код функции\n", "torch.nn.Module??" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.7.3\n", "1.5.0\n" ] } ], "source": [ "# Проверка версии\n", "from platform import python_version\n", "print(python_version())\n", "print(torch.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Тензоры\n", "* тензоры\n", "* доступ к элементам, слайсинг\n", "* реализация inplace-операций\n", "* связь с numpy\n", "* параметры тензора\n", "* выцепление элементов (item)" ] }, { "cell_type": "code", "execution_count": 206, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[0., 0., 0.],\n", " [0., 0., 0.]]) 1685962910464\n" ] } ], "source": [ "x = torch.FloatTensor(2, 3)\n", "print (x, x.data_ptr()) # + где лежит в памяти" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1.0194e-38, 8.4490e-39, 1.0469e-38, 9.3674e-39, 9.9184e-39],\n", " [8.7245e-39, 9.2755e-39, 8.9082e-39, 9.9184e-39, 8.4490e-39],\n", " [9.6429e-39, 1.0653e-38, 1.0469e-38, 4.2246e-39, 1.0378e-38]])\n" ] } ], "source": [ "# пустая матрица (тензор)\n", "x = torch.empty(3, 5)\n", "print(x)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1.0194e-38, 8.4490e-39],\n", " [8.7245e-39, 9.2755e-39],\n", " [9.6429e-39, 1.0653e-38]])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x[:,:2]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[8.4490e-39],\n", 
" [9.2755e-39],\n", " [1.0653e-38]]),\n", " tensor([8.4490e-39, 9.2755e-39, 1.0653e-38]))" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x[:,[1]], x[:, 1]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[0.2961, 0.5166, 0.2517, 0.6886, 0.0740],\n", " [0.8665, 0.1366, 0.1025, 0.1841, 0.7264],\n", " [0.3153, 0.6871, 0.0756, 0.1966, 0.3164]])\n" ] } ], "source": [ "# случайная матрица\n", "torch.manual_seed(123)\n", "x = torch.rand(3, 5)\n", "print(x)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[3., 3., 3., 3., 3.],\n", " [3., 3., 3., 3., 3.],\n", " [3., 3., 3., 3., 3.]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# заполнение\n", "x.fill_(3) # черта - признак выполнения на данном тензоре" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0.]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# обнуление\n", "x.zero_()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([1, 2, 3, 4])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.arange(1, 5)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1, 2],\n", " [3, 4]]) torch.LongTensor 2 torch.Size([2, 2]) 4\n" ] } ], "source": [ "# матрица из данных\n", "x = torch.tensor([[1, 2], [3, 4]])\n", "print(x, x.type(), x.dim(), x.size(), x.numel()) # тип тензора" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"tensor([[1., 1.],\n", " [1., 1.]], dtype=torch.float64)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# из numpy.array\n", "torch.from_numpy(np.ones((2, 2)))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[0.1479, 0.5331, 0.4066],\n", " [0.2318, 0.4545, 0.9737],\n", " [0.4606, 0.5159, 0.4220],\n", " [0.5786, 0.9455, 0.8057],\n", " [0.6775, 0.6087, 0.6179]])\n", "tensor(0.9737) 0.9737018942832947\n" ] } ], "source": [ "x = torch.rand(5, 3)\n", "print(x)\n", "print (x.max(), x.max().item()) # item - выцепляет элемент" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1, 2],\n", " [3, 4]]) 1\n", " 2\n", " 3\n", " 4\n", "[torch.LongStorage of size 4] (2, 1) (1, 2)\n" ] } ], "source": [ "x = torch.tensor([[1, 2], [3, 4]])\n", "\n", "# как хранятся данные, где следующий элемент по каждой из разметностей\n", "print (x, x.storage(), x.stride(), x.t().stride()) " ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[10, 2],\n", " [ 3, 4]]) tensor([[1, 2],\n", " [3, 4]])\n", "tensor([[30, 3],\n", " [ 2, 4]]) tensor([[30, 3],\n", " [ 2, 4]])\n" ] } ], "source": [ "x = torch.tensor([[1, 2], [3, 4]])\n", "y = x.clone()\n", "# print (id(x.storage()) == id(y.storage()))\n", "x[0, 0] = 10\n", "print (x, y)\n", "xt = x.t_()\n", "x[0, 0] = 30\n", "print (x, xt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Тензоры - получение одних из других " ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 1, 10],\n", " [ 3, 4]]),\n", " tensor([[1, 2],\n", " [3, 4]]))" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# клонирование\n", 
"x = torch.tensor([[1, 2], [3, 4]])\n", "x2 = x.clone() # в отличие от copy_() через оригинал проносят градиенты\n", "x[0, 1] = 10 \n", "x, x2" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[ 0.3123, 1.3540],\n", " [-0.3501, 1.6162]])\n" ] } ], "source": [ "# матрица такого же размера\n", "y = torch.randn_like(x, dtype=torch.float)\n", "print(y)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[1, 2, 2, 2],\n", " [3, 4, 2, 2]]),\n", " tensor([[1, 2],\n", " [3, 4],\n", " [2, 2],\n", " [2, 2]]))" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# конкатенация по 0 и 1 размерностям\n", "x = torch.tensor([[1, 2], [3, 4]])\n", "y = torch.tensor([[2, 2], [2, 2]])\n", "torch.cat([x, y], axis=1), torch.cat([x, y], axis=0)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([1, 2, 2]), torch.Size([2, 1, 2]), torch.Size([2, 2, 1]))" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# создание фиктивной размерности - в какую позицию вставлять фиктивную\n", "x.unsqueeze(dim=0).shape, x.unsqueeze(dim=1).shape, x.unsqueeze(dim=2).shape" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[[1, 2],\n", " [2, 2]],\n", "\n", " [[3, 2],\n", " [4, 2]]])" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# конкатенация по 2-й размерности\n", "torch.cat([x.unsqueeze(dim=2), y.unsqueeze(dim=2)], axis=2)" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([3, 2])" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# удаляем единичные размеры\n", 
"torch.empty(3, 1, 2, 1).squeeze().shape" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[ 9, 12],\n", " [15, 18]])\n", "tensor([[ 3, 8],\n", " [15, 24]])\n", "tensor([[ 9, 16],\n", " [25, 36]])\n" ] } ], "source": [ "x = torch.tensor([[1, 2], [3, 4]])\n", "print(3 * x.add(2)) # смотри на порядок операций\n", "print(x * x.add(2)) # смотри на порядок операций\n", "print(x * x.add_(2)) # смотри на порядок операций" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Операции над тензорами" ] }, { "cell_type": "code", "execution_count": 208, "metadata": {}, "outputs": [], "source": [ "x = torch.tensor([[1, 2], [3, 4]])\n", "y = torch.tensor([[2, 2], [2, 2]])\n", "v = torch.tensor([1, 2])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[3, 4],\n", " [5, 6]])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# сложение\n", "x + y" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[3, 4],\n", " [5, 6]])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.add(y)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[2, 4],\n", " [6, 8]])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x * y" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 6, 6],\n", " [14, 14]]),\n", " tensor([[ 6, 6],\n", " [14, 14]]))" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x @ y, x.mm(y)\n", "# скалярные произведения по строкам" ] }, { "cell_type": "code", "execution_count": 217, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "tensor(5) tensor(5)\n" ] }, { "data": { "text/plain": [ "tensor(20)" ] }, "execution_count": 217, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print (torch.dot(v, v), v.dot(v)) # скалярное умножение\n", "torch.dot(x.view(-1), y.view(-1))" ] }, { "cell_type": "code", "execution_count": 211, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([ 5, 11]), tensor([ 5, 11]))" ] }, "execution_count": 211, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.mv(x, v), x.mv(v) # умножение на вектор" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[3, 4],\n", " [5, 6]])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# inplace\n", "y.add_(x)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0.0000, 0.6931],\n", " [1.0986, 1.3863]], dtype=torch.float64)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.type(torch.DoubleTensor).log() # приводим тип - иначе не сработает log" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 1, 4],\n", " [ 9, 16]]),\n", " tensor([[ 1, 4],\n", " [ 9, 16]]))" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.pow(2), x**2" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1, 3],\n", " [2, 4]])" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# транспонирование\n", "x.t()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1, 3],\n", " [2, 4]])" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 
эквивалениное транспонирование\n", "x.transpose(0, 1)" ] }, { "cell_type": "code", "execution_count": 222, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([2., 1.])\n", "tensor([2., 1.])\n", "tensor([2., 1.])\n", "tensor([2., 1.])\n" ] } ], "source": [ "# как распределяется память\n", "# ПРОВЕРИТЬ В ПОСЛЕДНЕЙ ВЕРСИИ?????\n", "\n", "x = torch.Tensor([1, 2])\n", "y = torch.Tensor([1, 1])\n", "z = torch.Tensor([0, 2])\n", "\n", "print (x + y - z) # два промежуточных тензора будут созданы.\n", "print (x.add(y).sub_(z)) # один промежуточный тензор.\n", "print (x.add_(y).sub_(z)) # не будет создано промежуточных тензоров\n", "print(x) # поменяется" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## статистики над тензорами, размеры\n", "* ...\n", "* связь с Numpy\n", "* приведение размеров" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "x = torch.tensor([[1, 2], [3, 4]])\n", "y = torch.tensor([[2, 2], [2, 2]])" ] }, { "cell_type": "code", "execution_count": 218, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor(10), tensor(10), tensor([4, 6]), tensor([3, 7]))" ] }, "execution_count": 218, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.sum(x), x.sum(), x.sum(axis=0), x.sum(axis=1)" ] }, { "cell_type": "code", "execution_count": 219, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor(4),\n", " tensor(4),\n", " torch.return_types.max(\n", " values=tensor([3, 4]),\n", " indices=tensor([1, 1])),\n", " torch.return_types.max(\n", " values=tensor([2, 4]),\n", " indices=tensor([1, 1])))" ] }, "execution_count": 219, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.max(x), x.max(), x.max(axis=0), x.max(axis=1)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([2, 2]) torch.Size([4]) torch.Size([2, 1, 
2]) torch.Size([1, 4])\n" ] } ], "source": [ "# resize/reshape\n", "x = torch.rand(2, 2)\n", "\n", "a = x.view(4)\n", "b = x.view(2, 1, -1)\n", "\n", "y = x.reshape(1, 4)\n", "\n", "print (x.size(), a.size(), b.size(), y.size())" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([3, 2, 5])" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# переставить размерности\n", "\n", "x = torch.rand(2, 3, 5)\n", "x.permute(1,0,2).size()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 2],\n", " [3, 4]], dtype=int64)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# -> numpy\n", "\n", "x.numpy()" ] }, { "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1. 1. 1. 1. 1.]\n", "tensor([1., 1., 1., 1., 1.], dtype=torch.float64)\n", "tensor([1., 1., 1., 1., 1.])\n", "[11. 11. 11. 11. 
11.]\n", "tensor([11., 11., 11., 11., 11.], dtype=torch.float64)\n", "tensor([1., 1., 1., 1., 1.])\n" ] } ], "source": [ "# numpy -> pytorch\n", "# изменение в Numpy атоматически меняет тензор\n", "\n", "import numpy as np\n", "a = np.ones(5)\n", "b = torch.from_numpy(a)\n", "c = torch.Tensor(a)\n", "print(a)\n", "print(b)\n", "print(c)\n", "np.add(a, 10, out=a)\n", "# a = a + 1 # а так - нет\n", "print(a)\n", "print(b)\n", "print(c) # а тут нет" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11, 22],\n", " [13, 24]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# приведение размеров\n", "\n", "x = torch.tensor([[1, 2], [3, 4]])\n", "y = torch.tensor([10, 20])\n", "\n", "x + y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Сохранение и загрузка тензоров" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([1., 2., 3.])" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# сохранение и загрузка тензоров\n", "torch.save(x, 'x-file')\n", "x2 = torch.load(\"x-file\")\n", "x2" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([1., 2., 3.]),\n", " tensor([[ 3., 0.],\n", " [ 3., 12.]], requires_grad=True))" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.save([x, y],'x-files')\n", "x2, y2 = torch.load('x-files')\n", "(x2, y2)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'x': tensor([1., 2., 3.]),\n", " 'y': tensor([[ 3., 0.],\n", " [ 3., 12.]], requires_grad=True)}" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mydict = {'x': x, 'y': y}\n", "torch.save(mydict, 'mydict')\n", "mydict2 = 
torch.load('mydict')\n", "mydict2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Тензоры: примеры" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n", " [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# OHE\n", "target = torch.tensor([0,3,2,2,0])\n", "target_onehot = torch.zeros(target.shape[0], 10)\n", "target_onehot.scatter_(1, target.unsqueeze(1), 1.0)" ] }, { "cell_type": "code", "execution_count": 148, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([10, 20, 1200])\n", "torch.Size([10, 20, 1200])\n" ] } ], "source": [ "# матрица -> вектор\n", "x = torch.rand(10, 20, 30, 40)\n", "print (x.view(-1, 20, 30*40).shape)\n", "print (torch.flatten(x, 2).shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## GPU\n", "\n", "переменные и модели на разных устройствах не видят друг друга!" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# самая популярная конструкция\n", "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "m.to(device) # перенос на доступное устройство" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[2, 3],\n", " [4, 5]], device='cuda:0')\n", "tensor([[2., 3.],\n", " [4., 5.]], dtype=torch.float64)\n" ] } ], "source": [ "if torch.cuda.is_available():\n", " device = torch.device(\"cuda\")\n", " y = torch.ones_like(x, device=device) \n", " x = x.to(device) \n", " z = x + y\n", " print(z)\n", " print(z.to(\"cpu\", torch.double)) \n", "else:\n", " device = torch.device(\"cpu\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x.cuda()\n", "x.cpu()\n", "x.is_cuda" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Whether to train on a gpu\n", "# NOTE(review): `model` is expected to be an nn.Module created elsewhere -- this cell is a template\n", "train_on_gpu = torch.cuda.is_available()\n", "print(f'Train on gpu: {train_on_gpu}')\n", "\n", "# Number of gpus; both names are defined on every path, so the checks below\n", "# cannot raise NameError on a CPU-only machine\n", "gpu_count = torch.cuda.device_count() if train_on_gpu else 0\n", "multi_gpu = gpu_count > 1\n", "\n", "if train_on_gpu:\n", "    print(f'{gpu_count} gpus detected.')\n", "    model = model.to('cuda')\n", "if multi_gpu:\n", "    # torch.nn is reachable through the `torch` import from the first cell\n", "    model = torch.nn.DataParallel(model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Ниже эксперименты со временем" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [], "source": [ "x = torch.randn((1000, 1000))\n", "y = torch.randn((1000, 1000))" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 11 ms\n" ] } ], "source": [ "%%time\n", "z = x @ y " ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "Wall time: 4.96 ms\n" ] } ], "source": [ "%%time\n", "x = x.cuda()\n", "y = y.cuda()  # the host-to-device transfer takes time as well\n", "z = x @ y\n", "torch.cuda.synchronize()  # CUDA kernels run asynchronously: wait for the matmul to finish before the timer stops" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Авто дифференцирование" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor(4.5000, grad_fn=)\n" ] } ], "source": [ "x = torch.tensor([[1, 2], [3, 4]], requires_grad=True, dtype=torch.float32)\n", "\n", "y = 3 * (x - 2) ** 2\n", "\n", "f = y.mean()\n", "\n", "print (f)" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "%3\r\n", "\r\n", "\r\n", "1686543498824\r\n", "\r\n", "MeanBackward0\r\n", "\r\n", "\r\n", "1686543497144\r\n", "\r\n", "MulBackward0\r\n", "\r\n", "\r\n", "1686543497144->1686543498824\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "1686543496136\r\n", "\r\n", "PowBackward0\r\n", "\r\n", "\r\n", "1686543496136->1686543497144\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "1686401450672\r\n", "\r\n", "SubBackward0\r\n", "\r\n", "\r\n", "1686401450672->1686543496136\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "1686543516616\r\n", "\r\n", " (2, 2)\r\n", "\r\n", "\r\n", "1686543516616->1686401450672\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ], "text/plain": [ "" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from torchviz import make_dot\n", "\n", "make_dot(f)" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1., 2.],\n", " [3., 4.]], requires_grad=True) tensor([[-1.5000, 0.0000],\n", " [ 1.5000, 3.0000]]) tensor([[-1.5000, 0.0000],\n", " [ 1.5000, 3.0000]], grad_fn=)\n" ] } ], "source": [ "f.backward()\n", "print(x, x.grad, 3*2*(x - 2)/4)" ] }, { "cell_type": "code", 
"execution_count": 168, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(None, None)" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# x += 1  # would raise RuntimeError: in-place update of a leaf tensor that requires grad\n", "f.grad, y.grad  # both are None: f and y are non-leaf tensors and autograd keeps .grad only for leaves (call retain_grad() before backward() to keep it)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n", "tensor([1., 2., 3.])\n" ] } ], "source": [ "x = torch.Tensor([1, 2, 3])\n", "# built from a plain list: wrapping an existing tensor in torch.tensor() emits a copy-construct UserWarning\n", "w = torch.tensor([1., 1., 1.], requires_grad=True)\n", "z = w @ x\n", "z.backward()\n", "print(x.grad, w.grad, sep='\\n')" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n", "tensor([2., 4., 6.])\n" ] } ], "source": [ "z = w @ x\n", "z.backward()\n", "print(x.grad, w.grad, sep='\\n') # идёт накопление!!!" 
] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n", "tensor([2., 4., 6.])\n", "None\n", "tensor([1., 2., 3.])\n" ] } ], "source": [ "with torch.no_grad():  # no graph is recorded here, so nothing is accumulated\n", "    z = w @ x\n", "    # z.backward()\n", "print(x.grad, w.grad, sep='\\n')\n", "\n", "w.grad.zero_()  # reset the accumulated gradient in place (no need for the legacy .data attribute)\n", "z = w @ x\n", "z.backward()\n", "print(x.grad, w.grad, sep='\\n')" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1., 1., 1.], dtype=float32)" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# w.numpy() would raise an error because w requires grad\n", "w.detach().numpy()  # detach() returns a tensor with requires_grad=False that SHARES memory with w (it is not a copy); add .clone() for an independent copy" ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[ 1., 16.],\n", " [ 81., 256.]], grad_fn=) None\n", "tensor([[1., 2.],\n", " [3., 4.]], requires_grad=True) tensor([[ 1., 8.],\n", " [27., 64.]])\n" ] } ], "source": [ "# динамический граф вычислений в цикле\n", "\n", "x = torch.tensor([[1, 2], [3, 4]], requires_grad=True, dtype=torch.float32)\n", "x0 = x\n", "for i in range(2):\n", " x = x * x\n", "\n", "z = x.mean() # здесь будет 1/4 !!!\n", "z.backward()\n", "\n", "print(x, x.grad)\n", "print(x0, x0.grad) # градиент лежит здесь!!!\n", "# поскольку x превратился во внутреннюю вершину графа вычислений" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TensorDataset / DataLoader\n", "\n", "организация подачи данных в модель\n", "* DataLoader - подаёт батчами\n", "* TensorDataset - для представления датасета" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[3., 1.],\n", " [9., 1.],\n", " [7., 1.],\n", " [5., 1.]])\n", "tensor([[ 9.],\n", " [81.],\n", " 
[49.],\n", " [25.]])\n", "tensor([[1., 1.],\n", " [8., 1.],\n", " [6., 1.],\n", " [4., 1.]])\n", "tensor([[ 1.],\n", " [64.],\n", " [36.],\n", " [16.]])\n", "tensor([[2., 1.],\n", " [0., 1.]])\n", "tensor([[4.],\n", " [0.]])\n" ] } ], "source": [ "from torch.utils.data import TensorDataset\n", "import numpy as np\n", "\n", "x = torch.from_numpy(np.vstack([np.arange(10, dtype='float32'), np.ones(10, dtype='float32')]).T)\n", "y = torch.from_numpy(np.arange(10, dtype='float32')[:, np.newaxis] ** 2)\n", "\n", "train_ds = TensorDataset(x, y)\n", "\n", "from torch.utils.data import DataLoader\n", "\n", "batch_size = 4\n", "train_dl = DataLoader(train_ds, batch_size, shuffle=True)\n", "\n", "for xb, yb in train_dl:\n", " print(xb)\n", " print(yb)\n", " # break" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# загрузка с трансформациями\n", "from torchvision import datasets\n", "\n", "train_loader = torch.utils.data.DataLoader(\n", " datasets.MNIST('../data', train=True, download=True,\n", " transform=transforms.Compose([\n", " transforms.ToTensor(),\n", " transforms.Normalize((0.1307,), (0.3081,))\n", " ])),\n", " batch_size=64, shuffle=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# есть стандартные датасеты\n", "trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n", "testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# загрузка из директории\n", "\n", "traindir = \"/home/rahul/projects/compvisblog/data/train/\"\n", "t = transforms.Compose([transforms.Resize(size=256),\n", " transforms.CenterCrop(size=224),\n", " transforms.ToTensor()])\n", "\n", "train_dataset = torchvision.datasets.ImageFolder(root=traindir, transform=t)\n", "\n", "for i in 
range(0,len(train_dataset)):\n", " image ,label = train_dataset[i]\n", " print(image,label)\n", " break\n", " \n", "train_dataloader = DataLoader(train_dataset,batch_size = 64, shuffle=True, num_workers=10)\n", "for image_batch, label_batch in train_dataloader:\n", " print(image_batch.size(),label_batch.size())\n", " break" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['CenterCrop',\n", " 'ColorJitter',\n", " 'Compose',\n", " 'FiveCrop',\n", " 'Grayscale',\n", " 'Lambda',\n", " 'LinearTransformation',\n", " 'Normalize',\n", " 'Pad',\n", " 'RandomAffine',\n", " 'RandomApply',\n", " 'RandomChoice',\n", " 'RandomCrop',\n", " 'RandomErasing',\n", " 'RandomGrayscale',\n", " 'RandomHorizontalFlip',\n", " 'RandomOrder',\n", " 'RandomPerspective',\n", " 'RandomResizedCrop',\n", " 'RandomRotation',\n", " 'RandomSizedCrop',\n", " 'RandomVerticalFlip',\n", " 'Resize',\n", " 'Scale',\n", " 'TenCrop',\n", " 'ToPILImage',\n", " 'ToTensor',\n", " '__builtins__',\n", " '__cached__',\n", " '__doc__',\n", " '__file__',\n", " '__loader__',\n", " '__name__',\n", " '__package__',\n", " '__path__',\n", " '__spec__',\n", " 'functional',\n", " 'transforms']" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# все трансформации\n", "from torchvision import transforms\n", "dir(transforms)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Чтобы pytorch работал с датасетом, надо определить\n", "* __getitem__\n", "* __len__" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Свой датасет\n", "class CustomTextDataset(Dataset):\n", " '''\n", " Simple Dataset initializes with X and y vectors\n", " We start by sorting our X and y vectors by sequence lengths\n", " '''\n", " def __init__(self,X,y=None):\n", " self.data = list(zip(X,y))\n", " # Sort by length of first element in tuple\n", " self.data = sorted(self.data, key=lambda x: 
len(x[0]))\n", " \n", " def __len__(self):\n", " return len(self.data)\n", "\n", " def __getitem__(self, idx):\n", " return self.data[idx]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# свой загрузчик данных\n", "from glob import glob\n", "from PIL import Image\n", "from torch.utils.data import Dataset\n", "\n", "class customImageFolderDataset(Dataset):\n", " \"\"\"Custom Image Loader dataset.\"\"\"\n", " def __init__(self, root, transform=None):\n", " \"\"\"\n", " Args:\n", " root (string): Path to the images organized in a particular folder structure.\n", " transform: Any Pytorch transform to be applied\n", " \"\"\"\n", " # Get all image paths from a directory\n", " self.image_paths = glob(f\"{root}/*/*\")\n", " # Get the labels from the image paths\n", " self.labels = [x.split(\"/\")[-2] for x in self.image_paths]\n", " # Create a dictionary mapping each label to a index from 0 to len(classes).\n", " self.label_to_idx = {x:i for i,x in enumerate(set(self.labels))}\n", " self.transform = transform\n", " \n", " def __len__(self):\n", " # return length of dataset\n", " return len(self.image_paths)\n", " \n", " def __getitem__(self, idx):\n", " # open and send one image and label\n", " img_name = self.image_paths[idx]\n", " label = self.labels[idx]\n", " image = Image.open(img_name)\n", " if self.transform:\n", " image = self.transform(image)\n", " return image,self.label_to_idx[label]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### датасет с несколькими входами в сеть" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# если несколько входов в сеть\n", "\n", "from torch.utils.data import DataLoader, Dataset\n", "\n", "class TrainDataset(Dataset):\n", " def __init__(self, df, num_features, cat_features, labels):\n", " self.cont_values = df[num_features].values\n", " self.cate_values = df[cat_features].values\n", " self.labels = labels\n", " \n", " def 
class TestDataset(Dataset):
    """Unlabelled two-input dataset: one row yields (numeric features, categorical features)."""

    def __init__(self, df, num_features, cat_features):
        # ``df[cols].values`` is taken once per feature group and kept as an array;
        # rows are converted to tensors lazily in __getitem__.
        self.cont_values, self.cate_values = (
            df[columns].values for columns in (num_features, cat_features)
        )

    def __len__(self):
        """Number of rows (taken from the numeric-feature array)."""
        n_rows = len(self.cont_values)
        return n_rows

    def __getitem__(self, idx):
        """Return row ``idx`` as ``(FloatTensor, LongTensor)``."""
        numeric_row = self.cont_values[idx]
        categorical_row = self.cate_values[idx]
        return torch.FloatTensor(numeric_row), torch.LongTensor(categorical_row)
# Example model-training loop
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train ``model`` for ``num_epochs`` passes over ``train_dl``.

    Each batch runs forward pass -> zero gradients -> loss -> backward -> optimizer
    step. After every 10th epoch the loss of that epoch's last batch is printed.
    Returns nothing; the model and optimizer are updated in place.
    """
    progress_line = 'Epoch [{}/{}], Loss: {:.4f}'.format

    for epoch_number in range(1, num_epochs + 1):
        # Re-enter training mode every epoch; this could be hoisted above the
        # loop if nothing ever switches the model out of training mode.
        model.train()

        for inputs, targets in train_dl:          # iterate over batches
            predictions = model(inputs)           # forward pass
            opt.zero_grad()                       # reset accumulated gradients
            loss = loss_fn(predictions, targets)  # compute the error
            loss.backward()                       # backward pass: compute gradients
            opt.step()                            # update the parameters

        if epoch_number % 10:
            continue
        # progress report
        print(progress_line(epoch_number, num_epochs, loss.item()))
# equivalent form of the nn.Sequential network above, written as a Module subclass
class MLP(nn.Module):
    """Two-layer perceptron: Linear(10 -> 5), ReLU, Linear(5 -> 2)."""

    def __init__(self):
        super(MLP, self).__init__()
        # Registration order (hidden, then out) fixes the parameter order.
        self.hidden = nn.Linear(in_features=10, out_features=5)  # hidden layer
        self.out = nn.Linear(in_features=5, out_features=2)      # output layer

    def forward(self, X):
        """How the answer is produced: hidden layer, ReLU, output layer."""
        pre_activation = self.hidden(X)
        activated = F.relu(pre_activation)
        return self.out(activated)
bias=True)\n", " (out): Linear(in_features=5, out_features=2, bias=True)\n", ")" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "net2" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Linear(in_features=10, out_features=5, bias=True),\n", " Linear(in_features=10, out_features=5, bias=True))" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "net[0], net2.hidden" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(OrderedDict([('weight',\n", " tensor([[-0.2913, 0.2741, -0.1572, -0.0602, -0.1266, 0.3091, 0.0150, -0.2267,\n", " -0.2864, 0.2689],\n", " [ 0.1637, 0.1041, -0.2499, 0.0425, 0.2751, 0.1211, 0.0142, 0.0580,\n", " -0.1686, -0.1782],\n", " [-0.2128, -0.2699, -0.2654, 0.1590, 0.1555, 0.0836, 0.1484, -0.2917,\n", " -0.0426, -0.3108],\n", " [ 0.1652, -0.2611, -0.0391, -0.0729, 0.0614, 0.2785, -0.0926, 0.2232,\n", " 0.0170, -0.1833],\n", " [ 0.0161, -0.2227, -0.0627, 0.2687, -0.2884, -0.1729, 0.0918, -0.2091,\n", " -0.1096, 0.2262]])),\n", " ('bias',\n", " tensor([ 0.2531, -0.1746, -0.2381, 0.0756, -0.3050]))]),\n", " OrderedDict([('weight',\n", " tensor([[-0.0298, -0.1098, 0.2952, 0.0838, -0.1459, 0.2697, 0.2458, -0.0543,\n", " 0.1449, 0.1264],\n", " [-0.2429, 0.1897, -0.1040, 0.2425, -0.2561, 0.1547, -0.0331, -0.2910,\n", " -0.0013, -0.1380],\n", " [-0.2489, 0.1274, -0.0307, -0.2244, -0.0548, 0.2250, -0.0645, -0.1102,\n", " -0.0484, -0.1338],\n", " [ 0.1526, 0.1736, -0.1106, -0.0456, 0.1500, -0.0332, -0.2646, -0.0886,\n", " 0.1390, 0.1706],\n", " [-0.0915, -0.3143, 0.1661, 0.1954, 0.1994, -0.0674, 0.0939, 0.2139,\n", " -0.0600, 0.1535]])),\n", " ('bias',\n", " tensor([-0.2741, -0.0290, 0.2044, -0.3139, 0.1633]))]))" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "net[0].state_dict(), net2.hidden.state_dict()" ] 
}, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([ 0.2531, -0.1746, -0.2381, 0.0756, -0.3050]),\n", " tensor([-0.2741, -0.0290, 0.2044, -0.3139, 0.1633]))" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "net[0].bias.data, net2.hidden.bias.data" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(None, None)" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "net[0].bias.grad, net2.hidden.bias.grad # не было обучения (BP), поэтому None" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('0.weight', torch.Size([5, 10])) ('0.bias', torch.Size([5])) ('2.weight', torch.Size([2, 5])) ('2.bias', torch.Size([2]))\n", "tensor([[-0.2913, 0.2741, -0.1572, -0.0602, -0.1266, 0.3091, 0.0150, -0.2267,\n", " -0.2864, 0.2689],\n", " [ 0.1637, 0.1041, -0.2499, 0.0425, 0.2751, 0.1211, 0.0142, 0.0580,\n", " -0.1686, -0.1782],\n", " [-0.2128, -0.2699, -0.2654, 0.1590, 0.1555, 0.0836, 0.1484, -0.2917,\n", " -0.0426, -0.3108]], grad_fn=)\n", "tensor([ 0.2531, -0.1746, -0.2381], grad_fn=)\n", "tensor([[-0.0220, -0.1656, 0.3063, 0.3028, -0.2868],\n", " [-0.3693, -0.2013, 0.1348, 0.3793, 0.2336]],\n", " grad_fn=)\n", "tensor([-0.1386, 0.3527], grad_fn=)\n" ] } ], "source": [ "# перечислить параметры\n", "print(*[(name, param.shape) for name, param in net.named_parameters()])\n", "\n", "for param in net.parameters():\n", " print(param[:3])" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(67, [50, 5, 10, 2])" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# считаем число параметров\n", "numel_list = [p.numel() for p in net.parameters() if p.requires_grad == True]\n", "sum(numel_list), 
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    """Build a Conv2d -> BatchNorm2d -> activation block.

    Args:
        in_f: number of input channels.
        out_f: number of output channels.
        activation: ``'relu'`` or ``'lrelu'``; any other key raises ``KeyError``.
        *args, **kwargs: forwarded to ``nn.Conv2d`` (kernel size, padding, ...).
    """
    activations = nn.ModuleDict([
        ['lrelu', nn.LeakyReLU()],
        ['relu', nn.ReLU()]
    ])

    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )


def dec_block(in_f, out_f):
    """Build a Linear -> Sigmoid block mapping ``in_f`` features to ``out_f``."""
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )


class MyEncoder(nn.Module):
    """Stack of ``conv_block``s, one per consecutive pair of ``enc_sizes``.

    Every block uses ``kernel_size=3, padding=1``; extra arguments (for example
    ``activation=...``) are forwarded to ``conv_block``.
    """

    def __init__(self, enc_sizes, *args, **kwargs):
        super().__init__()
        # NOTE: the attribute name is misspelled in the original; it is kept so
        # that existing state_dict keys stay valid.
        self.conv_blokcs = nn.Sequential(*[conv_block(in_f, out_f, kernel_size=3, padding=1, *args, **kwargs)
                                           for in_f, out_f in zip(enc_sizes, enc_sizes[1:])])

    def forward(self, x):
        return self.conv_blokcs(x)


class MyDecoder(nn.Module):
    """Stack of ``dec_block``s followed by a final Linear layer to ``n_classes``."""

    def __init__(self, dec_sizes, n_classes):
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f)
                                          for in_f, out_f in zip(dec_sizes, dec_sizes[1:])])
        self.last = nn.Linear(dec_sizes[-1], n_classes)

    def forward(self, x):
        # Fix: the original called ``self.dec_blocks()`` without the input and
        # never applied ``self.last``, so the class scores were never produced.
        return self.last(self.dec_blocks(x))


class MyCNNClassifier(nn.Module):
    """Convolutional encoder + fully connected decoder classifier.

    Args:
        in_c: number of input image channels.
        enc_sizes: output channel counts of the encoder blocks.
        dec_sizes: hidden sizes of the decoder blocks.
        n_classes: number of output classes.
        activation: activation key passed to ``conv_block``.
        input_hw: (height, width) of the input images. The encoder blocks keep
            the spatial size, so the flattened feature count is
            ``enc_sizes[-1] * height * width``. The default ``(28, 28)`` with a
            last encoder size of 32 gives the original ``32 * 28 * 28``.
    """

    def __init__(self, in_c, enc_sizes, dec_sizes, n_classes, activation='relu', input_hw=(28, 28)):
        super().__init__()
        height, width = input_hw
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [self.enc_sizes[-1] * height * width, *dec_sizes]

        self.encoder = MyEncoder(self.enc_sizes, activation=activation)

        # Fix: pass the sizes that include the flattened encoder output; the
        # original passed the raw ``dec_sizes``, so the first decoder layer did
        # not match the encoder.
        self.decoder = MyDecoder(self.dec_sizes, n_classes)

    def forward(self, x):
        x = self.encoder(x)

        x = x.flatten(1)  # flatten everything except the batch axis

        x = self.decoder(x)

        return x
# nested blocks

def block1():
    """Return a fresh Linear(10->4) -> ReLU -> Linear(4->10) -> ReLU stack."""
    layers = [
        nn.Linear(10, 4),
        nn.ReLU(),
        nn.Linear(4, 10),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)


def block2():
    """Return a Sequential of four independent ``block1`` stacks named 'block 0'..'block 3'."""
    container = nn.Sequential()
    block_names = [f'block {index}' for index in range(4)]
    for block_name in block_names:
        # each block1() is nested as a named child of the container
        container.add_module(block_name, block1())
    return container
# lists of layers

class MyNet(nn.Module):
    """``n_hidden_layers`` Linear(128, 128) + ReLU layers followed by Linear(128, 10)."""

    def __init__(self, n_hidden_layers):
        super().__init__()
        self.n_hidden_layers = n_hidden_layers
        self.final_layer = nn.Linear(128, 10)
        self.act = nn.ReLU()
        hidden_layers = [nn.Linear(128, 128) for _ in range(n_hidden_layers)]
        # Important: a plain Python list would hide these layers from
        # .parameters(); nn.ModuleList registers them as submodules.
        self.hidden = nn.ModuleList(hidden_layers)

    def forward(self, x):
        """Apply the first ``self.n_hidden_layers`` hidden layers, then the output layer."""
        activations = x
        for layer in (self.hidden[k] for k in range(self.n_hidden_layers)):
            activations = self.act(layer(activations))
        return self.final_layer(activations)
"source": [ "## Инициализация" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([-0.0137, -0.0014, 0.0093, 0.0124, -0.0141, 0.0154, -0.0074, 0.0009,\n", " 0.0061, -0.0013]),\n", " tensor(0.))" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# инициализация\n", "\n", "def init_normal(m):\n", " if type(m) == nn.Linear:\n", " nn.init.normal_(m.weight, mean=0, std=0.01)\n", " nn.init.zeros_(m.bias)\n", "\n", "net.apply(init_normal)\n", "\n", "net[0].weight.data[0], net[0].bias.data[0]" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([-0.3876, 0.1836, 0.5121, -0.5632, 0.3504, 0.0779, -0.6140, -0.2752,\n", " -0.0533, -0.3310])\n", "tensor([[42., 42., 42., 42., 42.],\n", " [42., 42., 42., 42., 42.]])\n" ] } ], "source": [ "# инициализация по блокам\n", "\n", "def xavier(m):\n", " if type(m) == nn.Linear:\n", " nn.init.xavier_uniform_(m.weight)\n", "\n", "def init_42(m):\n", " if type(m) == nn.Linear:\n", " nn.init.constant_(m.weight, 42)\n", "\n", "net[0].apply(xavier)\n", "net[2].apply(init_42)\n", "\n", "print(net[0].weight.data[0])\n", "print(net[2].weight.data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Функции обучения и теста на одной эпохе" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def train_epoch(model, train_loader, criterion, optimizer):\n", " model.train()\n", "\n", " running_loss = 0.0\n", " \n", " start_time = time.time()\n", " for batch_idx, (data, target) in enumerate(train_loader): \n", " optimizer.zero_grad() # .backward() accumulates gradients\n", " data = data.to(device)\n", " target = target.to(device) # all data & model on same device\n", "\n", " outputs = model(data)\n", " loss = criterion(outputs, target)\n", " running_loss += loss.item()\n", "\n", " 
loss.backward()\n", " optimizer.step()\n", " \n", " end_time = time.time()\n", " \n", " running_loss /= len(train_loader)\n", " print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')\n", " return running_loss\n", "\n", "def test_model(model, test_loader, criterion):\n", " with torch.no_grad():\n", " model.eval()\n", "\n", " running_loss = 0.0\n", " total_predictions = 0.0\n", " correct_predictions = 0.0\n", "\n", " for batch_idx, (data, target) in enumerate(test_loader): \n", " data = data.to(device)\n", " target = target.to(device)\n", "\n", " outputs = model(data)\n", "\n", " _, predicted = torch.max(outputs.data, 1)\n", " total_predictions += target.size(0)\n", " correct_predictions += (predicted == target).sum().item()\n", "\n", " loss = criterion(outputs, target).detach()\n", " running_loss += loss.item()\n", "\n", "\n", " running_loss /= len(test_loader)\n", " acc = (correct_predictions/total_predictions)*100.0\n", " print('Testing Loss: ', running_loss)\n", " print('Testing Accuracy: ', acc, '%')\n", " return running_loss, acc\n", " \n", "n_epochs = 10\n", "Train_loss = []\n", "Test_loss = []\n", "Test_acc = []\n", "\n", "for i in range(n_epochs):\n", " train_loss = train_epoch(model, train_loader, criterion, optimizer)\n", " test_loss, test_acc = test_model(model, test_loader, criterion)\n", " Train_loss.append(train_loss)\n", " Test_loss.append(test_loss)\n", " Test_acc.append(test_acc)\n", " print('='*20)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Сохранение / загрузка сети" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.save(model, \"/tmp/model\")\n", "torch.save(model.state_dict(), \"/tmp/model\") # только параметры\n", "\n", "model = torch.load(\"/tmp/model\")\n", "\n", "model = MLP()\n", "model.load_state_dict(torch.load(\"/tmp/model\")) # только параметры\n", "model.eval()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Свёртки, пулинг" 
class CustomNLLLoss(nn.Module):
    """Negative log-likelihood loss, averaged over the batch.

    ``x`` is expected to be the output of a LogSoftmax layer with classes along
    dim 1; ``y`` holds one class index per row.
    """

    def __init__(self):
        super(CustomNLLLoss, self).__init__()

    def forward(self, x, y):
        # Pick, for every row, the log-probability of its target class ...
        target_column = y.unsqueeze(1)
        picked_log_prob = x.gather(1, target_column)
        # ... and negate it: loss = -mean(y * log p).
        per_sample_loss = -1.0 * picked_log_prob
        return per_sample_loss.mean()
class BiLSTM(nn.Module):
    """Bidirectional-LSTM text classifier producing one logit per sequence.

    Pipeline: embedding -> BiLSTM -> mean- and max-pooling over time,
    concatenated -> Linear + ReLU -> dropout -> Linear(64, 1).

    Args:
        max_features: vocabulary size of the embedding table.
        embed_size: embedding dimension.
        hidden_size: LSTM hidden size per direction.
        drp: dropout probability before the output layer.

    The defaults reproduce the originally hard-coded configuration.
    """

    def __init__(self, max_features=10000, embed_size=300, hidden_size=64, drp=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(max_features, embed_size)
        self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
        # 2 directions x 2 pooled summaries (mean and max) = 4 * hidden_size features.
        self.linear = nn.Linear(self.hidden_size*4 , 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(64, 1)

    def forward(self, x):
        """Map token indices of shape (batch, seq_len) to logits of shape (batch, 1)."""
        h_embedding = self.embedding(x)
        # Fix: the original always ran squeeze(unsqueeze(h, 0)), which also
        # dropped the batch or sequence axis whenever it had size 1 and broke
        # the dim=1 pooling below. A 3-D embedding is now left untouched; only
        # inputs that carry extra axes are squeezed as before.
        if h_embedding.dim() != 3:
            h_embedding = torch.squeeze(h_embedding)

        h_lstm, _ = self.lstm(h_embedding)
        avg_pool = torch.mean(h_lstm, 1)
        max_pool, _ = torch.max(h_lstm, 1)
        conc = torch.cat(( avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out
"execution_count": 111, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import torch.nn as nn\n", "\n", "X = torch.rand(500, 2)\n", "y = X[:, 0] + X[:, 1]\n", "plt.figure(figsize=(12, 5))\n", "plt.subplot(1, 2, 1)\n", "plt.scatter(X[:,0], X[:,1], 20, y)\n", "\n", "n_hidden = 10\n", "model = nn.Sequential(nn.Linear(2, n_hidden),\n", " nn.ReLU(),\n", " nn.Linear(n_hidden, n_hidden),\n", " nn.ReLU(),\n", " nn.Linear(n_hidden, 2))\n", "# model.to(device)\n", "with torch.no_grad():\n", " Y = model(X)\n", "plt.subplot(1, 2, 2)\n", "plt.scatter(Y[:,0], Y[:,1], 20, y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Пример" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shapes:\n", "X: (3000, 2)\n", "y: (3000,)\n" ] } ], "source": [ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "import random, math\n", "seed = 12345\n", "random.seed(seed)\n", "torch.manual_seed(seed)\n", "N = 1000 # num_samples_per_class\n", "D = 2 # dimensions\n", "C = 3 # num_classes\n", "H = 100 # num_hidden_units\n", "\n", "# данные\n", "X = torch.zeros(N * C, D).to(device)\n", "y = torch.zeros(N * C, dtype=torch.long).to(device)\n", "for c in range(C):\n", " index = 0\n", " t = torch.linspace(0, 1, N)\n", "\n", " inner_var = torch.linspace((2 * math.pi / C) * (c), (2 * math.pi / C) * (2 + c), N) + torch.randn(N) * 0.2\n", " \n", " for ix in range(N * c, N * (c + 1)):\n", " X[ix] = t[index] * torch.FloatTensor((math.sin(inner_var[index]), math.cos(inner_var[index])))\n", " y[ix] = c\n", " index += 1\n", "\n", "print(\"Shapes:\")\n", "print(\"X:\", tuple(X.size()))\n", "print(\"y:\", tuple(y.size()))" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[EPOCH]: 999, [LOSS]: 0.161706, [ACCURACY]: 0.954\n" ] } ], "source": [ "from IPython import 
display\n", "learning_rate = 1e-3\n", "lambda_l2 = 1e-5\n", "\n", "model = nn.Sequential(\n", " nn.Linear(D, H),\n", " nn.ReLU(),\n", " nn.Linear(H, C)\n", ")\n", "model.to(device)\n", "\n", "criterion = torch.nn.CrossEntropyLoss()\n", "\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2)\n", "\n", "for t in range(1000):\n", " \n", " y_pred = model(X)\n", "\n", " loss = criterion(y_pred, y)\n", " score, predicted = torch.max(y_pred, 1)\n", " acc = (y == predicted).sum().float() / len(y)\n", " print(\"[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f\" % (t, loss.item(), acc))\n", " display.clear_output(wait=True)\n", " \n", " optimizer.zero_grad()\n", " \n", " loss.backward()\n", "\n", " optimizer.step()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sequential(\n", " (0): Linear(in_features=2, out_features=100, bias=True)\n", " (1): ReLU()\n", " (2): Linear(in_features=100, out_features=3, bias=True)\n", ")\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "print(model)\n", "plot_model(X, y, model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### НС на numpy\n", "\n", "дальше примеры из https://pytorch.org/tutorials/beginner/pytorch_with_examples.html" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.74824768139619e-24\n", "Wall time: 2.55 s\n" ] } ], "source": [ "%%time\n", "\n", "import numpy as np\n", "\n", "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "# Create random input and output data\n", "x = np.random.randn(N, D_in)\n", "y = np.random.randn(N, D_out)\n", "\n", "# Randomly initialize weights\n", "w1 = np.random.randn(D_in, H)\n", "w2 = np.random.randn(H, D_out)\n", "\n", "learning_rate = 1e-6\n", "for t in range(5000):\n", " # Forward pass: compute predicted y\n", " h = x.dot(w1)\n", " h_relu = np.maximum(h, 0)\n", " y_pred = h_relu.dot(w2)\n", "\n", " # Compute and print loss\n", " loss = np.square(y_pred - y).sum()\n", " # print(t, loss)\n", "\n", " # Backprop to compute gradients of w1 and w2 with respect to loss\n", " grad_y_pred = 2.0 * (y_pred - y)\n", " grad_w2 = h_relu.T.dot(grad_y_pred)\n", " grad_h_relu = grad_y_pred.dot(w2.T)\n", " grad_h = grad_h_relu.copy()\n", " grad_h[h < 0] = 0\n", " grad_w1 = x.T.dot(grad_h)\n", "\n", " # Update weights\n", " w1 -= learning_rate * grad_w1\n", " w2 -= learning_rate * grad_w2\n", " \n", "print(loss)" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.74573608294304e-07\n", "Wall time: 7.42 s\n" ] } ], "source": [ "%%time\n", "\n", "import torch\n", "\n", "\n", "dtype = torch.float\n", "# device = torch.device(\"cpu\")\n", "device = torch.device(\"cuda:0\") # 
Uncomment this to run on GPU\n", "\n", "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "# Create random input and output data\n", "x = torch.randn(N, D_in, device=device, dtype=dtype)\n", "y = torch.randn(N, D_out, device=device, dtype=dtype)\n", "\n", "# Randomly initialize weights\n", "w1 = torch.randn(D_in, H, device=device, dtype=dtype)\n", "w2 = torch.randn(H, D_out, device=device, dtype=dtype)\n", "\n", "learning_rate = 1e-6\n", "for t in range(5000):\n", " # Forward pass: compute predicted y\n", " h = x.mm(w1)\n", " h_relu = h.clamp(min=0)\n", " y_pred = h_relu.mm(w2)\n", "\n", " # Compute and print loss\n", " loss = (y_pred - y).pow(2).sum().item()\n", " #if t % 100 == 99:\n", " # print(t, loss)\n", "\n", " # Backprop to compute gradients of w1 and w2 with respect to loss\n", " grad_y_pred = 2.0 * (y_pred - y)\n", " grad_w2 = h_relu.t().mm(grad_y_pred)\n", " grad_h_relu = grad_y_pred.mm(w2.t())\n", " grad_h = grad_h_relu.clone()\n", " grad_h[h < 0] = 0\n", " grad_w1 = x.t().mm(grad_h)\n", "\n", " # Update weights using gradient descent\n", " w1 -= learning_rate * grad_w1\n", " w2 -= learning_rate * grad_w2\n", "\n", "print(loss)\n", "# CPU 3.05 s\n", "# GPU 6.77 s" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.4651128544282983e-07\n", "Wall time: 7.52 s\n" ] } ], "source": [ "%%time\n", "\n", "import torch\n", "\n", "dtype = torch.float\n", "# device = torch.device(\"cpu\")\n", "device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n", "\n", "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "# Create random Tensors to hold input and outputs.\n", "# Setting requires_grad=False indicates that we do not need to compute gradients\n", "# with respect 
to these Tensors during the backward pass.\n", "x = torch.randn(N, D_in, device=device, dtype=dtype)\n", "y = torch.randn(N, D_out, device=device, dtype=dtype)\n", "\n", "# Create random Tensors for weights.\n", "# Setting requires_grad=True indicates that we want to compute gradients with\n", "# respect to these Tensors during the backward pass.\n", "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n", "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n", "\n", "learning_rate = 1e-6\n", "for t in range(5000):\n", " # Forward pass: compute predicted y using operations on Tensors; these\n", " # are exactly the same operations we used to compute the forward pass using\n", " # Tensors, but we do not need to keep references to intermediate values since\n", " # we are not implementing the backward pass by hand.\n", " y_pred = x.mm(w1).clamp(min=0).mm(w2)\n", "\n", " # Compute and print loss using operations on Tensors.\n", " # Now loss is a 0-dimensional (scalar) Tensor, i.e. loss.shape == ()\n", " # loss.item() gets the scalar value held in the loss.\n", " loss = (y_pred - y).pow(2).sum()\n", " #if t % 100 == 99:\n", " # print(t, loss.item())\n", "\n", " # Use autograd to compute the backward pass. This call will compute the\n", " # gradient of loss with respect to all Tensors with requires_grad=True.\n", " # After this call w1.grad and w2.grad will be Tensors holding the gradient\n", " # of the loss with respect to w1 and w2 respectively.\n", " loss.backward()\n", "\n", " # Manually update weights using gradient descent. 
Wrap in torch.no_grad()\n", " # because weights have requires_grad=True, but we don't need to track this\n", " # in autograd.\n", " # An alternative way is to operate on weight.data and weight.grad.data.\n", " # Recall that tensor.data gives a tensor that shares the storage with\n", " # tensor, but doesn't track history.\n", " # You can also use torch.optim.SGD to achieve this.\n", " with torch.no_grad():\n", " w1 -= learning_rate * w1.grad\n", " w2 -= learning_rate * w2.grad\n", "\n", " # Manually zero the gradients after updating weights\n", " w1.grad.zero_()\n", " w2.grad.zero_()\n", " \n", "print (loss.item())\n", "\n", "# CPU 5.7 s\n", "# GPU 8.87 s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Пример пакета nn" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0499554350951179e-11\n", "Wall time: 7.21 s\n" ] } ], "source": [ "%%time\n", "\n", "import torch\n", "\n", "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "# Create random Tensors to hold inputs and outputs\n", "x = torch.randn(N, D_in)\n", "y = torch.randn(N, D_out)\n", "\n", "# Use the nn package to define our model as a sequence of layers. nn.Sequential\n", "# is a Module which contains other Modules, and applies them in sequence to\n", "# produce its output. 
Each Linear Module computes output from input using a\n", "# linear function, and holds internal Tensors for its weight and bias.\n", "model = torch.nn.Sequential(\n", " torch.nn.Linear(D_in, H),\n", " torch.nn.ReLU(),\n", " torch.nn.Linear(H, D_out),\n", ")\n", "\n", "# The nn package also contains definitions of popular loss functions; in this\n", "# case we will use Mean Squared Error (MSE) as our loss function; with\n", "# reduction='sum' it returns the sum of squared errors instead of their mean.\n", "loss_fn = torch.nn.MSELoss(reduction='sum')\n", "\n", "learning_rate = 1e-4\n", "for t in range(5000):\n", " # Forward pass: compute predicted y by passing x to the model. Module objects\n", " # override the __call__ operator so you can call them like functions. When\n", " # doing so you pass a Tensor of input data to the Module and it produces\n", " # a Tensor of output data.\n", " y_pred = model(x)\n", "\n", " # Compute and print loss. We pass Tensors containing the predicted and true\n", " # values of y, and the loss function returns a Tensor containing the\n", " # loss.\n", " loss = loss_fn(y_pred, y)\n", " #if t % 100 == 99:\n", " # print(t, loss.item())\n", "\n", " # Zero the gradients before running the backward pass.\n", " model.zero_grad()\n", "\n", " # Backward pass: compute gradient of the loss with respect to all the learnable\n", " # parameters of the model. Internally, the parameters of each Module are stored\n", " # in Tensors with requires_grad=True, so this call will compute gradients for\n", " # all learnable parameters in the model.\n", " loss.backward()\n", "\n", " # Update the weights using gradient descent. 
Each parameter is a Tensor, so\n", " # we can access its gradients like we did before.\n", " with torch.no_grad():\n", " for param in model.parameters():\n", " param -= learning_rate * param.grad\n", " \n", "print(loss.item())\n", "# CPU 8.05 s" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0213095993916177e-11\n", "Wall time: 10.3 s\n" ] } ], "source": [ "%%time\n", "\n", "import torch\n", "\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "x = torch.randn(N, D_in)\n", "y = torch.randn(N, D_out)\n", "\n", "model = torch.nn.Sequential(\n", " torch.nn.Linear(D_in, H),\n", " torch.nn.ReLU(),\n", " torch.nn.Linear(H, D_out),\n", ")\n", "\n", "loss_fn = torch.nn.MSELoss(reduction='sum')\n", "\n", "\n", "learning_rate = 1e-4\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", "\n", "for t in range(5000):\n", " y_pred = model(x)\n", "\n", " loss = loss_fn(y_pred, y)\n", " # if t % 100 == 99:\n", " # print(t, loss.item())\n", "\n", " optimizer.zero_grad()\n", "\n", " loss.backward()\n", "\n", " optimizer.step()\n", " \n", "print(loss.item())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## отдельные Log-регрессии и одна многомерная\n", "\n", "* отдельные - очень медленно на 1 GPU" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [], "source": [ "class MyOneModel(nn.Module):\n", " def __init__(self, in_features, out_features):\n", " super(MyOneModel, self).__init__()\n", " self.bn1 = nn.BatchNorm1d(in_features)\n", " self.do1 = nn.Dropout(0.3)\n", " self.ln1 = nn.Linear(in_features, out_features) # nn.utils.weight_norm(nn.Linear(n1, n2))\n", " self.ph1 = nn.Sigmoid()\n", " \n", " def forward(self, x):\n", " z = self.ph1(self.ln1(self.do1(self.bn1(x)))) # \n", " return z\n", " \n", "class MyModel(nn.Module):\n", " def __init__(self, in_features, out_features):\n", " super(MyModel, self).__init__()\n", " # 
self.out_features = out_features\n", " self.mymodules = nn.ModuleList([MyOneModel(in_features, 1) for _ in range(out_features)])\n", " # self.parameters = nn.ParameterList([f.parameters for f in self.mymodules])\n", " \n", " def forward(self, x):\n", " z = torch.cat([f(x) for f in self.mymodules], axis=1)\n", " # print(z.size(), self.mymodules[0](x).size())\n", " return z" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }