{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "41ac809a-f7c9-4a9d-9a6e-1be18bf49fa8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from lets_plot import *\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "id": "b197f201-0c65-4c5d-9e74-5a8f3fae3d20", "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)" ] }, { "cell_type": "code", "execution_count": 3, "id": "9cb90585-0450-4b1d-b7bf-5e8b55162ecb", "metadata": {}, "outputs": [], "source": [ "n = 50\n", "x = np.arange(n)\n", "y = x + np.random.normal(scale=10, size=n)\n" ] }, { "cell_type": "markdown", "id": "c3fb2929-89e5-4c6a-aa81-3a2e1afe66db", "metadata": {}, "source": [ "#### Default" ] }, { "cell_type": "code", "execution_count": 4, "id": "6ffbffee-5705-432e-b7f4-02103ee870ea", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_smooth(labels = smooth_labels()) " ] }, { "cell_type": "markdown", "id": "06e00213-fdae-41bf-af7c-c5fd1c785395", "metadata": {}, "source": [ "#### This is equivalent to" ] }, { "cell_type": "code", "execution_count": 5, "id": "d02b7432-cb91-4a70-bdb6-725655bd112b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_smooth() + geom_stat_r2(labels = smooth_labels())" ] }, { "cell_type": "markdown", "id": "a6223b50-5a6f-4076-8165-36fe291dbc53", "metadata": {}, "source": [ "#### You can use `geom_stat_r2()` without `geom_smooth()`." ] }, { "cell_type": "code", "execution_count": 6, "id": "fcbedaa1-b29c-4f8b-994f-9a893f493ed7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_stat_r2(labels = smooth_labels())" ] }, { "cell_type": "markdown", "id": "561f8c5f-f0a2-4d85-b1e1-49483ef1edd9", "metadata": {}, "source": [ "#### `eq` instead of the default `r2`" ] }, { "cell_type": "code", "execution_count": 7, "id": "2ed51740-aba6-4b73-bed2-cea5e39bb052", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_smooth(labels = smooth_labels().eq()) " ] }, { "cell_type": "markdown", "id": "f7490f4e-ff85-4654-9026-6430d22c6882", "metadata": {}, "source": [ "#### R2 with format" ] }, { "cell_type": "code", "execution_count": 8, "id": "a3e1e255-34db-429a-a101-9002f86ffa74", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_smooth(labels = smooth_labels().rr(format = '{.1f}')) " ] }, { "cell_type": "markdown", "id": "2b17fafb-b3ed-47cf-bbcf-faf9418c2fc8", "metadata": {}, "source": [ "#### `eq` with format" ] }, { "cell_type": "code", "execution_count": 9, "id": "fc415d37-e08f-4a47-a69d-76dea37fdcb6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + geom_smooth(labels = smooth_labels().eq(format = '{.2f}')) " ] }, { "cell_type": "markdown", "id": "804aa503-117e-42d4-bece-f9b64c2a9e63", "metadata": {}, "source": [ "#### You can replace the left-hand side of the formula or disable it." ] }, { "cell_type": "code", "execution_count": 10, "id": "8d93babd-3779-4b3c-b7e3-9febb0b54d29", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot = ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point()\n", "gggrid([\n", " plot + geom_smooth(labels = smooth_labels().eq(with_lhs = 'f(x)')), \n", " plot + geom_smooth(labels = smooth_labels().eq(with_lhs = None)), \n", "])\n", " " ] }, { "cell_type": "markdown", "id": "1fd3f64f-0690-457f-9017-00d336589b8a", "metadata": {}, "source": [ "#### You can replace the right-hand side of the formula" ] }, { "cell_type": "code", "execution_count": 11, "id": "bde0caf0-3de4-41e9-99e2-b3b0ca0e1341", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point() + \\\n", " geom_smooth(labels = smooth_labels().eq(rhs='t')) " ] }, { "cell_type": "code", "execution_count": 12, "id": "3d5c1e34-684e-4b62-aa15-55df21d2640f", "metadata": {}, "outputs": [], "source": [ "n = 100\n", "x = np.linspace(-2, 2, n)\n", "y = x ** 2 + np.random.normal(size=n)" ] }, { "cell_type": "code", "execution_count": 13, "id": "569c0768-e61f-43a3-b3f9-500ba032d186", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " geom_point() + \\\n", " geom_smooth(color='red', deg=2, se=False, \n", " labels=smooth_labels()\n", " .eq(with_lhs = 'f(x)', rhs = 't', format='{.2f}')\n", " .rr('{.3f}'), \n", " label_x='center', \n", " label_y='middle')" ] }, { "cell_type": "code", "execution_count": 14, "id": "c31415e1-f230-42c1-b44f-e99201535190", "metadata": {}, "outputs": [], "source": [ "t = np.linspace(0, 1, 100)\n", "mean = 1 + np.zeros(2)\n", "cov = np.eye(2)\n", "x, y = np.random.multivariate_normal(mean, cov, t.size).T\n", "df = pd.DataFrame({'t': t, 'x': x, 'y': y})\n", "df = df.melt(id_vars=['t'], value_vars=['x', 'y'])" ] }, { "cell_type": "code", "execution_count": 15, "id": "5558f870-e320-4e5a-8bf5-5f305e17e3e0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(x='t', y='value', group='variable')) + \\\n", " geom_point(aes(color='variable'), size=3, alpha=.5) + \\\n", " geom_smooth(aes(color='variable'), size=1, span=.3, deg=5, level=.7, seed=42, labels = smooth_labels().eq().rr())" ] }, { "cell_type": "code", "execution_count": 16, "id": "2e881c41-330e-4d3e-bfb2-ea40a887b0f9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def generate_exact_parabola(n_points=50, a=1, b=0, c=0):\n", " \"\"\"\n", " Generate exact coordinates of the parabola y = ax^2 + bx + c.\n", " No noise is added.\n", " \"\"\"\n", " # 1. Generate X (evenly spaced from -10 to 10)\n", " x = np.linspace(-10, 10, n_points)\n", " \n", " # 2. Compute Y exactly from the formula\n", " y = a * x**2 + b * x + c\n", " \n", " # 3. Pack into a DataFrame\n", " df = pd.DataFrame({\n", " 'x': x,\n", " 'y': y\n", " })\n", " \n", " return df\n", "\n", "# --- Parabola parameters ---\n", "# For example: y = 2x^2 - 4x + 1\n", "a_true = 2\n", "b_true = -4\n", "c_true = 1\n", "\n", "# Generate the data\n", "df = generate_exact_parabola(n_points=130, a=a_true, b=b_true, c=c_true)\n", "\n", "# --- Visualization ---\n", "plot = (\n", " ggplot(df, aes(x='x', y='y')) + \n", " \n", " # Line layer (shows the ideal shape)\n", " geom_line(color='#3498db', size=1.0) +\n", " \n", " # Point layer (shows the individual data samples that are fed to the estimator)\n", " geom_point(size=4, color='#e74c3c', shape=1) +\n", " \n", " # Styling\n", " ggtitle(f'Точные данные: y = {a_true}x² + ({b_true})x + {c_true}') +\n", " labs(x='Аргумент (X)', y='Значение функции (Y)') +\n", " theme_minimal()\n", ")\n", "\n", "plot.show()\n", "\n", "ggplot(df, aes(x='x', y='y')) + geom_smooth(deg=2, labels = smooth_labels().eq(), label_x='center') + geom_point()" ] }, { "cell_type": "code", "execution_count": null, "id": "e4b6d773-fc71-4cf0-947b-d63325ae8ffe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.20" } 
}, "nbformat": 4, "nbformat_minor": 5 }