{ "cells": [ { "cell_type": "markdown", "id": "6b79fd3a-ad11-4f23-97b1-c79a8b41e4c8", "metadata": {}, "source": [ "# Инициализация и получение данных\n", "\n", "Ноутбук демонстрирует пример запроса и работы с данными [MyTracker](https://tracker.my.com).\n", "\n", "> **Внимание!** Чтобы всё заработало, в ячейку ниже вам необходио вписать UserId и SecretKey! Получить их можно в личном кабинете, если у вас есть права на экспорт сырых данных. Пожалуйста, проконсультируйтесь с [документацией](https://docs.tracker.my.com/api/export-api/access/), как сделать это.\n", "\n", "Таймстампы все в UTC исходно, мы их приводим в Europe/Moscow и специально даём суффикс полям `_MSK`!" ] }, { "cell_type": "code", "execution_count": 27, "id": "35501613-b7b5-4e08-a966-37e8efd53ba0", "metadata": {}, "outputs": [], "source": [ "API_USER_ID = 49759\n", "API_SECRET_KEY = 'qwert'" ] }, { "cell_type": "code", "execution_count": 2, "id": "bdf4f71b-fadf-4991-a266-d4dcf499ce66", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: mytracker_export_api in /opt/conda/lib/python3.11/site-packages (1.0.5)\n", "Requirement already satisfied: requests>=2.20.0 in /opt/conda/lib/python3.11/site-packages (from mytracker_export_api) (2.31.0)\n", "Requirement already satisfied: pandas>=1.1.0 in /opt/conda/lib/python3.11/site-packages (from mytracker_export_api) (2.0.3)\n", "Requirement already satisfied: urllib3<2,>=1.26.0 in /opt/conda/lib/python3.11/site-packages (from mytracker_export_api) (1.26.19)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.1.0->mytracker_export_api) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.1.0->mytracker_export_api) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.1.0->mytracker_export_api) (2023.3)\n", "Requirement already satisfied: numpy>=1.21.0 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.1.0->mytracker_export_api) (1.24.4)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests>=2.20.0->mytracker_export_api) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests>=2.20.0->mytracker_export_api) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests>=2.20.0->mytracker_export_api) (2023.5.7)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.1.0->mytracker_export_api) (1.16.0)\n", "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install mytracker_export_api" ] }, { "cell_type": "code", "execution_count": 3, "id": "1b5acf9f-fcf2-4063-8cf0-09ca77912e73", "metadata": {}, "outputs": [], "source": [ "from mytracker_export_api import MyTracker\n", "params = {\n", " 'dateFrom': '2024-06-24',\n", " 'dateTo': '2024-06-25',\n", " 'selectors': 'eventName,idAppVersionTitle,idOsTitle,tsEvent,tsInsert,idDevice,params.name,params.value,customUserId',\n", " 'idApp': 61475,\n", "# 'idApp': '61389,61390,61475,61476,80840,80842,80843,95584',\n", " 'event': 'customEvents'\n", "}\n", "client = MyTracker(api_user_id=API_USER_ID, api_secret_key=API_SECRET_KEY)\n", "df = client.get_raw_data(params)" ] }, { "cell_type": "code", "execution_count": 4, "id": "faebbf0a-68e9-45d8-8969-6fc288ca998e", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params.name</th>\n", " <th>params.value</th>\n", " <th>customUserId</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296521</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296530</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296778</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296906</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>mt_login</td>\n", " <td>2.20.0-02c55807</td>\n", " <td>Android</td>\n", " <td>1719306146</td>\n", " <td>1719312270</td>\n", " <td>86868404654</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[04203afa-27b3-4844-95da-8238bf69df4a,2fed9ec3...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " eventName idAppVersionTitle idOsTitle tsEvent tsInsert idDevice \\\n", "0 mt_login 2.19.2-d1a60611 Android 1719296521 1719301344 52344631831 \n", "1 mt_login 2.19.2-d1a60611 Android 1719296530 1719301344 52344631831 \n", "2 mt_login 2.19.1-08f155b8 Android 1719296778 1719301344 52344631831 \n", "3 mt_login 2.19.1-08f155b8 Android 1719296906 1719301344 52344631831 \n", "4 mt_login 2.20.0-02c55807 Android 1719306146 1719312270 86868404654 \n", "\n", " params.name params.value customUserId \n", "0 [] [] [f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93... \n", "1 [] [] [f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93... \n", "2 [] [] [432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a... \n", "3 [] [] [432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a... \n", "4 [] [] [04203afa-27b3-4844-95da-8238bf69df4a,2fed9ec3... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "6cc5d5ab-3fd7-47a6-94ba-1986ccebff6e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params.name</th>\n", " <th>params.value</th>\n", " <th>customUserId</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296521</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296530</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296778</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296906</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>mt_login</td>\n", " <td>2.20.0-02c55807</td>\n", " <td>Android</td>\n", " <td>1719306146</td>\n", " <td>1719312270</td>\n", " <td>86868404654</td>\n", " <td>[]</td>\n", " <td>[]</td>\n", " <td>[04203afa-27b3-4844-95da-8238bf69df4a,2fed9ec3...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " eventName idAppVersionTitle idOsTitle tsEvent tsInsert idDevice \\\n", "0 mt_login 2.19.2-d1a60611 Android 1719296521 1719301344 52344631831 \n", "1 mt_login 2.19.2-d1a60611 Android 1719296530 1719301344 52344631831 \n", "2 mt_login 2.19.1-08f155b8 Android 1719296778 1719301344 52344631831 \n", "3 mt_login 2.19.1-08f155b8 Android 1719296906 1719301344 52344631831 \n", "4 mt_login 2.20.0-02c55807 Android 1719306146 1719312270 86868404654 \n", "\n", " params.name params.value customUserId \n", "0 [] [] [f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93... \n", "1 [] [] [f2fa6186-1c44-4f82-b62b-34ac023d17b1,a5086f93... \n", "2 [] [] [432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a... \n", "3 [] [] [432b4ca1-82f3-4cf7-a714-f08adfdb4343,27d6f62a... \n", "4 [] [] [04203afa-27b3-4844-95da-8238bf69df4a,2fed9ec3... " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(df.head())" ] }, { "cell_type": "markdown", "id": "ecec118b-58cc-44ea-98c7-3e09603b5dfa", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Предобработка\n", "Конвертация дат, фильтрация" ] }, { "cell_type": "markdown", "id": "5378ddb8-2e0a-45cd-84fc-4dd40cf46376", "metadata": {}, "source": [ "## Convert params to dict" ] }, { "cell_type": "code", "execution_count": 6, "id": "342ec60b-3ce7-4ac0-a166-d3b1f9be79f6", "metadata": {}, "outputs": [], "source": [ "import ast\n", "df['params'] = df.apply(lambda it: dict(zip(ast.literal_eval(it['params.name']), ast.literal_eval(it['params.value']))), axis=1)\n", "df.drop(columns=['params.name', 'params.value'], axis=1, inplace=True)" ] }, { "cell_type": "markdown", "id": "10c5761d-4504-4bdd-a9f1-bd479c9cf3eb", "metadata": {}, "source": [ "## Extract account_id" ] }, { "cell_type": "code", "execution_count": 7, "id": "f1ec9e4f-8b6b-45c7-acc5-c4cc66bd3fad", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params</th>\n", " <th>account_id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296521</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>{}</td>\n", " <td>f2fa6186-1c44-4f82-b62b-34ac023d17b1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>mt_login</td>\n", " <td>2.19.2-d1a60611</td>\n", " <td>Android</td>\n", " <td>1719296530</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>{}</td>\n", " <td>f2fa6186-1c44-4f82-b62b-34ac023d17b1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296778</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>{}</td>\n", " <td>432b4ca1-82f3-4cf7-a714-f08adfdb4343</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>mt_login</td>\n", " <td>2.19.1-08f155b8</td>\n", " <td>Android</td>\n", " <td>1719296906</td>\n", " <td>1719301344</td>\n", " <td>52344631831</td>\n", " <td>{}</td>\n", " <td>432b4ca1-82f3-4cf7-a714-f08adfdb4343</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>mt_login</td>\n", " <td>2.20.0-02c55807</td>\n", " <td>Android</td>\n", " <td>1719306146</td>\n", " <td>1719312270</td>\n", " <td>86868404654</td>\n", " <td>{}</td>\n", " <td>04203afa-27b3-4844-95da-8238bf69df4a</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>7664</th>\n", " <td>swipe</td>\n", " <td>2.20.0-ddc6a301</td>\n", " <td>Android</td>\n", " <td>1719309083</td>\n", " <td>1719317825</td>\n", " <td>70284455492</td>\n", " <td>{'code': 'LOYALTY_BALANCE.ACCOUNTS_WIDGETS', '...</td>\n", " <td>43807c7d-14c9-41d0-9f0d-3041524180e6</td>\n", " </tr>\n", " <tr>\n", " <th>7665</th>\n", " <td>swipe</td>\n", " <td>2.20.0-ddc6a301</td>\n", " <td>Android</td>\n", " <td>1719309089</td>\n", " <td>1719317825</td>\n", " <td>70284455492</td>\n", " <td>{'code': 'LOYALTY_BALANCE.TRANSACTION_FILTERS'...</td>\n", " <td>43807c7d-14c9-41d0-9f0d-3041524180e6</td>\n", " </tr>\n", " <tr>\n", " <th>7666</th>\n", " <td>swipe</td>\n", " <td>2.20.0-ddc6a301</td>\n", " <td>Android</td>\n", " <td>1719314829</td>\n", " <td>1719318680</td>\n", " <td>70284455492</td>\n", " <td>{'code': 'PROFILE.PROFILE', 'name': 'Profile',...</td>\n", " <td>43807c7d-14c9-41d0-9f0d-3041524180e6</td>\n", " </tr>\n", " <tr>\n", " <th>7667</th>\n", " <td>swipe</td>\n", " <td>2.20.0-ddc6a301</td>\n", " <td>Android</td>\n", " <td>1719314830</td>\n", " <td>1719318680</td>\n", " <td>70284455492</td>\n", " <td>{'code': 'PROFILE.PROFILE', 'name': 'Profile',...</td>\n", " <td>43807c7d-14c9-41d0-9f0d-3041524180e6</td>\n", " </tr>\n", " <tr>\n", " <th>7668</th>\n", " <td>swipe</td>\n", " <td>2.20.0-ddc6a301</td>\n", " <td>Android</td>\n", " <td>1719314831</td>\n", " <td>1719318680</td>\n", " <td>70284455492</td>\n", " <td>{'code': 'PROFILE.PROFILE', 'name': 'Profile',...</td>\n", " <td>43807c7d-14c9-41d0-9f0d-3041524180e6</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>7669 rows × 8 columns</p>\n", "</div>" ], "text/plain": [ " eventName idAppVersionTitle idOsTitle tsEvent tsInsert \\\n", "0 mt_login 2.19.2-d1a60611 Android 1719296521 1719301344 \n", "1 mt_login 2.19.2-d1a60611 Android 1719296530 1719301344 \n", "2 mt_login 2.19.1-08f155b8 Android 1719296778 1719301344 \n", "3 mt_login 2.19.1-08f155b8 Android 1719296906 1719301344 \n", "4 mt_login 2.20.0-02c55807 Android 1719306146 1719312270 \n", "... ... ... ... ... ... \n", "7664 swipe 2.20.0-ddc6a301 Android 1719309083 1719317825 \n", "7665 swipe 2.20.0-ddc6a301 Android 1719309089 1719317825 \n", "7666 swipe 2.20.0-ddc6a301 Android 1719314829 1719318680 \n", "7667 swipe 2.20.0-ddc6a301 Android 1719314830 1719318680 \n", "7668 swipe 2.20.0-ddc6a301 Android 1719314831 1719318680 \n", "\n", " idDevice params \\\n", "0 52344631831 {} \n", "1 52344631831 {} \n", "2 52344631831 {} \n", "3 52344631831 {} \n", "4 86868404654 {} \n", "... ... ... \n", "7664 70284455492 {'code': 'LOYALTY_BALANCE.ACCOUNTS_WIDGETS', '... \n", "7665 70284455492 {'code': 'LOYALTY_BALANCE.TRANSACTION_FILTERS'... \n", "7666 70284455492 {'code': 'PROFILE.PROFILE', 'name': 'Profile',... \n", "7667 70284455492 {'code': 'PROFILE.PROFILE', 'name': 'Profile',... \n", "7668 70284455492 {'code': 'PROFILE.PROFILE', 'name': 'Profile',... \n", "\n", " account_id \n", "0 f2fa6186-1c44-4f82-b62b-34ac023d17b1 \n", "1 f2fa6186-1c44-4f82-b62b-34ac023d17b1 \n", "2 432b4ca1-82f3-4cf7-a714-f08adfdb4343 \n", "3 432b4ca1-82f3-4cf7-a714-f08adfdb4343 \n", "4 04203afa-27b3-4844-95da-8238bf69df4a \n", "... ... \n", "7664 43807c7d-14c9-41d0-9f0d-3041524180e6 \n", "7665 43807c7d-14c9-41d0-9f0d-3041524180e6 \n", "7666 43807c7d-14c9-41d0-9f0d-3041524180e6 \n", "7667 43807c7d-14c9-41d0-9f0d-3041524180e6 \n", "7668 43807c7d-14c9-41d0-9f0d-3041524180e6 \n", "\n", "[7669 rows x 8 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def exctr_account_id(s: str):\n", " parts = re.split('[\\[\\,]+', str(s))\n", " try:\n", " return parts[1]\n", " except IndexError:\n", " return ''\n", "\n", "import re\n", "#df['account_id'] = df['customUserId'].apply(lambda it: re.split('[\\[\\,]+', it))\n", "df['account_id'] = df['customUserId'].apply(exctr_account_id)\n", "df.drop(columns=['customUserId'], axis=1, inplace=True)\n", "df" ] }, { "cell_type": "markdown", "id": "29bf192e-c93e-494c-84f6-df3ed6313ec6", "metadata": {}, "source": [ "## Преобразование timestampt в дату и время" ] }, { "cell_type": "code", "execution_count": 8, "id": "bb2655f8-97ba-4d62-b0e0-8840e6956bbb", "metadata": { "scrolled": true }, "outputs": [], "source": [ "import pandas as pd\n", "# tz_localize/tz_convert by https://stackoverflow.com/questions/42826388/using-time-zone-in-pandas-to-datetime/42826430#42826430\n", "#df['eventDT'] = pd.to_datetime(df['tsEvent'], unit='s').dt.tz_localize('UTC')\n", "df['eventDT_MSK'] = pd.to_datetime(df['tsEvent'], unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Moscow')\n", "df['eventInsertDT_MSK'] = pd.to_datetime(df['tsInsert'], unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Moscow')" ] }, { "cell_type": "markdown", "id": "60a01d99-dafb-437e-8588-5fbcd12f821b", "metadata": {}, "source": [ "### Считаем время \"долёта\" событий" ] }, { "cell_type": "code", "execution_count": 9, "id": "ef31683c-01dc-436e-920a-b948678d6d23", "metadata": {}, "outputs": [], "source": [ "df['ts_diff'] = df['eventInsertDT_MSK'] - df['eventDT_MSK']" ] }, { "cell_type": "markdown", "id": "f9e2c3df-759c-4cfb-9dd2-1a7bd7e9ac5a", "metadata": {}, "source": [ "### Сортируем по времени событий" ] }, { "cell_type": "code", "execution_count": 10, "id": "86b85fc0-40e2-47bd-8e4a-5ec219de90bb", "metadata": {}, "outputs": [], "source": [ "df.sort_values(by=['tsEvent'], inplace=True)" ] }, { "cell_type": "markdown", "id": "2392bce6-b47b-4102-a3f0-aabc5b8d517d", "metadata": {}, "source": [ "# Фильтрация и примеры выборки" ] }, { "cell_type": "markdown", "id": "23bc5555-a217-44d6-a67e-7c8af6c3aff2", "metadata": {}, "source": [ "## Фильтрация по имени события eventName" ] }, { "cell_type": "code", "execution_count": 11, "id": "f9b36c21-ac07-4851-828c-2775db14c2db", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params</th>\n", " <th>account_id</th>\n", " <th>eventDT_MSK</th>\n", " <th>eventInsertDT_MSK</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "Empty DataFrame\n", "Columns: [eventName, idAppVersionTitle, idOsTitle, tsEvent, tsInsert, idDevice, params, account_id, eventDT_MSK, eventInsertDT_MSK, ts_diff]\n", "Index: []" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['_MyTrackerIdTestEventDEBUG' == df['eventName']]" ] }, { "cell_type": "markdown", "id": "963549a8-9162-4aaa-b518-358de3928807", "metadata": {}, "source": [ "## Фильтрация по account_id" ] }, { "cell_type": "code", "execution_count": 14, "id": "1444e3b1-fddc-4841-ac3d-30489a42e1b0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params</th>\n", " <th>account_id</th>\n", " <th>eventDT_MSK</th>\n", " <th>eventInsertDT_MSK</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "Empty DataFrame\n", "Columns: [eventName, idAppVersionTitle, idOsTitle, tsEvent, tsInsert, idDevice, params, account_id, eventDT_MSK, eventInsertDT_MSK, ts_diff]\n", "Index: []" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['262c20d2-2d71-40bf-aa86-908f27e08c48' == df['account_id']]" ] }, { "cell_type": "markdown", "id": "5941b0ee-d503-4d63-9925-7c5be11037eb", "metadata": {}, "source": [ "## Фильтрация по номеру девайса" ] }, { "cell_type": "code", "execution_count": 24, "id": "e127f588-914a-4733-8bab-c9b1fea46227", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params</th>\n", " <th>account_id</th>\n", " <th>eventDT_MSK</th>\n", " <th>eventInsertDT_MSK</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "Empty DataFrame\n", "Columns: [eventName, idAppVersionTitle, idOsTitle, tsEvent, tsInsert, idDevice, params, account_id, eventDT_MSK, eventInsertDT_MSK, ts_diff]\n", "Index: []" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[1234 == df['idDevice']]" ] }, { "cell_type": "markdown", "id": "3005c0d6-b5fb-4fff-a77b-5e8c62ccd6f6", "metadata": {}, "source": [ "## Фильтрация по двум параметрам - девайсу И событию вместе" ] }, { "cell_type": "code", "execution_count": null, "id": "1353c691-ba47-4c8c-b4e6-62ae08dcfebf", "metadata": { "scrolled": true }, "outputs": [], "source": [ "df[(df['eventName'] == 'press') & (1234 == df['idDevice'])]" ] }, { "cell_type": "markdown", "id": "f75dad59-f353-4127-bf38-59e2560c8cd9", "metadata": {}, "source": [ "## Фильтрация по ИЛИ - несколько событий" ] }, { "cell_type": "code", "execution_count": 12, "id": "cc455f9e-334d-4346-84e1-8f9c786d5779", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>params</th>\n", " <th>account_id</th>\n", " <th>eventDT_MSK</th>\n", " <th>eventInsertDT_MSK</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "Empty DataFrame\n", "Columns: [eventName, idAppVersionTitle, idOsTitle, tsEvent, tsInsert, idDevice, params, account_id, eventDT_MSK, eventInsertDT_MSK, ts_diff]\n", "Index: []" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[ ('_MyTrackerIdTestEventDEBUG' == df['eventName']) | ('_MyTrackerIdTestEventDEBUG_BeforeInitMyTracker' == df['eventName']) ]" ] }, { "cell_type": "markdown", "id": "f22d6b23-d2b9-42a1-bf42-c8af8fff3b48", "metadata": {}, "source": [ "## Примеры вывода и форматирования" ] }, { "cell_type": "markdown", "id": "83975805-85f0-48bf-848f-a5503aef42c9", "metadata": {}, "source": [ "## Вывод определённых (выбранных) столбцов" ] }, { "cell_type": "code", "execution_count": 20, "id": "6b900db7-a92a-4a00-94af-7833744b03cd", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>account_id</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>4713</th>\n", " <td>close</td>\n", " <td>b59243b4-9581-4f09-9844-1e6572d8622d</td>\n", " <td>0 days 04:51:02</td>\n", " </tr>\n", " <tr>\n", " <th>4991</th>\n", " <td>press</td>\n", " <td>b59243b4-9581-4f09-9844-1e6572d8622d</td>\n", " <td>0 days 04:50:56</td>\n", " </tr>\n", " <tr>\n", " <th>4468</th>\n", " <td>channels_channel_open</td>\n", " <td>b59243b4-9581-4f09-9844-1e6572d8622d</td>\n", " <td>0 days 04:50:56</td>\n", " </tr>\n", " <tr>\n", " <th>4842</th>\n", " <td>main_open</td>\n", " <td>b59243b4-9581-4f09-9844-1e6572d8622d</td>\n", " <td>0 days 00:41:15</td>\n", " </tr>\n", " <tr>\n", " <th>4843</th>\n", " <td>main_open</td>\n", " <td>b59243b4-9581-4f09-9844-1e6572d8622d</td>\n", " <td>0 days 00:40:45</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>5739</th>\n", " <td>swipe</td>\n", " <td>c356eb4c-5a33-48e2-b447-963ff0064328</td>\n", " <td>0 days 00:46:48</td>\n", " </tr>\n", " <tr>\n", " <th>5740</th>\n", " <td>swipe</td>\n", " <td>c356eb4c-5a33-48e2-b447-963ff0064328</td>\n", " <td>0 days 00:46:47</td>\n", " </tr>\n", " <tr>\n", " <th>5722</th>\n", " <td>main_open</td>\n", " <td>c356eb4c-5a33-48e2-b447-963ff0064328</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " <tr>\n", " <th>5726</th>\n", " <td>open</td>\n", " <td>c356eb4c-5a33-48e2-b447-963ff0064328</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " <tr>\n", " <th>5730</th>\n", " <td>radio_widget</td>\n", " <td>c356eb4c-5a33-48e2-b447-963ff0064328</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>7669 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " eventName account_id \\\n", "4713 close b59243b4-9581-4f09-9844-1e6572d8622d \n", "4991 press b59243b4-9581-4f09-9844-1e6572d8622d \n", "4468 channels_channel_open b59243b4-9581-4f09-9844-1e6572d8622d \n", "4842 main_open b59243b4-9581-4f09-9844-1e6572d8622d \n", "4843 main_open b59243b4-9581-4f09-9844-1e6572d8622d \n", "... ... ... \n", "5739 swipe c356eb4c-5a33-48e2-b447-963ff0064328 \n", "5740 swipe c356eb4c-5a33-48e2-b447-963ff0064328 \n", "5722 main_open c356eb4c-5a33-48e2-b447-963ff0064328 \n", "5726 open c356eb4c-5a33-48e2-b447-963ff0064328 \n", "5730 radio_widget c356eb4c-5a33-48e2-b447-963ff0064328 \n", "\n", " ts_diff \n", "4713 0 days 04:51:02 \n", "4991 0 days 04:50:56 \n", "4468 0 days 04:50:56 \n", "4842 0 days 00:41:15 \n", "4843 0 days 00:40:45 \n", "... ... \n", "5739 0 days 00:46:48 \n", "5740 0 days 00:46:47 \n", "5722 0 days 00:46:41 \n", "5726 0 days 00:46:41 \n", "5730 0 days 00:46:41 \n", "\n", "[7669 rows x 3 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Александр Петров дев\n", "df[['eventName', 'account_id', 'ts_diff']]" ] }, { "cell_type": "markdown", "id": "605b07b6-6bc6-423f-8d73-4efdafeca747", "metadata": {}, "source": [ "## Вывод всех, кроме выбранных столбцов" ] }, { "cell_type": "code", "execution_count": 25, "id": "e85c1185-4f1a-4ceb-93b1-2ef046f86fab", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>eventName</th>\n", " <th>idAppVersionTitle</th>\n", " <th>idOsTitle</th>\n", " <th>tsEvent</th>\n", " <th>tsInsert</th>\n", " <th>idDevice</th>\n", " <th>eventDT_MSK</th>\n", " <th>eventInsertDT_MSK</th>\n", " <th>ts_diff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>4713</th>\n", " <td>close</td>\n", " <td>3.0.0</td>\n", " <td>Android</td>\n", " <td>1719196195</td>\n", " <td>1719213657</td>\n", " <td>46509549178</td>\n", " <td>2024-06-24 05:29:55+03:00</td>\n", " <td>2024-06-24 10:20:57+03:00</td>\n", " <td>0 days 04:51:02</td>\n", " </tr>\n", " <tr>\n", " <th>4991</th>\n", " <td>press</td>\n", " <td>3.0.0</td>\n", " <td>Android</td>\n", " <td>1719196201</td>\n", " <td>1719213657</td>\n", " <td>46509549178</td>\n", " <td>2024-06-24 05:30:01+03:00</td>\n", " <td>2024-06-24 10:20:57+03:00</td>\n", " <td>0 days 04:50:56</td>\n", " </tr>\n", " <tr>\n", " <th>4468</th>\n", " <td>channels_channel_open</td>\n", " <td>3.0.0</td>\n", " <td>Android</td>\n", " <td>1719196201</td>\n", " <td>1719213657</td>\n", " <td>46509549178</td>\n", " <td>2024-06-24 05:30:01+03:00</td>\n", " <td>2024-06-24 10:20:57+03:00</td>\n", " <td>0 days 04:50:56</td>\n", " </tr>\n", " <tr>\n", " <th>4842</th>\n", " <td>main_open</td>\n", " <td>3.0.0</td>\n", " <td>Android</td>\n", " <td>1719211182</td>\n", " <td>1719213657</td>\n", " <td>46509549178</td>\n", " <td>2024-06-24 09:39:42+03:00</td>\n", " <td>2024-06-24 10:20:57+03:00</td>\n", " <td>0 days 00:41:15</td>\n", " </tr>\n", " <tr>\n", " <th>4843</th>\n", " <td>main_open</td>\n", " <td>3.0.0</td>\n", " <td>Android</td>\n", " <td>1719211212</td>\n", " <td>1719213657</td>\n", " <td>46509549178</td>\n", " <td>2024-06-24 09:40:12+03:00</td>\n", " <td>2024-06-24 10:20:57+03:00</td>\n", " <td>0 days 00:40:45</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>5739</th>\n", " <td>swipe</td>\n", " <td>2.20.0-61b7e141</td>\n", " <td>Android</td>\n", " <td>1719327881</td>\n", " <td>1719330689</td>\n", " <td>86898910861</td>\n", " <td>2024-06-25 18:04:41+03:00</td>\n", " <td>2024-06-25 18:51:29+03:00</td>\n", " <td>0 days 00:46:48</td>\n", " </tr>\n", " <tr>\n", " <th>5740</th>\n", " <td>swipe</td>\n", " <td>2.20.0-61b7e141</td>\n", " <td>Android</td>\n", " <td>1719327882</td>\n", " <td>1719330689</td>\n", " <td>86898910861</td>\n", " <td>2024-06-25 18:04:42+03:00</td>\n", " <td>2024-06-25 18:51:29+03:00</td>\n", " <td>0 days 00:46:47</td>\n", " </tr>\n", " <tr>\n", " <th>5722</th>\n", " <td>main_open</td>\n", " <td>2.20.0-61b7e141</td>\n", " <td>Android</td>\n", " <td>1719327888</td>\n", " <td>1719330689</td>\n", " <td>86898910861</td>\n", " <td>2024-06-25 18:04:48+03:00</td>\n", " <td>2024-06-25 18:51:29+03:00</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " <tr>\n", " <th>5726</th>\n", " <td>open</td>\n", " <td>2.20.0-61b7e141</td>\n", " <td>Android</td>\n", " <td>1719327888</td>\n", " <td>1719330689</td>\n", " <td>86898910861</td>\n", " <td>2024-06-25 18:04:48+03:00</td>\n", " <td>2024-06-25 18:51:29+03:00</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " <tr>\n", " <th>5730</th>\n", " <td>radio_widget</td>\n", " <td>2.20.0-61b7e141</td>\n", " <td>Android</td>\n", " <td>1719327888</td>\n", " <td>1719330689</td>\n", " <td>86898910861</td>\n", " <td>2024-06-25 18:04:48+03:00</td>\n", " <td>2024-06-25 18:51:29+03:00</td>\n", " <td>0 days 00:46:41</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>7669 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " eventName idAppVersionTitle idOsTitle tsEvent \\\n", "4713 close 3.0.0 Android 1719196195 \n", "4991 press 3.0.0 Android 1719196201 \n", "4468 channels_channel_open 3.0.0 Android 1719196201 \n", "4842 main_open 3.0.0 Android 1719211182 \n", "4843 main_open 3.0.0 Android 1719211212 \n", "... ... ... ... ... \n", "5739 swipe 2.20.0-61b7e141 Android 1719327881 \n", "5740 swipe 2.20.0-61b7e141 Android 1719327882 \n", "5722 main_open 2.20.0-61b7e141 Android 1719327888 \n", "5726 open 2.20.0-61b7e141 Android 1719327888 \n", "5730 radio_widget 2.20.0-61b7e141 Android 1719327888 \n", "\n", " tsInsert idDevice eventDT_MSK \\\n", "4713 1719213657 46509549178 2024-06-24 05:29:55+03:00 \n", "4991 1719213657 46509549178 2024-06-24 05:30:01+03:00 \n", "4468 1719213657 46509549178 2024-06-24 05:30:01+03:00 \n", "4842 1719213657 46509549178 2024-06-24 09:39:42+03:00 \n", "4843 1719213657 46509549178 2024-06-24 09:40:12+03:00 \n", "... ... ... ... \n", "5739 1719330689 86898910861 2024-06-25 18:04:41+03:00 \n", "5740 1719330689 86898910861 2024-06-25 18:04:42+03:00 \n", "5722 1719330689 86898910861 2024-06-25 18:04:48+03:00 \n", "5726 1719330689 86898910861 2024-06-25 18:04:48+03:00 \n", "5730 1719330689 86898910861 2024-06-25 18:04:48+03:00 \n", "\n", " eventInsertDT_MSK ts_diff \n", "4713 2024-06-24 10:20:57+03:00 0 days 04:51:02 \n", "4991 2024-06-24 10:20:57+03:00 0 days 04:50:56 \n", "4468 2024-06-24 10:20:57+03:00 0 days 04:50:56 \n", "4842 2024-06-24 10:20:57+03:00 0 days 00:41:15 \n", "4843 2024-06-24 10:20:57+03:00 0 days 00:40:45 \n", "... ... ... \n", "5739 2024-06-25 18:51:29+03:00 0 days 00:46:48 \n", "5740 2024-06-25 18:51:29+03:00 0 days 00:46:47 \n", "5722 2024-06-25 18:51:29+03:00 0 days 00:46:41 \n", "5726 2024-06-25 18:51:29+03:00 0 days 00:46:41 \n", "5730 2024-06-25 18:51:29+03:00 0 days 00:46:41 \n", "\n", "[7669 rows x 9 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# By https://stackoverflow.com/questions/29763620/how-to-select-all-columns-except-one-in-pandas/53075169#53075169\n", "df.loc[:, ~df.columns.isin(['params', 'account_id'])]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }