{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNnNS3+49QYKw3akPEdY2/i", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] }, { "cell_type": "markdown", "source": [ "# Creating Fake Data\n", "## Create your own data for testing and analysis.\n", " See also:\n", "># https://towardsdatascience.com/how-to-create-fake-data-with-faker-a835e5b7a9d9\n", "># https://faker.readthedocs.io/en/master/\n", "># https://www.datacamp.com/tutorial/creating-synthetic-data-with-python-faker-tutorial" ], "metadata": { "id": "JcneT7VbmULv" } }, { "cell_type": "code", "source": [ "!pip install --q Faker" ], "metadata": { "id": "5mn-VYgbh22U" }, "execution_count": 4, "outputs": [] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "ZDZlP_iEhdir" }, "outputs": [], "source": [ "import string\n", "from faker import Faker \n", "import json \n", "import numpy as np\n", "import pandas as pd\n", "fake = Faker() \n", "\n", "fake = Faker('pt_BR')" ] }, { "cell_type": "markdown", "source": [ "# Creating 10k fake names with the Brazilian Portuguese (pt_BR) locale." 
], "metadata": { "id": "xe6DvFYYmpCd" } }, { "cell_type": "code", "source": [ "fake_name=[]\n", "for i in range(10000):\n", " #print(\"Name:\", i)\n", " name = fake.name()\n", " #print(name)\n", " fake_name.append(name)" ], "metadata": { "id": "tJfaYPQTiJfC" }, "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ "len(fake_name)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SVaCsL99KvsY", "outputId": "e94600b7-5113-420d-b447-e3e417e35008" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "10000" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "source": [ "# Removing duplicate names." ], "metadata": { "id": "kB5V5E8enbD2" } }, { "cell_type": "code", "source": [ "lista_unic = set(fake_name)" ], "metadata": { "id": "efpSkKL2jHFk" }, "execution_count": 8, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Unique created names." ], "metadata": { "id": "oWwASa_wnimm" } }, { "cell_type": "code", "source": [ "len(lista_unic)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nP4TEniAjNLX", "outputId": "70651824-290b-4625-e7fe-f4079cca682a" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "7928" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "# Total lines created\n", "n = 1000\n", "\n", "infos = ['name','job', 'company','ssn', 'address', 'sex', 'birthdate']\n", "\n", "def create_profile(x): \n", " print(\"Creating profile with \", n, \" lines\")\n", " # dictionary \n", " profile_data ={} \n", " for i in range(0, x): \n", " profile_data[i]={} \n", " profile_data[i] = fake.profile(infos)\n", "\n", " print(\"done\")\n", " return profile_data\n", " \n", "\n", "\n", "def create_sales(x): \n", " print(\"Creating sales with \", n, \" lines\")\n", " \n", " # dictionary \n", " sales_data ={} \n", " for i in range(0, x): \n", " 
sales_data[i]={} \n", " sales_data[i]['price'] = np.random.randint(10,500)\n", " sales_data[i]['qtd'] = np.random.randint(1,10)\n", " sales_data[i]['product'] = chr(np.random.randint(ord('A'), ord('Z')))\n", " sales_data[i]['bill'] = sales_data[i]['price'] * sales_data[i]['qtd']\n", " sales_data[i]['way_of_payment'] = np.random.choice(['boleto', 'cartão_cred', 'a vista'])\n", " print(\"done\")\n", " return sales_data\n", "\n", "\n", "# Create fake profile\n", "profiles = create_profile(n)\n", "df_prof = pd.DataFrame.from_dict(profiles)\n", "df_profile = df_prof.T\n", "\n", "# Create fake sales\n", "sales = create_sales(n)\n", "temp_sales = pd.DataFrame.from_dict(sales)\n", "df_sale = temp_sales.T\n", "\n", "df_fake = pd.concat([df_profile, df_sale], axis=1)\n", "\n", "# Extracting and creating column UF\n", "lista_uf=[]\n", "for i in range(len(df_fake['address'])):\n", " lista_uf.append(df_fake['address'][i].split(' / ')[1])\n", "\n", "df_fake['uf'] = lista_uf\n", "df_fake.drop_duplicates(inplace=True)\n", "df_fake" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 710 }, "id": "-C36qW81PHTa", "outputId": "d615018c-3771-4af6-834d-d6373f0e9e27" }, "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Creating profile with 1000 lines\n", "done\n", "Creating sales with 1000 lines\n", "done\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " job company ssn \\\n", "0 Instrumentista musical Jesus 17650392821 \n", "1 Tecnólogo em rochas ornamentais Araújo 67291058449 \n", "2 Ventríloquo Viana 12540369898 \n", "3 Telefonista Cunha 19283076559 \n", "4 Borracheiro Dias S.A. 45819023714 \n", ".. ... ... ... \n", "995 Gravurista Cunha 03286759465 \n", "996 Terapeuta ocupacional Campos Cardoso e Filhos 31072856417 \n", "997 Árbitro e mediador Farias Costela e Filhos 91735680230 \n", "998 Tecnólogo em recursos pesqueiros Ribeiro Moraes S.A. 
86371950240 \n", "999 Meteorologista Rocha 14538702960 \n", "\n", " name sex \\\n", "0 Erick Silveira M \n", "1 Sarah da Cunha F \n", "2 Igor Silva M \n", "3 Sr. Bruno Fernandes M \n", "4 Alexandre Pinto M \n", ".. ... .. \n", "995 Diogo Souza M \n", "996 Sra. Valentina Silva F \n", "997 Melissa Ribeiro F \n", "998 Emanuel Nogueira M \n", "999 Anthony da Mota M \n", "\n", " address birthdate price qtd \\\n", "0 Trevo de da Luz, 962\\nMorro Dos Macacos\\n47974... 1957-01-10 124 5 \n", "1 Trevo Pereira, 96\\nGoiania\\n02270785 Barbosa d... 2017-09-15 158 2 \n", "2 Morro Pinto, 42\\nVila Paquetá\\n42119022 Barbos... 1931-04-25 369 4 \n", "3 Estação Lima\\nVila Fumec\\n12545833 da Conceiçã... 1985-10-26 413 7 \n", "4 Sítio de Pires, 78\\nVila Da Paz\\n02665127 da R... 1941-02-03 265 5 \n", ".. ... ... ... .. \n", "995 Viaduto Gonçalves\\nMarilandia\\n60879609 Lima / TO 1908-04-02 175 9 \n", "996 Lago de Silveira, 25\\nVila Canto Do Sabiá\\n198... 1937-04-15 212 5 \n", "997 Trecho Azevedo, 364\\nSanta Lúcia\\n32417292 Gon... 1990-12-03 175 3 \n", "998 Quadra de Dias, 96\\nEngenho Nogueira\\n95213125... 1944-06-02 92 9 \n", "999 Residencial de Moura, 13\\nJardim Guanabara\\n47... 2015-10-03 179 8 \n", "\n", " product bill way_of_payment uf \n", "0 X 620 cartão_cred AM \n", "1 E 316 cartão_cred MA \n", "2 X 1476 a vista BA \n", "3 C 2891 boleto AL \n", "4 P 1325 cartão_cred MS \n", ".. ... ... ... .. \n", "995 C 1575 boleto TO \n", "996 C 1060 cartão_cred AP \n", "997 C 525 cartão_cred PE \n", "998 F 828 a vista MS \n", "999 C 1432 boleto CE \n", "\n", "[1000 rows x 13 columns]" ], "text/html": [ "\n", "
\n", " | job | \n", "company | \n", "ssn | \n", "name | \n", "sex | \n", "address | \n", "birthdate | \n", "price | \n", "qtd | \n", "product | \n", "bill | \n", "way_of_payment | \n", "uf | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Instrumentista musical | \n", "Jesus | \n", "17650392821 | \n", "Erick Silveira | \n", "M | \n", "Trevo de da Luz, 962\\nMorro Dos Macacos\\n47974... | \n", "1957-01-10 | \n", "124 | \n", "5 | \n", "X | \n", "620 | \n", "cartão_cred | \n", "AM | \n", "
1 | \n", "Tecnólogo em rochas ornamentais | \n", "Araújo | \n", "67291058449 | \n", "Sarah da Cunha | \n", "F | \n", "Trevo Pereira, 96\\nGoiania\\n02270785 Barbosa d... | \n", "2017-09-15 | \n", "158 | \n", "2 | \n", "E | \n", "316 | \n", "cartão_cred | \n", "MA | \n", "
2 | \n", "Ventríloquo | \n", "Viana | \n", "12540369898 | \n", "Igor Silva | \n", "M | \n", "Morro Pinto, 42\\nVila Paquetá\\n42119022 Barbos... | \n", "1931-04-25 | \n", "369 | \n", "4 | \n", "X | \n", "1476 | \n", "a vista | \n", "BA | \n", "
3 | \n", "Telefonista | \n", "Cunha | \n", "19283076559 | \n", "Sr. Bruno Fernandes | \n", "M | \n", "Estação Lima\\nVila Fumec\\n12545833 da Conceiçã... | \n", "1985-10-26 | \n", "413 | \n", "7 | \n", "C | \n", "2891 | \n", "boleto | \n", "AL | \n", "
4 | \n", "Borracheiro | \n", "Dias S.A. | \n", "45819023714 | \n", "Alexandre Pinto | \n", "M | \n", "Sítio de Pires, 78\\nVila Da Paz\\n02665127 da R... | \n", "1941-02-03 | \n", "265 | \n", "5 | \n", "P | \n", "1325 | \n", "cartão_cred | \n", "MS | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "Gravurista | \n", "Cunha | \n", "03286759465 | \n", "Diogo Souza | \n", "M | \n", "Viaduto Gonçalves\\nMarilandia\\n60879609 Lima / TO | \n", "1908-04-02 | \n", "175 | \n", "9 | \n", "C | \n", "1575 | \n", "boleto | \n", "TO | \n", "
996 | \n", "Terapeuta ocupacional | \n", "Campos Cardoso e Filhos | \n", "31072856417 | \n", "Sra. Valentina Silva | \n", "F | \n", "Lago de Silveira, 25\\nVila Canto Do Sabiá\\n198... | \n", "1937-04-15 | \n", "212 | \n", "5 | \n", "C | \n", "1060 | \n", "cartão_cred | \n", "AP | \n", "
997 | \n", "Árbitro e mediador | \n", "Farias Costela e Filhos | \n", "91735680230 | \n", "Melissa Ribeiro | \n", "F | \n", "Trecho Azevedo, 364\\nSanta Lúcia\\n32417292 Gon... | \n", "1990-12-03 | \n", "175 | \n", "3 | \n", "C | \n", "525 | \n", "cartão_cred | \n", "PE | \n", "
998 | \n", "Tecnólogo em recursos pesqueiros | \n", "Ribeiro Moraes S.A. | \n", "86371950240 | \n", "Emanuel Nogueira | \n", "M | \n", "Quadra de Dias, 96\\nEngenho Nogueira\\n95213125... | \n", "1944-06-02 | \n", "92 | \n", "9 | \n", "F | \n", "828 | \n", "a vista | \n", "MS | \n", "
999 | \n", "Meteorologista | \n", "Rocha | \n", "14538702960 | \n", "Anthony da Mota | \n", "M | \n", "Residencial de Moura, 13\\nJardim Guanabara\\n47... | \n", "2015-10-03 | \n", "179 | \n", "8 | \n", "C | \n", "1432 | \n", "boleto | \n", "CE | \n", "
1000 rows × 13 columns
\n", "