{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNnNS3+49QYKw3akPEdY2/i", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# Creating Fake Data\n", "## Create your own data for testing and analysis.\n", " See also:\n", "># https://towardsdatascience.com/how-to-create-fake-data-with-faker-a835e5b7a9d9\n", "># https://faker.readthedocs.io/en/master/\n", "># https://www.datacamp.com/tutorial/creating-synthetic-data-with-python-faker-tutorial" ], "metadata": { "id": "JcneT7VbmULv" } }, { "cell_type": "code", "source": [ "!pip install --q Faker" ], "metadata": { "id": "5mn-VYgbh22U" }, "execution_count": 4, "outputs": [] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "ZDZlP_iEhdir" }, "outputs": [], "source": [ "import string\n", "from faker import Faker \n", "import json \n", "import numpy as np\n", "import pandas as pd\n", "fake = Faker() \n", "\n", "fake = Faker('pt_BR')" ] }, { "cell_type": "markdown", "source": [ "# Creating 10k names with the Brazilian Portuguese (pt_BR) locale."
], "metadata": { "id": "xe6DvFYYmpCd" } },
{ "cell_type": "code", "source": [
  "fake_name = [fake.name() for _ in range(10000)]"
 ], "metadata": { "id": "tJfaYPQTiJfC" }, "execution_count": 6, "outputs": [] },
{ "cell_type": "code", "source": [ "len(fake_name)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SVaCsL99KvsY", "outputId": "e94600b7-5113-420d-b447-e3e417e35008" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "10000" ] }, "metadata": {}, "execution_count": 7 } ] },
{ "cell_type": "markdown", "source": [ "# Removing duplicate names." ], "metadata": { "id": "kB5V5E8enbD2" } },
{ "cell_type": "code", "source": [ "lista_unic = set(fake_name)" ], "metadata": { "id": "efpSkKL2jHFk" }, "execution_count": 8, "outputs": [] },
{ "cell_type": "markdown", "source": [ "# Unique created names." ], "metadata": { "id": "oWwASa_wnimm" } },
{ "cell_type": "code", "source": [ "len(lista_unic)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nP4TEniAjNLX", "outputId": "70651824-290b-4625-e7fe-f4079cca682a" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "7928" ] }, "metadata": {}, "execution_count": 9 } ] },
{ "cell_type": "code", "source": [
  "# Total lines created\n",
  "n = 1000\n",
  "\n",
  "infos = ['name', 'job', 'company', 'ssn', 'address', 'sex', 'birthdate']\n",
  "\n",
  "\n",
  "def create_profile(x):\n",
  "    \"\"\"Build `x` fake personal profiles.\n",
  "\n",
  "    Returns a dict keyed 0..x-1 whose values are the dicts produced by\n",
  "    `fake.profile(infos)`, i.e. restricted to the fields listed in `infos`.\n",
  "    \"\"\"\n",
  "    # Report the size actually requested, not the notebook-level `n`.\n",
  "    print(\"Creating profile with \", x, \" lines\")\n",
  "    profile_data = {i: fake.profile(infos) for i in range(x)}\n",
  "    print(\"done\")\n",
  "    return profile_data\n",
  "\n",
  "\n",
  "def create_sales(x):\n",
  "    \"\"\"Build `x` random sale records.\n",
  "\n",
  "    Returns a dict keyed 0..x-1. Each record holds a unit `price`\n",
  "    (10-499), a quantity `qtd` (1-9), a one-letter `product` code (A-Z),\n",
  "    the `bill` (price * qtd) and a `way_of_payment`.\n",
  "    \"\"\"\n",
  "    print(\"Creating sales with \", x, \" lines\")\n",
  "    sales_data = {}\n",
  "    for i in range(x):\n",
  "        price = np.random.randint(10, 500)\n",
  "        qtd = np.random.randint(1, 10)\n",
  "        sales_data[i] = {\n",
  "            'price': price,\n",
  "            'qtd': qtd,\n",
  "            # randint excludes the upper bound, so add 1 to make 'Z' reachable.\n",
  "            'product': chr(np.random.randint(ord('A'), ord('Z') + 1)),\n",
  "            'bill': price * qtd,\n",
  "            'way_of_payment': np.random.choice(['boleto', 'cartão_cred', 'a vista']),\n",
  "        }\n",
  "    print(\"done\")\n",
  "    return sales_data\n",
  "\n",
  "\n",
  "# Create fake profile (orient='index' gives one row per profile and keeps column dtypes)\n",
  "profiles = create_profile(n)\n",
  "df_profile = pd.DataFrame.from_dict(profiles, orient='index')\n",
  "\n",
  "# Create fake sales (one row per sale)\n",
  "sales = create_sales(n)\n",
  "df_sale = pd.DataFrame.from_dict(sales, orient='index')\n",
  "\n",
  "df_fake = pd.concat([df_profile, df_sale], axis=1)\n",
  "\n",
  "# Extracting and creating column UF: the state code follows ' / ' in the address\n",
  "df_fake['uf'] = [address.split(' / ')[1] for address in df_fake['address']]\n",
  "df_fake = df_fake.drop_duplicates()\n",
  "df_fake"
 ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 710 }, "id": "-C36qW81PHTa", "outputId": "d615018c-3771-4af6-834d-d6373f0e9e27" }, "execution_count": 10, "outputs": [
  { "output_type": "stream", "name": "stdout", "text": [ "Creating profile with 1000 lines\n", "done\n", "Creating sales with 1000 lines\n", "done\n" ] },
  { "output_type": "execute_result", "data": { "text/plain": [
   " job company ssn \\\n",
   "0 Instrumentista musical Jesus 17650392821 \n",
   "1 Tecnólogo em rochas ornamentais Araújo 67291058449 \n",
   "2 Ventríloquo Viana 12540369898 \n",
   "3 Telefonista Cunha 19283076559 \n",
   "4 Borracheiro Dias S.A. 45819023714 \n",
   ".. ... ... ... \n",
   "995 Gravurista Cunha 03286759465 \n",
   "996 Terapeuta ocupacional Campos Cardoso e Filhos 31072856417 \n",
   "997 Árbitro e mediador Farias Costela e Filhos 91735680230 \n",
   "998 Tecnólogo em recursos pesqueiros Ribeiro Moraes S.A. 
86371950240 \n", "999 Meteorologista Rocha 14538702960 \n", "\n", " name sex \\\n", "0 Erick Silveira M \n", "1 Sarah da Cunha F \n", "2 Igor Silva M \n", "3 Sr. Bruno Fernandes M \n", "4 Alexandre Pinto M \n", ".. ... .. \n", "995 Diogo Souza M \n", "996 Sra. Valentina Silva F \n", "997 Melissa Ribeiro F \n", "998 Emanuel Nogueira M \n", "999 Anthony da Mota M \n", "\n", " address birthdate price qtd \\\n", "0 Trevo de da Luz, 962\\nMorro Dos Macacos\\n47974... 1957-01-10 124 5 \n", "1 Trevo Pereira, 96\\nGoiania\\n02270785 Barbosa d... 2017-09-15 158 2 \n", "2 Morro Pinto, 42\\nVila Paquetá\\n42119022 Barbos... 1931-04-25 369 4 \n", "3 Estação Lima\\nVila Fumec\\n12545833 da Conceiçã... 1985-10-26 413 7 \n", "4 Sítio de Pires, 78\\nVila Da Paz\\n02665127 da R... 1941-02-03 265 5 \n", ".. ... ... ... .. \n", "995 Viaduto Gonçalves\\nMarilandia\\n60879609 Lima / TO 1908-04-02 175 9 \n", "996 Lago de Silveira, 25\\nVila Canto Do Sabiá\\n198... 1937-04-15 212 5 \n", "997 Trecho Azevedo, 364\\nSanta Lúcia\\n32417292 Gon... 1990-12-03 175 3 \n", "998 Quadra de Dias, 96\\nEngenho Nogueira\\n95213125... 1944-06-02 92 9 \n", "999 Residencial de Moura, 13\\nJardim Guanabara\\n47... 2015-10-03 179 8 \n", "\n", " product bill way_of_payment uf \n", "0 X 620 cartão_cred AM \n", "1 E 316 cartão_cred MA \n", "2 X 1476 a vista BA \n", "3 C 2891 boleto AL \n", "4 P 1325 cartão_cred MS \n", ".. ... ... ... .. \n", "995 C 1575 boleto TO \n", "996 C 1060 cartão_cred AP \n", "997 C 525 cartão_cred PE \n", "998 F 828 a vista MS \n", "999 C 1432 boleto CE \n", "\n", "[1000 rows x 13 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
jobcompanyssnnamesexaddressbirthdatepriceqtdproductbillway_of_paymentuf
0Instrumentista musicalJesus17650392821Erick SilveiraMTrevo de da Luz, 962\\nMorro Dos Macacos\\n47974...1957-01-101245X620cartão_credAM
1Tecnólogo em rochas ornamentaisAraújo67291058449Sarah da CunhaFTrevo Pereira, 96\\nGoiania\\n02270785 Barbosa d...2017-09-151582E316cartão_credMA
2VentríloquoViana12540369898Igor SilvaMMorro Pinto, 42\\nVila Paquetá\\n42119022 Barbos...1931-04-253694X1476a vistaBA
3TelefonistaCunha19283076559Sr. Bruno FernandesMEstação Lima\\nVila Fumec\\n12545833 da Conceiçã...1985-10-264137C2891boletoAL
4BorracheiroDias S.A.45819023714Alexandre PintoMSítio de Pires, 78\\nVila Da Paz\\n02665127 da R...1941-02-032655P1325cartão_credMS
..........................................
995GravuristaCunha03286759465Diogo SouzaMViaduto Gonçalves\\nMarilandia\\n60879609 Lima / TO1908-04-021759C1575boletoTO
996Terapeuta ocupacionalCampos Cardoso e Filhos31072856417Sra. Valentina SilvaFLago de Silveira, 25\\nVila Canto Do Sabiá\\n198...1937-04-152125C1060cartão_credAP
997Árbitro e mediadorFarias Costela e Filhos91735680230Melissa RibeiroFTrecho Azevedo, 364\\nSanta Lúcia\\n32417292 Gon...1990-12-031753C525cartão_credPE
998Tecnólogo em recursos pesqueirosRibeiro Moraes S.A.86371950240Emanuel NogueiraMQuadra de Dias, 96\\nEngenho Nogueira\\n95213125...1944-06-02929F828a vistaMS
999MeteorologistaRocha14538702960Anthony da MotaMResidencial de Moura, 13\\nJardim Guanabara\\n47...2015-10-031798C1432boletoCE
\n", "

1000 rows × 13 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 10 } ] } ] }