{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"IEF-IVA-simple-numeric-categorical.ipynb","provenance":[{"file_id":"1V6fmJ9wGOkaa5dT9qG680wyP0dpIv_a4","timestamp":1632766454856}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"n4AN9zMneDUb"},"source":[""]},{"cell_type":"code","metadata":{"id":"hYF4ZMa-gXnq","executionInfo":{"status":"ok","timestamp":1632767312134,"user_tz":-120,"elapsed":223,"user":{"displayName":"Oscar Corcho","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3cF7DAfhib0_PTrsWLs1ct7LY6beP-TSnuSL4=s64","userId":"18270436930746444615"}}},"source":["import pandas as pd\n","import numpy as np\n","from sklearn.impute import SimpleImputer\n","from sklearn.preprocessing import StandardScaler, OneHotEncoder\n","from sklearn.compose import ColumnTransformer\n","from sklearn.linear_model import LogisticRegression\n","import matplotlib.pyplot as plt"],"execution_count":25,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"t2mBw0-KGiO9","executionInfo":{"status":"ok","timestamp":1632767312499,"user_tz":-120,"elapsed":5,"user":{"displayName":"Oscar Corcho","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3cF7DAfhib0_PTrsWLs1ct7LY6beP-TSnuSL4=s64","userId":"18270436930746444615"}},"outputId":"031858e0-c066-442f-e0f1-37dc0495b7df"},"source":["github_storage = \"https://raw.githubusercontent.com/oeg-upm/Instituto-Estudios-Fiscales-ontologias/master/machine-learning/\"\n","original_file = github_storage + \"datos-IVA.csv\"\n","\n","# Labels given to the CSV fields, in file order (the file is read with header=None).\n","column_names = [\n","    'persona.actividadEmpresarial',\n","    'sujetoPasivo.domicilioFiscal',\n","    'operacion.tipoOperacion',\n","    'operacion.lugarRealizacion',\n","    'operacion.exencion',\n","    'factura.importe',\n","    'factura.fecha',\n","    'operacion.docs',\n","    'exencion.hechoImponibleCubiertoEnExencion',\n","    'factura.descripcion',\n","    'sujetoPasivo.acogidoARegimenEspecial',\n","    'numFacturasMismoTopico',\n","    'infraccion',\n","    'tipoInfraccion',\n","]\n","\n","# Read the semicolon-separated data file. The row labelled 0 is discarded because,\n","# per the original author's note, the first line of the file is not read properly;\n","# the column 'exencion.hechoImponibleCubiertoEnExencion' is discarded because it\n","# holds no relevant values.\n","df = pd.read_csv(\n","    original_file, sep=\";\", engine='python', header=None, names=column_names\n",").drop(index=[0], columns=['exencion.hechoImponibleCubiertoEnExencion'])\n","df\n"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n"," | persona.actividadEmpresarial | \n","sujetoPasivo.domicilioFiscal | \n","operacion.tipoOperacion | \n","operacion.lugarRealizacion | \n","operacion.exencion | \n","factura.importe | \n","factura.fecha | \n","operacion.docs | \n","factura.descripcion | \n","sujetoPasivo.acogidoARegimenEspecial | \n","numFacturasMismoTopico | \n","infraccion | \n","tipoInfraccion | \n","
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n","venta de ropa | \n","Toledo | \n","adquisicion de bienes | \n","Madrid | \n","ExencionRecargoEquivalencia | \n","140.0 | \n","22/7/21 | \n","Camisa de hombre de color blanco. Pantalón vaq... | \n","Camisa de hombre de color blanco. Pantalón vaq... | \n","RecargoEquivalencia | \n","2.0 | \n","false | \n","NaN | \n","
2 | \n","venta de calzado | \n","Madrid | \n","adquisicion de bienes | \n","Madrid | \n","ExencionRecargoEquivalencia | \n","180.0 | \n","23/7/21 | \n","Mocasines de piel de color marrón | \n","Mocasines de piel de color marrón | \n","RecargoEquivalencia | \n","3.0 | \n","false | \n","NaN | \n","
3 | \n","venta de ropa | \n","Toledo | \n","adquisicion de bienes | \n","Toledo | \n","ExencionRecargoEquivalencia | \n","230.0 | \n","24/7/21 | \n","Camisa de hombre de color azul. Pantalones de ... | \n","Camisa de hombre de color azul. Pantalones de ... | \n","RecargoEquivalencia | \n","20.0 | \n","false | \n","NaN | \n","
4 | \n","venta de calzado | \n","Avila | \n","adquisicion de bienes | \n","Avila | \n","ExencionRecargoEquivalencia | \n","120.0 | \n","25/7/21 | \n","Zapatillas de deporte Adidas. Zapatos de cabal... | \n","Zapatillas de deporte Adidas. Zapatos de cabal... | \n","RecargoEquivalencia | \n","15.0 | \n","false | \n","NaN | \n","
5 | \n","venta de artículos de piel | \n","Madrid | \n","adquisicion de bienes | \n","Toledo | \n","ExencionRecargoEquivalencia | \n","560.0 | \n","26/7/21 | \n","Abrigo de visón largo con mangas anchas | \n","Abrigo de visón largo con mangas anchas | \n","RecargoEquivalencia | \n","2.0 | \n","true | \n","infraccionEnAdquisicionDeBienes | \n","
6 | \n","joyería | \n","Toledo | \n","adquisicion de bienes | \n","Madrid | \n","ExencionRecargoEquivalencia | \n","1200.0 | \n","27/7/21 | \n","Reloj de oro con incrustaciones de diamantes | \n","Reloj de oro con incrustaciones de diamantes | \n","RecargoEquivalencia | \n","3.0 | \n","true | \n","infraccionEnAdquisicionDeBienes | \n","
7 | \n","materiales de construcción | \n","Avila | \n","adquisicion de bienes | \n","Avila | \n","ExencionRecargoEquivalencia | \n","3400.0 | \n","28/7/21 | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","RecargoEquivalencia | \n","1.0 | \n","true | \n","infraccionEnAdquisicionDeBienes | \n","
8 | \n","joyería | \n","Madrid | \n","adquisicion de bienes | \n","Madrid | \n","ExencionRecargoEquivalencia | \n","1800.0 | \n","29/7/21 | \n","Collar de perlas. | \n","Collar de perlas. | \n","RecargoEquivalencia | \n","5.0 | \n","true | \n","infraccionEnAdquisicionDeBienes | \n","
9 | \n","materiales de construcción | \n","Madrid | \n","prestación de servicios | \n","Madrid | \n","0 | \n","3500.0 | \n","30/7/21 | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","0 | \n","20.0 | \n","false | \n","NaN | \n","
10 | \n","obras y edificaciones | \n","Madrid | \n","prestación de servicios | \n","Madrid | \n","0 | \n","12000.0 | \n","31/7/21 | \n","Instalación de ventanas en habitaciones. Remod... | \n","Instalación de ventanas en habitaciones. Remod... | \n","0 | \n","3.0 | \n","false | \n","NaN | \n","
11 | \n","materiales de construcción | \n","Toledo | \n","prestación de servicios | \n","Madrid | \n","0 | \n","13500.0 | \n","1/8/21 | \n","300kg de cemento. 850 metros cuadrados de azuj... | \n","300kg de cemento. 850 metros cuadrados de azuj... | \n","0 | \n","45.0 | \n","false | \n","NaN | \n","
12 | \n","obras y edificaciones | \n","Avila | \n","prestación de servicios | \n","Toledo | \n","0 | \n","1800.0 | \n","2/8/21 | \n","Instalación de una ventana en buhardilla | \n","Instalación de una ventana en buhardilla | \n","0 | \n","3.0 | \n","false | \n","NaN | \n","
13 | \n","materiales de construcción | \n","Madrid | \n","prestación de servicios | \n","Avila | \n","0 | \n","2450.0 | \n","3/8/21 | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","0 | \n","120.0 | \n","true | \n","faltaComunicacionEjecucionObra | \n","
14 | \n","obras y edificaciones | \n","Toledo | \n","prestación de servicios | \n","Madrid | \n","0 | \n","13000.0 | \n","4/8/21 | \n","Remodelación de fachada con fecha de 1/1/2020 | \n","Remodelación de fachada con fecha de 1/1/2020 | \n","0 | \n","3.0 | \n","true | \n","faltaComunicacionEjecucionObra | \n","
15 | \n","materiales de construcción | \n","Avila | \n","prestación de servicios | \n","Toledo | \n","0 | \n","11254.0 | \n","5/8/21 | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","45 kg de cemento. 20 metros cuadrados de azule... | \n","0 | \n","45.0 | \n","true | \n","faltaComunicacionEjecucionObra | \n","
16 | \n","obras y edificaciones | \n","Madrid | \n","prestación de servicios | \n","Avila | \n","0 | \n","340.0 | \n","6/8/21 | \n","Remodelación de fachada con fecha de 1/7/2021 | \n","Remodelación de fachada con fecha de 1/7/2021 | \n","0 | \n","3.0 | \n","true | \n","faltaComunicacionEjecucionObra | \n","
17 | \n","venta de electrónica | \n","Madrid | \n","adquisicionIntracomunitaria | \n","Francia | \n","0 | \n","340.0 | \n","7/8/21 | \n","Venta de móvil de gama media. Samsung X3 | \n","Venta de móvil de gama media. Samsung X3 | \n","0 | \n","4.0 | \n","false | \n","NaN | \n","
18 | \n","venta de ropa | \n","Toledo | \n","importacionBienes | \n","Alemania | \n","0 | \n","245.0 | \n","8/8/21 | \n","cinco cqmisas de marca HM | \n","cinco cqmisas de marca HM | \n","0 | \n","25.0 | \n","false | \n","NaN | \n","
19 | \n","venta de calzado | \n","Avila | \n","adquisicionIntracomunitaria | \n","Francia | \n","0 | \n","1200.0 | \n","9/8/21 | \n","30 mocasines de piel de vacuno de color marrón... | \n","30 mocasines de piel de vacuno de color marrón... | \n","0 | \n","30.0 | \n","false | \n","NaN | \n","
20 | \n","joyería | \n","Madrid | \n","importacionBienes | \n","Alemania | \n","0 | \n","3422.0 | \n","10/8/21 | \n","dos relojes de oro con incrustaciones de plata | \n","dos relojes de oro con incrustaciones de plata | \n","0 | \n","22.0 | \n","false | \n","NaN | \n","
21 | \n","materiales de construcción | \n","Madrid | \n","adquisicionIntracomunitaria | \n","Francia | \n","0 | \n","234.0 | \n","11/8/21 | \n","azulejos porcelánicos de alta gama | \n","azulejos porcelánicos de alta gama | \n","0 | \n","345.0 | \n","true | \n","infraccionNoPresentarDeclaracion | \n","
22 | \n","venta de artículos de piel | \n","Toledo | \n","importacionBienes | \n","Alemania | \n","0 | \n","430.0 | \n","12/8/21 | \n","10 bolsos de piel para mujer | \n","10 bolsos de piel para mujer | \n","0 | \n","22.0 | \n","true | \n","infraccionNoPresentarDeclaracion | \n","
23 | \n","venta de electrónica | \n","Avila | \n","adquisicionIntracomunitaria | \n","Francia | \n","0 | \n","222.0 | \n","13/8/21 | \n","2 calculadoras mutlifunción. 1 router inalámbrico | \n","2 calculadoras mutlifunción. 1 router inalámbrico | \n","0 | \n","1.0 | \n","true | \n","infraccionNoPresentarDeclaracion | \n","
24 | \n","venta de ropa | \n","Madrid | \n","importacionBienes | \n","Alemania | \n","0 | \n","13000.0 | \n","14/8/21 | \n","30 camisas de caballero. 20 vaqueros de señora | \n","30 camisas de caballero. 20 vaqueros de señora | \n","0 | \n","23.0 | \n","true | \n","infraccionNoPresentarDeclaracion | \n","