{ "cells": [ { "cell_type": "code", "execution_count": 18, "id": "00aa9915-e89f-4f4f-a7ec-7a8b89a266dd", "metadata": { "ExecuteTime": { "start_time": "2023-06-28T16:22:58.703948200Z" }, "datalink": { "c0e05319-2c92-4c1f-9aa8-fcb4c980e2a5": { "applied_filters": [], "dataframe_info": { "default_index_used": true, "orig_num_cols": 49, "orig_num_rows": 5, "orig_size_bytes": 2000, "truncated_num_cols": 49, "truncated_num_rows": 5, "truncated_size_bytes": 2000, "truncated_string_columns": [] }, "display_id": "c0e05319-2c92-4c1f-9aa8-fcb4c980e2a5", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:51:41.459116", "user_variable_name": null, "variable_name": "unk_dataframe_62d2139b273f4aed93cfc08dceb1d1cc" } }, "is_executing": true, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TECTONIC SETTINGLATITUDELONGITUDELAND OR SEASAMPLE NAMESIO2(WT%)TIO2(WT%)AL2O3(WT%)Fe2O3T(WT%)CAO(WT%)...GD(PPM)TB(PPM)DY(PPM)HO(PPM)ER(PPM)TM(PPM)YB(PPM)LU(PPM)HF(PPM)TA(PPM)
0CONVERGENT MARGIN34.0000131.5000SAEs_1 [4742]46.671.9613.1410.4578.87...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1OCEAN ISLAND-21.2500-159.7500SAEs_R40 [4683]45.972.9111.8611.4209.74...8.2430.935.150.842.030.261.43NaN5.72NaN
2CONVERGENT MARGIN47.1200152.2300SAEs_V-17-603 [7321] / s_V17-603 [4190]51.53NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3CONVERGENT MARGIN47.1200152.2300SAEs_V15-307 [6818] / s_V-15-307 [3042]58.220.6716.308.5877.04...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4OCEAN ISLAND27.6149-16.1633SAQs_157-956B-44R-4,G1 [4750]NaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 49 columns

\n", "
" ], "text/plain": [ " TECTONIC SETTING LATITUDE LONGITUDE LAND OR SEA \\\n", "0 CONVERGENT MARGIN 34.0000 131.5000 SAE \n", "1 OCEAN ISLAND -21.2500 -159.7500 SAE \n", "2 CONVERGENT MARGIN 47.1200 152.2300 SAE \n", "3 CONVERGENT MARGIN 47.1200 152.2300 SAE \n", "4 OCEAN ISLAND 27.6149 -16.1633 SAQ \n", "\n", " SAMPLE NAME SIO2(WT%) TIO2(WT%) AL2O3(WT%) \\\n", "0 s_1 [4742] 46.67 1.96 13.14 \n", "1 s_R40 [4683] 45.97 2.91 11.86 \n", "2 s_V-17-603 [7321] / s_V17-603 [4190] 51.53 NaN NaN \n", "3 s_V15-307 [6818] / s_V-15-307 [3042] 58.22 0.67 16.30 \n", "4 s_157-956B-44R-4,G1 [4750] NaN NaN NaN \n", "\n", " Fe2O3T(WT%) CAO(WT%) ... GD(PPM) TB(PPM) DY(PPM) HO(PPM) ER(PPM) \\\n", "0 10.457 8.87 ... NaN NaN NaN NaN NaN \n", "1 11.420 9.74 ... 8.243 0.93 5.15 0.84 2.03 \n", "2 NaN NaN ... NaN NaN NaN NaN NaN \n", "3 8.587 7.04 ... NaN NaN NaN NaN NaN \n", "4 NaN NaN ... NaN NaN NaN NaN NaN \n", "\n", " TM(PPM) YB(PPM) LU(PPM) HF(PPM) TA(PPM) \n", "0 NaN NaN NaN NaN NaN \n", "1 0.26 1.43 NaN 5.72 NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", "[5 rows x 49 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Load the data\n", "df = pd.read_csv('basalt_pre.csv')\n", "\n", "# Display the first few rows of the dataframe\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "a94b5ae4-919b-43bc-83cf-74fa59fa1bd1", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:52:24.353704+00:00", "start_time": "2023-06-28T15:52:23.700431+00:00" }, "datalink": { "edaa1142-a9d4-45db-92be-bdbb2257856a": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 49, "orig_num_rows": 11, "orig_size_bytes": 4400, "truncated_num_cols": 49, "truncated_num_rows": 11, "truncated_size_bytes": 4400, "truncated_string_columns": [] }, "display_id": "edaa1142-a9d4-45db-92be-bdbb2257856a", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:52:24.186734", "user_variable_name": null, "variable_name": "unk_dataframe_833188ae7e1c44d2adf928fd96bc2c94" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataframe shape: (83362, 49)\n", "\n", "Column names: ['TECTONIC SETTING', 'LATITUDE', 'LONGITUDE', 'LAND OR SEA', 'SAMPLE NAME', 'SIO2(WT%)', 'TIO2(WT%)', 'AL2O3(WT%)', 'Fe2O3T(WT%)', 'CAO(WT%)', 'MGO(WT%)', 'MNO(WT%)', 'K2O(WT%)', 'NA2O(WT%)', 'P2O5(WT%)', 'Fe2O3T', 'H2OT(WT%)', 'CO2(WT%)', 'LOI(WT%)', 'NI(PPM)', 'CU(PPM)', 'ZN(PPM)', 'SC(PPM)', 'V(PPM)', 'CR(PPM)', 'CO(PPM)', 'RB(PPM)', 'SR(PPM)', 'Y(PPM)', 'ZR(PPM)', 'NB(PPM)', 'CS(PPM)', 'BA(PPM)', 'LA(PPM)', 'CE(PPM)', 'PR(PPM)', 'ND(PPM)', 'SM(PPM)', 'EU(PPM)', 'GD(PPM)', 'TB(PPM)', 'DY(PPM)', 'HO(PPM)', 'ER(PPM)', 'TM(PPM)', 'YB(PPM)', 'LU(PPM)', 'HF(PPM)', 'TA(PPM)']\n", "\n", "Data types: TECTONIC SETTING object\n", "LATITUDE float64\n", "LONGITUDE float64\n", "LAND OR SEA object\n", "SAMPLE NAME object\n", "SIO2(WT%) float64\n", "TIO2(WT%) float64\n", "AL2O3(WT%) float64\n", "Fe2O3T(WT%) float64\n", "CAO(WT%) float64\n", "MGO(WT%) float64\n", "MNO(WT%) float64\n", "K2O(WT%) float64\n", "NA2O(WT%) float64\n", "P2O5(WT%) float64\n", "Fe2O3T float64\n", "H2OT(WT%) float64\n", "CO2(WT%) float64\n", "LOI(WT%) float64\n", "NI(PPM) float64\n", "CU(PPM) float64\n", "ZN(PPM) float64\n", "SC(PPM) float64\n", "V(PPM) float64\n", "CR(PPM) float64\n", "CO(PPM) float64\n", "RB(PPM) float64\n", "SR(PPM) float64\n", "Y(PPM) float64\n", "ZR(PPM) float64\n", "NB(PPM) float64\n", "CS(PPM) float64\n", "BA(PPM) float64\n", "LA(PPM) float64\n", "CE(PPM) float64\n", "PR(PPM) float64\n", "ND(PPM) float64\n", "SM(PPM) float64\n", "EU(PPM) float64\n", "GD(PPM) float64\n", "TB(PPM) float64\n", "DY(PPM) float64\n", "HO(PPM) float64\n", "ER(PPM) float64\n", "TM(PPM) float64\n", "YB(PPM) float64\n", "LU(PPM) float64\n", "HF(PPM) float64\n", "TA(PPM) float64\n", "dtype: object\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TECTONIC SETTINGLATITUDELONGITUDELAND OR SEASAMPLE NAMESIO2(WT%)TIO2(WT%)AL2O3(WT%)Fe2O3T(WT%)CAO(WT%)...GD(PPM)TB(PPM)DY(PPM)HO(PPM)ER(PPM)TM(PPM)YB(PPM)LU(PPM)HF(PPM)TA(PPM)
count8312683326.00000083326.000000833258332668901.00000068423.00000067894.00000068281.00000067935.000000...29347.00000032835.00000028941.00000025884.00000028460.00000023000.00000037444.00000035001.00000031849.00000029225.000000
unique13NaNNaN479531NaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topCONVERGENT MARGINNaNNaNSAEs_121-758A-2H-4W, 42-43 [20965]NaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freq19986NaNNaN7030833NaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
meanNaN20.8616840.072988NaNNaN49.1600641.91722914.93631211.1741579.607987...5.4483030.9070215.0354670.9835072.6504660.3818312.4399390.4494943.8991834.304624
stdNaN31.834860106.817620NaNNaN3.6916040.9898462.2839102.3635902.210575...4.2779951.2051539.7904800.9526264.2471020.2872983.6945035.3000314.521555130.297900
minNaN-87.070000-185.930800NaNNaN0.4000000.0000000.0300000.0200000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%NaN0.647200-94.414075NaNNaN47.1400001.08000013.6600009.7200008.550000...3.5000000.6000003.6000000.7300001.9600000.2800001.7200000.2500002.1000000.270000
50%NaN26.0500000.058000NaNNaN49.1500001.80000014.85000011.1400009.800000...5.0000000.8042554.5900000.9000002.3900000.3400002.1300000.3160993.4500000.850000
75%NaN43.850000100.250000NaNNaN50.9800002.60000016.35000012.51400010.950000...6.8000001.0400005.7400001.1000002.9700000.4300002.7100000.4080005.0000002.131000
maxNaN83.520000181.470000NaNNaN92.21000018.23000072.23000032.46000048.770000...500.00000049.300000939.00000029.706960391.00000012.600000277.000000602.000000499.0000009486.000000
\n", "

11 rows × 49 columns

\n", "
" ], "text/plain": [ " TECTONIC SETTING LATITUDE LONGITUDE LAND OR SEA \\\n", "count 83126 83326.000000 83326.000000 83325 \n", "unique 13 NaN NaN 4 \n", "top CONVERGENT MARGIN NaN NaN SAE \n", "freq 19986 NaN NaN 70308 \n", "mean NaN 20.861684 0.072988 NaN \n", "std NaN 31.834860 106.817620 NaN \n", "min NaN -87.070000 -185.930800 NaN \n", "25% NaN 0.647200 -94.414075 NaN \n", "50% NaN 26.050000 0.058000 NaN \n", "75% NaN 43.850000 100.250000 NaN \n", "max NaN 83.520000 181.470000 NaN \n", "\n", " SAMPLE NAME SIO2(WT%) TIO2(WT%) \\\n", "count 83326 68901.000000 68423.000000 \n", "unique 79531 NaN NaN \n", "top s_121-758A-2H-4W, 42-43 [20965] NaN NaN \n", "freq 33 NaN NaN \n", "mean NaN 49.160064 1.917229 \n", "std NaN 3.691604 0.989846 \n", "min NaN 0.400000 0.000000 \n", "25% NaN 47.140000 1.080000 \n", "50% NaN 49.150000 1.800000 \n", "75% NaN 50.980000 2.600000 \n", "max NaN 92.210000 18.230000 \n", "\n", " AL2O3(WT%) Fe2O3T(WT%) CAO(WT%) ... GD(PPM) \\\n", "count 67894.000000 68281.000000 67935.000000 ... 29347.000000 \n", "unique NaN NaN NaN ... NaN \n", "top NaN NaN NaN ... NaN \n", "freq NaN NaN NaN ... NaN \n", "mean 14.936312 11.174157 9.607987 ... 5.448303 \n", "std 2.283910 2.363590 2.210575 ... 4.277995 \n", "min 0.030000 0.020000 0.000000 ... 0.000000 \n", "25% 13.660000 9.720000 8.550000 ... 3.500000 \n", "50% 14.850000 11.140000 9.800000 ... 5.000000 \n", "75% 16.350000 12.514000 10.950000 ... 6.800000 \n", "max 72.230000 32.460000 48.770000 ... 500.000000 \n", "\n", " TB(PPM) DY(PPM) HO(PPM) ER(PPM) TM(PPM) \\\n", "count 32835.000000 28941.000000 25884.000000 28460.000000 23000.000000 \n", "unique NaN NaN NaN NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean 0.907021 5.035467 0.983507 2.650466 0.381831 \n", "std 1.205153 9.790480 0.952626 4.247102 0.287298 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.600000 3.600000 0.730000 1.960000 0.280000 \n", "50% 0.804255 4.590000 0.900000 2.390000 0.340000 \n", "75% 1.040000 5.740000 1.100000 2.970000 0.430000 \n", "max 49.300000 939.000000 29.706960 391.000000 12.600000 \n", "\n", " YB(PPM) LU(PPM) HF(PPM) TA(PPM) \n", "count 37444.000000 35001.000000 31849.000000 29225.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 2.439939 0.449494 3.899183 4.304624 \n", "std 3.694503 5.300031 4.521555 130.297900 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 1.720000 0.250000 2.100000 0.270000 \n", "50% 2.130000 0.316099 3.450000 0.850000 \n", "75% 2.710000 0.408000 5.000000 2.131000 \n", "max 277.000000 602.000000 499.000000 9486.000000 \n", "\n", "[11 rows x 49 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the shape of the dataframe\n", "print('Dataframe shape:', df.shape)\n", "\n", "# Display the column names\n", "print('\\nColumn names:', df.columns.tolist())\n", "\n", "# Display the data types of each column\n", "print('\\nData types:', df.dtypes)\n", "\n", "# Display basic statistical details like percentile, mean, std etc. of a data frame\n", "df.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 20, "id": "04e495b9-92b9-4ae8-9fe5-d65384663591", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:53:17.552695+00:00", "start_time": "2023-06-28T15:53:17.319119+00:00" }, "datalink": { "e5bc1e5e-3c5f-48a2-a9d3-defbee743e8b": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 49, "orig_size_bytes": 784, "truncated_num_cols": 1, "truncated_num_rows": 49, "truncated_size_bytes": 784, "truncated_string_columns": [] }, "display_id": "e5bc1e5e-3c5f-48a2-a9d3-defbee743e8b", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:53:17.396262", "user_variable_name": null, "variable_name": "unk_dataframe_1e9409e304d04eb6af9f5877dedc56eb" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "TECTONIC SETTING 236\n", "LATITUDE 36\n", "LONGITUDE 36\n", "LAND OR SEA 37\n", "SAMPLE NAME 36\n", "SIO2(WT%) 14461\n", "TIO2(WT%) 14939\n", "AL2O3(WT%) 15468\n", "Fe2O3T(WT%) 15081\n", "CAO(WT%) 15427\n", "MGO(WT%) 14418\n", "MNO(WT%) 17012\n", "K2O(WT%) 13636\n", "NA2O(WT%) 15319\n", "P2O5(WT%) 17290\n", "Fe2O3T 15109\n", "H2OT(WT%) 83362\n", "CO2(WT%) 77215\n", "LOI(WT%) 50411\n", "NI(PPM) 33021\n", "CU(PPM) 53571\n", "ZN(PPM) 51814\n", "SC(PPM) 48800\n", "V(PPM) 41738\n", "CR(PPM) 35291\n", "CO(PPM) 53507\n", "RB(PPM) 28068\n", "SR(PPM) 25611\n", "Y(PPM) 31865\n", "ZR(PPM) 29186\n", "NB(PPM) 34597\n", "CS(PPM) 63293\n", "BA(PPM) 30998\n", "LA(PPM) 38845\n", "CE(PPM) 39229\n", "PR(PPM) 57816\n", "ND(PPM) 42582\n", "SM(PPM) 43711\n", "EU(PPM) 46014\n", "GD(PPM) 54015\n", "TB(PPM) 50527\n", "DY(PPM) 54421\n", "HO(PPM) 57478\n", "ER(PPM) 54902\n", "TM(PPM) 60362\n", "YB(PPM) 45918\n", "LU(PPM) 48361\n", "HF(PPM) 51513\n", "TA(PPM) 54137\n", "dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check for missing values in each column\n", "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 21, "id": "22b490d7-330c-49df-809d-80ebfce557d5", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:54:41.943604+00:00", "start_time": "2023-06-28T15:54:41.404054+00:00" }, "datalink": { "25f0544c-7c45-46be-8a1e-ec6c01772d19": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 49, "orig_size_bytes": 784, "truncated_num_cols": 1, "truncated_num_rows": 49, "truncated_size_bytes": 784, "truncated_string_columns": [] }, "display_id": "25f0544c-7c45-46be-8a1e-ec6c01772d19", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:54:41.788037", "user_variable_name": null, "variable_name": "unk_dataframe_58f7c27386664fb789931e6b1b3a71f9" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "TECTONIC SETTING 236\n", "LATITUDE 0\n", "LONGITUDE 0\n", "LAND OR SEA 37\n", "SAMPLE NAME 36\n", "SIO2(WT%) 0\n", "TIO2(WT%) 0\n", "AL2O3(WT%) 0\n", "Fe2O3T(WT%) 0\n", "CAO(WT%) 0\n", "MGO(WT%) 0\n", "MNO(WT%) 0\n", "K2O(WT%) 0\n", "NA2O(WT%) 0\n", "P2O5(WT%) 0\n", "Fe2O3T 0\n", "H2OT(WT%) 83362\n", "CO2(WT%) 29\n", "LOI(WT%) 1\n", "NI(PPM) 1\n", "CU(PPM) 2\n", "ZN(PPM) 2\n", "SC(PPM) 1\n", "V(PPM) 1\n", "CR(PPM) 1\n", "CO(PPM) 3\n", "RB(PPM) 1\n", "SR(PPM) 1\n", "Y(PPM) 1\n", "ZR(PPM) 1\n", "NB(PPM) 1\n", "CS(PPM) 2\n", "BA(PPM) 1\n", "LA(PPM) 1\n", "CE(PPM) 1\n", "PR(PPM) 1\n", "ND(PPM) 1\n", "SM(PPM) 1\n", "EU(PPM) 1\n", "GD(PPM) 1\n", "TB(PPM) 1\n", "DY(PPM) 1\n", "HO(PPM) 1\n", "ER(PPM) 1\n", "TM(PPM) 1\n", "YB(PPM) 1\n", "LU(PPM) 8\n", "HF(PPM) 1\n", "TA(PPM) 8\n", "dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fill missing values using interpolation\n", "df = df.interpolate()\n", "\n", "# Check for missing values again to confirm\n", "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 22, "id": "dd24206f-2ad6-4058-9f90-4fd7ed5d337e", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:55:35.649644+00:00", "start_time": "2023-06-28T15:55:35.369597+00:00" }, "datalink": { "f42e5309-44b7-4e30-ad4f-759b8d22c260": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 49, "orig_size_bytes": 784, "truncated_num_cols": 1, "truncated_num_rows": 49, "truncated_size_bytes": 784, "truncated_string_columns": [] }, "display_id": "f42e5309-44b7-4e30-ad4f-759b8d22c260", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:55:35.494191", "user_variable_name": null, "variable_name": "unk_dataframe_5918954f4eea47b4b07c28e48bb5db00" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "TECTONIC SETTING 0\n", "LATITUDE 0\n", "LONGITUDE 0\n", "LAND OR SEA 0\n", "SAMPLE NAME 0\n", "SIO2(WT%) 0\n", "TIO2(WT%) 0\n", "AL2O3(WT%) 0\n", "Fe2O3T(WT%) 0\n", "CAO(WT%) 0\n", "MGO(WT%) 0\n", "MNO(WT%) 0\n", "K2O(WT%) 0\n", "NA2O(WT%) 0\n", "P2O5(WT%) 0\n", "Fe2O3T 0\n", "H2OT(WT%) 83362\n", "CO2(WT%) 29\n", "LOI(WT%) 1\n", "NI(PPM) 1\n", "CU(PPM) 2\n", "ZN(PPM) 2\n", "SC(PPM) 1\n", "V(PPM) 1\n", "CR(PPM) 1\n", "CO(PPM) 3\n", "RB(PPM) 1\n", "SR(PPM) 1\n", "Y(PPM) 1\n", "ZR(PPM) 1\n", "NB(PPM) 1\n", "CS(PPM) 2\n", "BA(PPM) 1\n", "LA(PPM) 1\n", "CE(PPM) 1\n", "PR(PPM) 1\n", "ND(PPM) 1\n", "SM(PPM) 1\n", "EU(PPM) 1\n", "GD(PPM) 1\n", "TB(PPM) 1\n", "DY(PPM) 1\n", "HO(PPM) 1\n", "ER(PPM) 1\n", "TM(PPM) 1\n", "YB(PPM) 1\n", "LU(PPM) 8\n", "HF(PPM) 1\n", "TA(PPM) 8\n", "dtype: int64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fill missing values in categorical columns with the mode\n", "for column in ['TECTONIC SETTING', 'LAND OR SEA', 'SAMPLE NAME']:\n", " df[column].fillna(df[column].mode()[0], inplace=True)\n", "\n", "# Check for missing values again to confirm\n", "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 23, "id": "20e89145-b771-4ee1-96ae-9d199b2e7c8e", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:57:15.764179+00:00", "start_time": "2023-06-28T15:57:13.024051+00:00" }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", "# Compute the correlation matrix\n", "corr = df.corr()\n", "\n", "# Generate a mask for the upper triangle\n", "mask = np.triu(np.ones_like(corr, dtype=bool))\n", "\n", "# Set up the matplotlib figure\n", "f, ax = plt.subplots(figsize=(11, 9))\n", "\n", "# Generate a custom diverging colormap\n", "cmap = sns.diverging_palette(230, 20, as_cmap=True)\n", "\n", "# Draw the heatmap with the mask and correct aspect ratio\n", "sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,\n", " square=True, linewidths=.5, cbar_kws={\"shrink\": .5})" ] }, { "cell_type": "code", "execution_count": 24, "id": "e346404b-e9f5-4182-adb7-0d1409167f65", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:58:41.637895+00:00", "start_time": "2023-06-28T15:58:38.726706+00:00" }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Create histogram for 'SIO2(WT%)'\n", "plt.figure(figsize=(8, 6))\n", "sns.histplot(df['SIO2(WT%)'], bins=30, kde=True)\n", "plt.title('Histogram of SIO2(WT%)')\n", "plt.xlabel('SIO2(WT%)')\n", "plt.ylabel('Frequency')\n", "plt.show()\n", "\n", "# Create boxplot for 'SIO2(WT%)'\n", "plt.figure(figsize=(8, 6))\n", "sns.boxplot(x=df['SIO2(WT%)'])\n", "plt.title('Boxplot of SIO2(WT%)')\n", "plt.xlabel('SIO2(WT%)')\n", "plt.show()\n", "\n", "# Create histogram for 'NI(PPM)'\n", "plt.figure(figsize=(8, 6))\n", "sns.histplot(df['NI(PPM)'], bins=30, kde=True)\n", "plt.title('Histogram of NI(PPM)')\n", "plt.xlabel('NI(PPM)')\n", "plt.ylabel('Frequency')\n", "plt.show()\n", "\n", "# Create boxplot for 'NI(PPM)'\n", "plt.figure(figsize=(8, 6))\n", "sns.boxplot(x=df['NI(PPM)'])\n", "plt.title('Boxplot of NI(PPM)')\n", "plt.xlabel('NI(PPM)')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 25, "id": "5293b2b2-e551-442a-8c1a-9f6a7bb00b7e", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T15:59:38.914482+00:00", "start_time": "2023-06-28T15:59:38.524496+00:00" }, "datalink": { "64f1da02-cb63-43cc-82ee-70c8f68cd449": { "applied_filters": [], "dataframe_info": { "default_index_used": true, "orig_num_cols": 64, "orig_num_rows": 5, "orig_size_bytes": 2005, "truncated_num_cols": 64, "truncated_num_rows": 5, "truncated_size_bytes": 2005, "truncated_string_columns": [] }, "display_id": "64f1da02-cb63-43cc-82ee-70c8f68cd449", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T15:59:38.748183", "user_variable_name": null, "variable_name": "unk_dataframe_1754f8008a1d4db289cb74ac9e3e25c2" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDESAMPLE NAMESIO2(WT%)TIO2(WT%)AL2O3(WT%)Fe2O3T(WT%)CAO(WT%)MGO(WT%)MNO(WT%)...TECTONIC SETTING_OCEANIC PLATEAUTECTONIC SETTING_OPHIOLITETECTONIC SETTING_PASSIVE MARGINTECTONIC SETTING_RIFT VOLCANICSTECTONIC SETTING_SEAMOUNTTECTONIC SETTING_SUBMARINE RIDGELAND OR SEA_SAELAND OR SEA_SAQLAND OR SEA_SaeLAND OR SEA_Saq
034.0000131.5000s_1 [4742]46.671.9613.14010.45708.8714.210.120...0000001000
1-21.2500-159.7500s_R40 [4683]45.972.9111.86011.42009.7411.340.180...0000001000
247.1200152.2300s_V-17-603 [7321] / s_V17-603 [4190]51.531.7914.08010.00358.397.970.160...0000001000
347.1200152.2300s_V15-307 [6818] / s_V-15-307 [3042]58.220.6716.3008.58707.043.360.140...0000001000
427.6149-16.1633s_157-956B-44R-4,G1 [4750]54.510.7716.42510.67508.764.280.165...0000000100
\n", "

5 rows × 64 columns

\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE SAMPLE NAME SIO2(WT%) \\\n", "0 34.0000 131.5000 s_1 [4742] 46.67 \n", "1 -21.2500 -159.7500 s_R40 [4683] 45.97 \n", "2 47.1200 152.2300 s_V-17-603 [7321] / s_V17-603 [4190] 51.53 \n", "3 47.1200 152.2300 s_V15-307 [6818] / s_V-15-307 [3042] 58.22 \n", "4 27.6149 -16.1633 s_157-956B-44R-4,G1 [4750] 54.51 \n", "\n", " TIO2(WT%) AL2O3(WT%) Fe2O3T(WT%) CAO(WT%) MGO(WT%) MNO(WT%) ... \\\n", "0 1.96 13.140 10.4570 8.87 14.21 0.120 ... \n", "1 2.91 11.860 11.4200 9.74 11.34 0.180 ... \n", "2 1.79 14.080 10.0035 8.39 7.97 0.160 ... \n", "3 0.67 16.300 8.5870 7.04 3.36 0.140 ... \n", "4 0.77 16.425 10.6750 8.76 4.28 0.165 ... \n", "\n", " TECTONIC SETTING_OCEANIC PLATEAU TECTONIC SETTING_OPHIOLITE \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " TECTONIC SETTING_PASSIVE MARGIN TECTONIC SETTING_RIFT VOLCANICS \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " TECTONIC SETTING_SEAMOUNT TECTONIC SETTING_SUBMARINE RIDGE \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " LAND OR SEA_SAE LAND OR SEA_SAQ LAND OR SEA_Sae LAND OR SEA_Saq \n", "0 1 0 0 0 \n", "1 1 0 0 0 \n", "2 1 0 0 0 \n", "3 1 0 0 0 \n", "4 0 1 0 0 \n", "\n", "[5 rows x 64 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Perform one-hot encoding on categorical columns\n", "df = pd.get_dummies(df, columns=['TECTONIC SETTING', 'LAND OR SEA'])\n", "\n", "# Display the first few rows of the dataframe to confirm\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 26, "id": "dce2a8eb-d8be-4951-adcc-9d7507db2090", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T16:00:26.060916+00:00", "start_time": "2023-06-28T16:00:25.623703+00:00" }, "datalink": { "4a2e29d6-d1a5-4be9-8cec-81af8c8a4dfa": { "applied_filters": [], "dataframe_info": { "default_index_used": true, "orig_num_cols": 64, "orig_num_rows": 5, "orig_size_bytes": 2005, "truncated_num_cols": 64, "truncated_num_rows": 5, "truncated_size_bytes": 2005, "truncated_string_columns": [] }, "display_id": "4a2e29d6-d1a5-4be9-8cec-81af8c8a4dfa", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:00:25.895496", "user_variable_name": null, "variable_name": "unk_dataframe_198c353672304a549fda1524dfa680ae" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\25688\\AppData\\Local\\Temp\\ipykernel_136772\\444792743.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n", " df.fillna(df.median(), inplace=True)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDESAMPLE NAMESIO2(WT%)TIO2(WT%)AL2O3(WT%)Fe2O3T(WT%)CAO(WT%)MGO(WT%)MNO(WT%)...TECTONIC SETTING_OCEANIC PLATEAUTECTONIC SETTING_OPHIOLITETECTONIC SETTING_PASSIVE MARGINTECTONIC SETTING_RIFT VOLCANICSTECTONIC SETTING_SEAMOUNTTECTONIC SETTING_SUBMARINE RIDGELAND OR SEA_SAELAND OR SEA_SAQLAND OR SEA_SaeLAND OR SEA_Saq
034.0000131.5000s_1 [4742]46.671.9613.14010.45708.8714.210.120...0000001000
1-21.2500-159.7500s_R40 [4683]45.972.9111.86011.42009.7411.340.180...0000001000
247.1200152.2300s_V-17-603 [7321] / s_V17-603 [4190]51.531.7914.08010.00358.397.970.160...0000001000
347.1200152.2300s_V15-307 [6818] / s_V-15-307 [3042]58.220.6716.3008.58707.043.360.140...0000001000
427.6149-16.1633s_157-956B-44R-4,G1 [4750]54.510.7716.42510.67508.764.280.165...0000000100
\n", "

5 rows × 64 columns

\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE SAMPLE NAME SIO2(WT%) \\\n", "0 34.0000 131.5000 s_1 [4742] 46.67 \n", "1 -21.2500 -159.7500 s_R40 [4683] 45.97 \n", "2 47.1200 152.2300 s_V-17-603 [7321] / s_V17-603 [4190] 51.53 \n", "3 47.1200 152.2300 s_V15-307 [6818] / s_V-15-307 [3042] 58.22 \n", "4 27.6149 -16.1633 s_157-956B-44R-4,G1 [4750] 54.51 \n", "\n", " TIO2(WT%) AL2O3(WT%) Fe2O3T(WT%) CAO(WT%) MGO(WT%) MNO(WT%) ... \\\n", "0 1.96 13.140 10.4570 8.87 14.21 0.120 ... \n", "1 2.91 11.860 11.4200 9.74 11.34 0.180 ... \n", "2 1.79 14.080 10.0035 8.39 7.97 0.160 ... \n", "3 0.67 16.300 8.5870 7.04 3.36 0.140 ... \n", "4 0.77 16.425 10.6750 8.76 4.28 0.165 ... \n", "\n", " TECTONIC SETTING_OCEANIC PLATEAU TECTONIC SETTING_OPHIOLITE \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " TECTONIC SETTING_PASSIVE MARGIN TECTONIC SETTING_RIFT VOLCANICS \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " TECTONIC SETTING_SEAMOUNT TECTONIC SETTING_SUBMARINE RIDGE \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " LAND OR SEA_SAE LAND OR SEA_SAQ LAND OR SEA_Sae LAND OR SEA_Saq \n", "0 1 0 0 0 \n", "1 1 0 0 0 \n", "2 1 0 0 0 \n", "3 1 0 0 0 \n", "4 0 1 0 0 \n", "\n", "[5 rows x 64 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fill missing values with median of the columns\n", "df.fillna(df.median(), inplace=True)\n", "\n", "# Display the first few rows of the dataframe to confirm\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 27, "id": "32d02e18-8ebe-418f-b387-1c93459e8aaf", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T16:19:50.443424+00:00", "start_time": "2023-06-28T16:19:50.225031+00:00" }, "noteable": { "cell_type": "code" } }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# Define the target variable and the feature variables\n", "y = df['TECTONIC SETTING_INTRAPLATE VOLCANICS']\n", "X = df.drop('TECTONIC SETTING_INTRAPLATE VOLCANICS', axis=1).drop('H2OT(WT%)', axis=1).drop('SAMPLE NAME', axis=1)\n", "# Split the data into training and test sets\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 28, "id": "a8d7c712-cead-4d24-b3a2-635bc8601fe4", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T16:19:59.029316+00:00", "start_time": "2023-06-28T16:19:58.824247+00:00" }, "datalink": { "15d83bee-b6a6-459f-9309-c595b60bad5a": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 62, "orig_size_bytes": 992, "truncated_num_cols": 1, "truncated_num_rows": 62, "truncated_size_bytes": 992, "truncated_string_columns": [] }, "display_id": "15d83bee-b6a6-459f-9309-c595b60bad5a", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:17:36.779439", "user_variable_name": null, "variable_name": "unk_dataframe_ecabb215c5874dba9e238661d0782b17" }, "a36956de-d165-4ff9-9956-14a961039435": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 62, "orig_size_bytes": 992, "truncated_num_cols": 1, "truncated_num_rows": 62, "truncated_size_bytes": 992, "truncated_string_columns": [] }, "display_id": "a36956de-d165-4ff9-9956-14a961039435", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:07:50.365431", "user_variable_name": null, "variable_name": "unk_dataframe_60a5ec5baa004dce94e8e67ab4011155" }, "da07bd2c-46a7-40ec-bfd0-09f3cbf17bbf": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 62, "orig_size_bytes": 992, "truncated_num_cols": 1, "truncated_num_rows": 62, "truncated_size_bytes": 992, "truncated_string_columns": [] }, "display_id": "da07bd2c-46a7-40ec-bfd0-09f3cbf17bbf", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:15:08.276198", "user_variable_name": null, "variable_name": "unk_dataframe_5b04bc8c10f94597b8faa7c71b4b73d6" }, "f4b7e779-1211-4910-97df-6ee01376376d": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 61, "orig_size_bytes": 976, "truncated_num_cols": 1, "truncated_num_rows": 61, "truncated_size_bytes": 976, "truncated_string_columns": [] }, "display_id": "f4b7e779-1211-4910-97df-6ee01376376d", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:19:58.873620", "user_variable_name": null, "variable_name": "unk_dataframe_98aa5d8c18c7471ca420870b413111fe" } }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "LATITUDE 0\n", "LONGITUDE 0\n", "SIO2(WT%) 0\n", "TIO2(WT%) 0\n", "AL2O3(WT%) 0\n", " ..\n", "TECTONIC SETTING_SUBMARINE RIDGE 0\n", "LAND OR SEA_SAE 0\n", "LAND OR SEA_SAQ 0\n", "LAND OR SEA_Sae 0\n", "LAND OR SEA_Saq 0\n", "Length: 61, dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check for missing values in the training set\n", "X_train.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 29, "id": "e3c77dfc-22ef-459c-8e5a-4c7ae0a4e41e", "metadata": { "ExecuteTime": { "end_time": "2023-06-28T16:20:06.750253+00:00", "start_time": "2023-06-28T16:20:06.324195+00:00" }, "datalink": { "58c5e916-d829-4791-87a8-8581217a2d03": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 61, "orig_size_bytes": 976, "truncated_num_cols": 1, "truncated_num_rows": 61, "truncated_size_bytes": 976, "truncated_string_columns": [] }, "display_id": "58c5e916-d829-4791-87a8-8581217a2d03", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:20:06.594732", "user_variable_name": null, "variable_name": "unk_dataframe_0981636b1eb84a35843e88f18ded9bf4" }, "b1c927bd-bcc1-45d9-b7f3-4244cbbc09fd": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 62, "orig_size_bytes": 992, "truncated_num_cols": 1, "truncated_num_rows": 62, "truncated_size_bytes": 992, "truncated_string_columns": [] }, "display_id": "b1c927bd-bcc1-45d9-b7f3-4244cbbc09fd", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:11:32.310128", "user_variable_name": null, "variable_name": "unk_dataframe_d412e317fe1340ee9bc8938853c93962" }, "f3750d35-f50c-4fef-b6a5-67092c4fad8d": { "applied_filters": [], "dataframe_info": { "default_index_used": false, "orig_num_cols": 1, "orig_num_rows": 62, "orig_size_bytes": 992, "truncated_num_cols": 1, "truncated_num_rows": 62, "truncated_size_bytes": 992, "truncated_string_columns": [] }, "display_id": "f3750d35-f50c-4fef-b6a5-67092c4fad8d", "dx_settings": { "ALLOW_NOTEABLE_ATTRS": true, "COLUMN_SAMPLING_METHOD": "outer", "DB_LOCATION": ":memory:", "DEV_MODE": false, "DISPLAY_MAX_COLUMNS": 100, "DISPLAY_MAX_ROWS": 50000, "DISPLAY_MODE": "simple", "ENABLE_ASSIGNMENT": true, "ENABLE_DATALINK": true, "FLATTEN_COLUMN_VALUES": true, "FLATTEN_INDEX_VALUES": false, "GENERATE_DEX_METADATA": false, "HTML_TABLE_SCHEMA": false, "LOG_LEVEL": 30, "MAX_RENDER_SIZE_BYTES": 104857600, "MAX_STRING_LENGTH": 250, "NUM_PAST_SAMPLES_TRACKED": 3, "RANDOM_STATE": 12648430, "RESET_INDEX_VALUES": false, "ROW_SAMPLING_METHOD": "random", "SAMPLING_FACTOR": 0.1, "SAMPLING_METHOD": "random", "STRINGIFY_COLUMN_VALUES": true, "STRINGIFY_INDEX_VALUES": false }, "sample_history": [], "sampling_time": "2023-06-28T16:15:17.197808", "user_variable_name": null, "variable_name": "unk_dataframe_da910f99f3dd470d8537020325c0c9de" } }, "dx": { "dashboard": { "multiViews": [] }, "fieldMetadata": { "0": { "columnPosition": 2 }, "index": { "columnPosition": 1 } }, "updated": 1687968933827, "views": [ { "annotationRules": [], "chart": {}, "chartMode": "grid", "confoRules": [], "decoration": { "footer": "", "subtitle": "", "title": "Table" }, "facets": [], "filterSettings": { "SHOW_FILTER_PANEL": true, "filters": [] }, "id": "first-view", "isDefault": true, "type": "public", "userID": "" }, { "annotationRules": [], "chart": {}, "chartMode": "grid", "confoRules": [], "decoration": { "footer": "", "subtitle": "", "title": "Table" }, "facets": [], "filterSettings": { "SHOW_FILTER_PANEL": true, "filters": [] }, "id": "first-view", "isDefault": true, "isTransitory": true, "type": "public", "userID": "" } ] }, "noteable": { "cell_type": "code" } }, "outputs": [ { "data": { "text/plain": [ "LATITUDE 0\n", "LONGITUDE 0\n", "SIO2(WT%) 0\n", "TIO2(WT%) 0\n", "AL2O3(WT%) 0\n", " ..\n", "TECTONIC SETTING_SUBMARINE RIDGE 0\n", "LAND OR SEA_SAE 0\n", "LAND OR SEA_SAQ 0\n", "LAND OR SEA_Sae 0\n", "LAND OR SEA_Saq 0\n", "Length: 61, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Calculate the median of each column\n", "medians = X_train.median()\n", "\n", "# Fill missing values with the median of each column\n", "X_train = X_train.fillna(medians)\n", "X_test = X_test.fillna(medians)\n", "\n", "# Check again for missing values in the training set\n", "X_train.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 13, "id": "ffd12cb1-2b37-4eb6-9227-5a60d8856e21", "metadata": { "ExecuteTime": null, "noteable": { "cell_type": "code" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Program Files\\Python310\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] }, { "data": { "text/plain": [ "0.9727703472680381" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score\n", "\n", "\n", "# Create and train the model\n", "logreg = LogisticRegression(max_iter=1000)\n", "logreg.fit(X_train, y_train)\n", "\n", "# Make predictions on the test set\n", "y_pred = logreg.predict(X_test)\n", "\n", "# Calculate the accuracy score# Calculate the accuracy score\n", "\n", "accuracy = accuracy_score(y_test, y_pred)\n", "accuracy" ] }, { "cell_type": "code", "execution_count": 16, "id": "a129d74b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\25688\\AppData\\Roaming\\Python\\Python310\\site-packages\\pytorch_tabnet\\abstract_model.py:75: UserWarning: Device used : cuda\n", " warnings.warn(f\"Device used : {self.device}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 0 | loss: 0.12099 | val_0_accuracy: 0.81485 | 0:00:16s\n", "epoch 1 | loss: 0.01343 | val_0_accuracy: 0.81485 | 0:00:32s\n", "epoch 2 | loss: 0.00462 | val_0_accuracy: 0.81485 | 0:00:48s\n", "epoch 3 | loss: 0.003 | val_0_accuracy: 0.81485 | 0:01:06s\n", "epoch 4 | loss: 0.00235 | val_0_accuracy: 0.81485 | 0:01:23s\n", "epoch 5 | loss: 0.00185 | val_0_accuracy: 0.81485 | 0:01:40s\n", "epoch 6 | loss: 0.00141 | val_0_accuracy: 0.81485 | 0:01:57s\n", "epoch 7 | loss: 0.003 | val_0_accuracy: 0.81485 | 0:02:14s\n", "epoch 8 | loss: 0.00129 | val_0_accuracy: 0.81485 | 0:02:33s\n", "epoch 9 | loss: 0.00233 | val_0_accuracy: 0.81485 | 0:02:51s\n", "epoch 10 | loss: 0.00147 | val_0_accuracy: 0.81485 | 0:03:10s\n", "epoch 11 | loss: 0.00121 | val_0_accuracy: 0.81485 | 0:03:28s\n", "epoch 12 | loss: 0.00128 | val_0_accuracy: 0.81485 | 0:03:47s\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Input \u001b[1;32mIn [16]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 26\u001b[0m tabnet_model \u001b[38;5;241m=\u001b[39m TabNetRegressor(\n\u001b[0;32m 27\u001b[0m seed\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m, \u001b[38;5;66;03m# Random seed for reproducibility\u001b[39;00m\n\u001b[0;32m 28\u001b[0m verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;66;03m# Show training progress\u001b[39;00m\n\u001b[0;32m 29\u001b[0m device_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 30\u001b[0m )\n\u001b[0;32m 33\u001b[0m \u001b[38;5;66;03m# 不经过PCA拟合模型\u001b[39;00m\n\u001b[1;32m---> 34\u001b[0m \u001b[43mtabnet_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[43mX_train\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreshape\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_set\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_test\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreshape\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_metric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maccuracy\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_epochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mpatience\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop_last\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[0;32m 42\u001b[0m \u001b[43m)\u001b[49m\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\pytorch_tabnet\\abstract_model.py:245\u001b[0m, in \u001b[0;36mTabModel.fit\u001b[1;34m(self, X_train, y_train, eval_set, eval_name, eval_metric, loss_fn, weights, max_epochs, patience, batch_size, virtual_batch_size, num_workers, drop_last, callbacks, pin_memory, from_unsupervised, warm_start, augmentations)\u001b[0m\n\u001b[0;32m 243\u001b[0m \u001b[38;5;66;03m# Apply predict epoch to all eval sets\u001b[39;00m\n\u001b[0;32m 244\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m eval_name, valid_dataloader \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(eval_names, valid_dataloaders):\n\u001b[1;32m--> 245\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predict_epoch\u001b[49m\u001b[43m(\u001b[49m\u001b[43meval_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalid_dataloader\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 247\u001b[0m \u001b[38;5;66;03m# Call method on_epoch_end for all callbacks\u001b[39;00m\n\u001b[0;32m 248\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_callback_container\u001b[38;5;241m.\u001b[39mon_epoch_end(\n\u001b[0;32m 249\u001b[0m epoch_idx, logs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhistory\u001b[38;5;241m.\u001b[39mepoch_metrics\n\u001b[0;32m 250\u001b[0m )\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\pytorch_tabnet\\abstract_model.py:529\u001b[0m, in \u001b[0;36mTabModel._predict_epoch\u001b[1;34m(self, name, loader)\u001b[0m\n\u001b[0;32m 526\u001b[0m list_y_score \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m 528\u001b[0m \u001b[38;5;66;03m# Main loop\u001b[39;00m\n\u001b[1;32m--> 529\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m batch_idx, (X, y) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mloader\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[0;32m 530\u001b[0m scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_predict_batch(X)\n\u001b[0;32m 531\u001b[0m list_y_true\u001b[38;5;241m.\u001b[39mappend(y)\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\site-packages\\torch\\utils\\data\\dataloader.py:368\u001b[0m, in \u001b[0;36mDataLoader.__iter__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 366\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_iterator\n\u001b[0;32m 367\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 368\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_iterator\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\site-packages\\torch\\utils\\data\\dataloader.py:314\u001b[0m, in \u001b[0;36mDataLoader._get_iterator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 312\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 313\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_worker_number_rationality()\n\u001b[1;32m--> 314\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_MultiProcessingDataLoaderIter\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\site-packages\\torch\\utils\\data\\dataloader.py:927\u001b[0m, in \u001b[0;36m_MultiProcessingDataLoaderIter.__init__\u001b[1;34m(self, loader)\u001b[0m\n\u001b[0;32m 920\u001b[0m w\u001b[38;5;241m.\u001b[39mdaemon \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 921\u001b[0m \u001b[38;5;66;03m# NB: Process.start() actually take some time as it needs to\u001b[39;00m\n\u001b[0;32m 922\u001b[0m \u001b[38;5;66;03m# start a process and pass the arguments over via a pipe.\u001b[39;00m\n\u001b[0;32m 923\u001b[0m \u001b[38;5;66;03m# Therefore, we only add a worker to self._workers list after\u001b[39;00m\n\u001b[0;32m 924\u001b[0m \u001b[38;5;66;03m# it started, so that we do not call .join() if program dies\u001b[39;00m\n\u001b[0;32m 925\u001b[0m \u001b[38;5;66;03m# before it starts, and __del__ tries to join but will get:\u001b[39;00m\n\u001b[0;32m 926\u001b[0m \u001b[38;5;66;03m# AssertionError: can only join a started process.\u001b[39;00m\n\u001b[1;32m--> 927\u001b[0m \u001b[43mw\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 928\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_index_queues\u001b[38;5;241m.\u001b[39mappend(index_queue)\n\u001b[0;32m 929\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_workers\u001b[38;5;241m.\u001b[39mappend(w)\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\multiprocessing\\process.py:121\u001b[0m, in \u001b[0;36mBaseProcess.start\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 118\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m _current_process\u001b[38;5;241m.\u001b[39m_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdaemon\u001b[39m\u001b[38;5;124m'\u001b[39m), \\\n\u001b[0;32m 119\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdaemonic processes are not allowed to have children\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 120\u001b[0m _cleanup()\n\u001b[1;32m--> 121\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_popen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_Popen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 122\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sentinel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_popen\u001b[38;5;241m.\u001b[39msentinel\n\u001b[0;32m 123\u001b[0m \u001b[38;5;66;03m# Avoid a refcycle if the target function holds an indirect\u001b[39;00m\n\u001b[0;32m 124\u001b[0m \u001b[38;5;66;03m# reference to the process object (see bpo-30775)\u001b[39;00m\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\multiprocessing\\context.py:224\u001b[0m, in \u001b[0;36mProcess._Popen\u001b[1;34m(process_obj)\u001b[0m\n\u001b[0;32m 222\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_Popen\u001b[39m(process_obj):\n\u001b[1;32m--> 224\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_context\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mProcess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_Popen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_obj\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\multiprocessing\\context.py:327\u001b[0m, in \u001b[0;36mSpawnProcess._Popen\u001b[1;34m(process_obj)\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[0;32m 325\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_Popen\u001b[39m(process_obj):\n\u001b[0;32m 326\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpopen_spawn_win32\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Popen\n\u001b[1;32m--> 327\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_obj\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\multiprocessing\\popen_spawn_win32.py:93\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, process_obj)\u001b[0m\n\u001b[0;32m 91\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 92\u001b[0m reduction\u001b[38;5;241m.\u001b[39mdump(prep_data, to_child)\n\u001b[1;32m---> 93\u001b[0m \u001b[43mreduction\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdump\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_child\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 94\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 95\u001b[0m set_spawning_popen(\u001b[38;5;28;01mNone\u001b[39;00m)\n", "File \u001b[1;32mC:\\Program Files\\Python310\\lib\\multiprocessing\\reduction.py:60\u001b[0m, in \u001b[0;36mdump\u001b[1;34m(obj, file, protocol)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdump\u001b[39m(obj, file, protocol\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 59\u001b[0m \u001b[38;5;124;03m'''Replacement for pickle.dump() using ForkingPickler.'''\u001b[39;00m\n\u001b[1;32m---> 60\u001b[0m \u001b[43mForkingPickler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprotocol\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdump\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import mean_squared_error\n", "from pytorch_tabnet.tab_model import TabNetRegressor\n", "import torch\n", "\n", "# Set up TabNet model\n", "# tabnet_model = TabNetRegressor(\n", "# n_d=16, # Dimension of the prediction layer\n", "# n_a=16, # Dimension of the attention layer\n", "# n_steps=10, # Number of decision steps, start with a lower value and increase as needed\n", "# gamma=1.3, # Regularization coefficient for feature selection\n", "# lambda_sparse=0, # Regularization coefficient for sparsity\n", "# optimizer_fn=torch.optim.Adam, # Use Adam optimizer\n", "# optimizer_params=dict(lr=0.01, weight_decay=1e-5), # Set learning rate and weight decay\n", "# mask_type=\"entmax\", # Use \"entmax\" instead of \"sparsemax\" for feature selection\n", "# scheduler_params=dict(mode=\"min\", patience=5, min_lr=1e-5, factor=0.9), # Learning rate scheduler parameters\n", "# scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau, # Use ReduceLROnPlateau scheduler\n", "# seed=42, # Random seed for reproducibility\n", "# verbose=1, # Show training progress\n", "# device_name='cuda'\n", "# )\n", "\n", "tabnet_model = TabNetRegressor(\n", " seed=42, # Random seed for reproducibility\n", " verbose=1, # Show training progress\n", " device_name='cuda'\n", ")\n", "\n", "\n", "# 不经过PCA拟合模型\n", "tabnet_model.fit(\n", " X_train.values, y_train.values.reshape(-1, 1),\n", " eval_set=[(X_test.values, y_test.values.reshape(-1, 1))],\n", " eval_metric=[\"accuracy\"],\n", " max_epochs=1000,\n", " patience=20,\n", " num_workers=2,\n", " drop_last=False\n", ")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 17, "id": "2f4a5cf0", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "Descriptors cannot not be created directly.\nIf this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.\nIf you cannot immediately regenerate your protos, some other possible workarounds are:\n 1. Downgrade the protobuf package to 3.20.x or lower.\n 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).\n\nMore information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "Input \u001b[1;32mIn [17]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mkeras\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Sequential\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mkeras\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlayers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dense\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# Define the model\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\keras\\__init__.py:21\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;124;03m\"\"\"Implementation of the Keras API, the high-level API of TensorFlow.\u001b[39;00m\n\u001b[0;32m 16\u001b[0m \n\u001b[0;32m 17\u001b[0m \u001b[38;5;124;03mDetailed documentation and user guides are available at\u001b[39;00m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;124;03m[keras.io](https://keras.io).\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# pylint: disable=unused-import\u001b[39;00m\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tf2\n\u001b[0;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mkeras\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m distribute\n\u001b[0;32m 24\u001b[0m \u001b[38;5;66;03m# See b/110718070#comment18 for more details about this import.\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\__init__.py:37\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msys\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_sys\u001b[39;00m\n\u001b[0;32m 35\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_typing\u001b[39;00m\n\u001b[1;32m---> 37\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m module_util \u001b[38;5;28;01mas\u001b[39;00m _module_util\n\u001b[0;32m 38\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlazy_loader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LazyLoader \u001b[38;5;28;01mas\u001b[39;00m _LazyLoader\n\u001b[0;32m 40\u001b[0m \u001b[38;5;66;03m# Make sure code inside the TensorFlow codebase can use tf2.enabled() at import.\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\python\\__init__.py:37\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;66;03m# We aim to keep this file minimal and ideally remove completely.\u001b[39;00m\n\u001b[0;32m 30\u001b[0m \u001b[38;5;66;03m# If you are adding a new file with @tf_export decorators,\u001b[39;00m\n\u001b[0;32m 31\u001b[0m \u001b[38;5;66;03m# import it in modules_with_exports.py instead.\u001b[39;00m\n\u001b[0;32m 32\u001b[0m \n\u001b[0;32m 33\u001b[0m \u001b[38;5;66;03m# go/tf-wildcard-import\u001b[39;00m\n\u001b[0;32m 34\u001b[0m \u001b[38;5;66;03m# pylint: disable=wildcard-import,g-bad-import-order,g-import-not-at-top\u001b[39;00m\n\u001b[0;32m 36\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pywrap_tensorflow \u001b[38;5;28;01mas\u001b[39;00m _pywrap_tensorflow\n\u001b[1;32m---> 37\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meager\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context\n\u001b[0;32m 39\u001b[0m \u001b[38;5;66;03m# pylint: enable=wildcard-import\u001b[39;00m\n\u001b[0;32m 40\u001b[0m \n\u001b[0;32m 41\u001b[0m \u001b[38;5;66;03m# Bring in subpackages.\u001b[39;00m\n\u001b[0;32m 42\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m data\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\python\\eager\\context.py:29\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msix\u001b[39;00m\n\u001b[1;32m---> 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m function_pb2\n\u001b[0;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprotobuf\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config_pb2\n\u001b[0;32m 31\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprotobuf\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m coordination_config_pb2\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\core\\framework\\function_pb2.py:16\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# @@protoc_insertion_point(imports)\u001b[39;00m\n\u001b[0;32m 13\u001b[0m _sym_db \u001b[38;5;241m=\u001b[39m _symbol_database\u001b[38;5;241m.\u001b[39mDefault()\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m attr_value_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_attr__value__pb2\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m node_def_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_node__def__pb2\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m op_def_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_op__def__pb2\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\core\\framework\\attr_value_pb2.py:16\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# @@protoc_insertion_point(imports)\u001b[39;00m\n\u001b[0;32m 13\u001b[0m _sym_db \u001b[38;5;241m=\u001b[39m _symbol_database\u001b[38;5;241m.\u001b[39mDefault()\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tensor_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_tensor__pb2\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tensor_shape_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m types_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_types__pb2\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\core\\framework\\tensor_pb2.py:16\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# @@protoc_insertion_point(imports)\u001b[39;00m\n\u001b[0;32m 13\u001b[0m _sym_db \u001b[38;5;241m=\u001b[39m _symbol_database\u001b[38;5;241m.\u001b[39mDefault()\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m resource_handle_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_resource__handle__pb2\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tensor_shape_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m types_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_types__pb2\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\core\\framework\\resource_handle_pb2.py:16\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# @@protoc_insertion_point(imports)\u001b[39;00m\n\u001b[0;32m 13\u001b[0m _sym_db \u001b[38;5;241m=\u001b[39m _symbol_database\u001b[38;5;241m.\u001b[39mDefault()\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tensor_shape_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mframework\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m types_pb2 \u001b[38;5;28;01mas\u001b[39;00m tensorflow_dot_core_dot_framework_dot_types__pb2\n\u001b[0;32m 20\u001b[0m DESCRIPTOR \u001b[38;5;241m=\u001b[39m _descriptor\u001b[38;5;241m.\u001b[39mFileDescriptor(\n\u001b[0;32m 21\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow/core/framework/resource_handle.proto\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 22\u001b[0m package\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 26\u001b[0m ,\n\u001b[0;32m 27\u001b[0m dependencies\u001b[38;5;241m=\u001b[39m[tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2\u001b[38;5;241m.\u001b[39mDESCRIPTOR,tensorflow_dot_core_dot_framework_dot_types__pb2\u001b[38;5;241m.\u001b[39mDESCRIPTOR,])\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\tensorflow\\core\\framework\\tensor_shape_pb2.py:36\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 13\u001b[0m _sym_db \u001b[38;5;241m=\u001b[39m _symbol_database\u001b[38;5;241m.\u001b[39mDefault()\n\u001b[0;32m 18\u001b[0m DESCRIPTOR \u001b[38;5;241m=\u001b[39m _descriptor\u001b[38;5;241m.\u001b[39mFileDescriptor(\n\u001b[0;32m 19\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow/core/framework/tensor_shape.proto\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 20\u001b[0m package\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 23\u001b[0m serialized_pb\u001b[38;5;241m=\u001b[39m_b(\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m,tensorflow/core/framework/tensor_shape.proto\u001b[39m\u001b[38;5;130;01m\\x12\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mtensorflow\u001b[39m\u001b[38;5;130;01m\\\"\u001b[39;00m\u001b[38;5;124mz\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x10\u001b[39;00m\u001b[38;5;124mTensorShapeProto\u001b[39m\u001b[38;5;130;01m\\x12\u001b[39;00m\u001b[38;5;124m-\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x03\u001b[39;00m\u001b[38;5;130;01m\\x64\u001b[39;00m\u001b[38;5;124mim\u001b[39m\u001b[38;5;130;01m\\x18\u001b[39;00m\u001b[38;5;130;01m\\x02\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\x03\u001b[39;00m\u001b[38;5;124m(\u001b[39m\u001b[38;5;130;01m\\x0b\u001b[39;00m\u001b[38;5;130;01m\\x32\u001b[39;00m\u001b[38;5;124m .tensorflow.TensorShapeProto.Dim\u001b[39m\u001b[38;5;130;01m\\x12\u001b[39;00m\u001b[38;5;130;01m\\x14\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x0c\u001b[39;00m\u001b[38;5;124munknown_rank\u001b[39m\u001b[38;5;130;01m\\x18\u001b[39;00m\u001b[38;5;130;01m\\x03\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;124m(\u001b[39m\u001b[38;5;130;01m\\x08\u001b[39;00m\u001b[38;5;130;01m\\x1a\u001b[39;00m\u001b[38;5;124m!\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x03\u001b[39;00m\u001b[38;5;130;01m\\x44\u001b[39;00m\u001b[38;5;124mim\u001b[39m\u001b[38;5;130;01m\\x12\u001b[39;00m\u001b[38;5;130;01m\\x0c\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x04\u001b[39;00m\u001b[38;5;124msize\u001b[39m\u001b[38;5;130;01m\\x18\u001b[39;00m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;124m(\u001b[39m\u001b[38;5;130;01m\\x03\u001b[39;00m\u001b[38;5;130;01m\\x12\u001b[39;00m\u001b[38;5;130;01m\\x0c\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x04\u001b[39;00m\u001b[38;5;124mname\u001b[39m\u001b[38;5;130;01m\\x18\u001b[39;00m\u001b[38;5;130;01m\\x02\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;124m(\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124mB\u001b[39m\u001b[38;5;130;01m\\x87\u001b[39;00m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\x18\u001b[39;00m\u001b[38;5;124morg.tensorflow.frameworkB\u001b[39m\u001b[38;5;130;01m\\x11\u001b[39;00m\u001b[38;5;124mTensorShapeProtosP\u001b[39m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;124mZSgithub.com/tensorflow/tensorflow/tensorflow/go/core/framework/tensor_shape_go_proto\u001b[39m\u001b[38;5;130;01m\\xf8\u001b[39;00m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;130;01m\\x01\u001b[39;00m\u001b[38;5;130;01m\\x62\u001b[39;00m\u001b[38;5;130;01m\\x06\u001b[39;00m\u001b[38;5;124mproto3\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 24\u001b[0m )\n\u001b[0;32m 29\u001b[0m _TENSORSHAPEPROTO_DIM \u001b[38;5;241m=\u001b[39m _descriptor\u001b[38;5;241m.\u001b[39mDescriptor(\n\u001b[0;32m 30\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDim\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 31\u001b[0m full_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow.TensorShapeProto.Dim\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 32\u001b[0m filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 33\u001b[0m file\u001b[38;5;241m=\u001b[39mDESCRIPTOR,\n\u001b[0;32m 34\u001b[0m containing_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 35\u001b[0m fields\u001b[38;5;241m=\u001b[39m[\n\u001b[1;32m---> 36\u001b[0m \u001b[43m_descriptor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFieldDescriptor\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msize\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfull_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtensorflow.TensorShapeProto.Dim.size\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mnumber\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcpp_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_default_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menum_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontaining_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_extension\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextension_scope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[43mserialized_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mDESCRIPTOR\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 43\u001b[0m _descriptor\u001b[38;5;241m.\u001b[39mFieldDescriptor(\n\u001b[0;32m 44\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m, full_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow.TensorShapeProto.Dim.name\u001b[39m\u001b[38;5;124m'\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m 45\u001b[0m number\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m, \u001b[38;5;28mtype\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m9\u001b[39m, cpp_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m9\u001b[39m, label\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m 46\u001b[0m has_default_value\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, default_value\u001b[38;5;241m=\u001b[39m_b(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m),\n\u001b[0;32m 47\u001b[0m message_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, enum_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, containing_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 48\u001b[0m is_extension\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, extension_scope\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 49\u001b[0m serialized_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, file\u001b[38;5;241m=\u001b[39mDESCRIPTOR),\n\u001b[0;32m 50\u001b[0m ],\n\u001b[0;32m 51\u001b[0m extensions\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m 52\u001b[0m ],\n\u001b[0;32m 53\u001b[0m nested_types\u001b[38;5;241m=\u001b[39m[],\n\u001b[0;32m 54\u001b[0m enum_types\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m 55\u001b[0m ],\n\u001b[0;32m 56\u001b[0m serialized_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 57\u001b[0m is_extendable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 58\u001b[0m syntax\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mproto3\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 59\u001b[0m extension_ranges\u001b[38;5;241m=\u001b[39m[],\n\u001b[0;32m 60\u001b[0m oneofs\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m 61\u001b[0m ],\n\u001b[0;32m 62\u001b[0m serialized_start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m149\u001b[39m,\n\u001b[0;32m 63\u001b[0m serialized_end\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m182\u001b[39m,\n\u001b[0;32m 64\u001b[0m )\n\u001b[0;32m 66\u001b[0m _TENSORSHAPEPROTO \u001b[38;5;241m=\u001b[39m _descriptor\u001b[38;5;241m.\u001b[39mDescriptor(\n\u001b[0;32m 67\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTensorShapeProto\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 68\u001b[0m full_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtensorflow.TensorShapeProto\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 100\u001b[0m serialized_end\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m182\u001b[39m,\n\u001b[0;32m 101\u001b[0m )\n\u001b[0;32m 103\u001b[0m _TENSORSHAPEPROTO_DIM\u001b[38;5;241m.\u001b[39mcontaining_type \u001b[38;5;241m=\u001b[39m _TENSORSHAPEPROTO\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\google\\protobuf\\descriptor.py:561\u001b[0m, in \u001b[0;36mFieldDescriptor.__new__\u001b[1;34m(cls, name, full_name, index, number, type, cpp_type, label, default_value, message_type, enum_type, containing_type, is_extension, extension_scope, options, serialized_options, has_default_value, containing_oneof, json_name, file, create_key)\u001b[0m\n\u001b[0;32m 555\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__new__\u001b[39m(\u001b[38;5;28mcls\u001b[39m, name, full_name, index, number, \u001b[38;5;28mtype\u001b[39m, cpp_type, label,\n\u001b[0;32m 556\u001b[0m default_value, message_type, enum_type, containing_type,\n\u001b[0;32m 557\u001b[0m is_extension, extension_scope, options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 558\u001b[0m serialized_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 559\u001b[0m has_default_value\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, containing_oneof\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 560\u001b[0m file\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, create_key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m): \u001b[38;5;66;03m# pylint: disable=redefined-builtin\u001b[39;00m\n\u001b[1;32m--> 561\u001b[0m \u001b[43m_message\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_CheckCalledFromGeneratedFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 562\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_extension:\n\u001b[0;32m 563\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _message\u001b[38;5;241m.\u001b[39mdefault_pool\u001b[38;5;241m.\u001b[39mFindExtensionByName(full_name)\n", "\u001b[1;31mTypeError\u001b[0m: Descriptors cannot not be created directly.\nIf this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.\nIf you cannot immediately regenerate your protos, some other possible workarounds are:\n 1. Downgrade the protobuf package to 3.20.x or lower.\n 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).\n\nMore information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates" ] } ], "source": [ "from keras.models import Sequential\n", "from keras.layers import Dense\n", "\n", "# Define the model\n", "model = Sequential()\n", "model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))\n", "model.add(Dense(32, activation='relu'))\n", "model.add(Dense(1, activation='sigmoid'))\n", "\n", "# Compile the model\n", "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n", "\n", "# Train the model\n", "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))" ] }, { "cell_type": "code", "execution_count": null, "id": "25d191e6", "metadata": {}, "outputs": [], "source": [ "# Evaluate the model on the test set\n", "test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)\n", "print('\\nTest accuracy:', test_acc)" ] }, { "cell_type": "code", "execution_count": 34, "id": "c148d95e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9878246266418761\n" ] } ], "source": [ "# 导入需要的库\n", "import xgboost as xgb\n", "from sklearn.metrics import accuracy_score\n", "\n", "# 定义参数\n", "params = {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 300, 'subsample': 0.8, 'tree_method': 'gpu_hist'}\n", "\n", "# 创建分类模型\n", "xgb_model = xgb.XGBClassifier(objective='binary:logistic', **params)\n", "\n", "# 拟合训练集\n", "xgb_model.fit(X_train, y_train)\n", "# xgb.plot_importance(xgb_model)\n", "# plt.show()\n", "# 对测试集进行预测\n", "y_pred = xgb_model.predict(X_test)\n", "\n", "# 计算准确率\n", "accuracy = accuracy_score(y_test, y_pred)\n", "print('Accuracy:', accuracy)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ba285bea", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "noteable": { "last_delta_id": "8d0b2b1b-0e9b-42d7-9b9f-c8bf11f810ad", "last_transaction_id": "a88daa5e-8262-43cf-8d4a-370a7c86d740" }, "noteable-chatgpt": { "create_notebook": { "openai_conversation_id": "5e7e52b4-c2d1-51ec-be8f-09a9b24f4053", "openai_ephemeral_user_id": "4792a2b8-7980-5790-8196-464aafd8b369", "openai_subdivision1_iso_code": "US-OR" } }, "nteract": { "version": "noteable@2.9.0" }, "selected_hardware_size": "small" }, "nbformat": 4, "nbformat_minor": 5 }