import pandas as pd
import numpy as np
filepath = "C:\\Users\\abdelilah\\Desktop\\Python Projects\\Cars\\"
car_raw = pd.read_csv(filepath + "Cars Data1.csv", delimiter = ",")
car_raw.head(10)
Make | Model | Type | Origin | DriveTrain | MSRP | Invoice | EngineSize | Cylinders | Horsepower | MPG_City | MPG_Highway | Weight | Wheelbase | Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Acura | MDX | SUV | Asia | All | $36,945 | $33,337 | 3.5 | 6.0 | 265.0 | 17.0 | 23.0 | 4451.0 | 106.0 | 189.0 |
1 | Acura | RSX Type S 2dr | Sedan | Asia | Front | $23,820 | $21,761 | 2.0 | 4.0 | 200.0 | 24.0 | 31.0 | 2778.0 | 101.0 | 172.0 |
2 | Acura | TSX 4dr | Sedan | Asia | Front | $26,990 | $24,647 | 2.4 | 4.0 | 200.0 | 22.0 | 29.0 | 3230.0 | 105.0 | 183.0 |
3 | Acura | TL 4dr | Sedan | Asia | Front | $33,195 | $30,299 | 3.2 | 6.0 | 270.0 | 20.0 | 28.0 | 3575.0 | 108.0 | 186.0 |
4 | Acura | 3.5 RL 4dr | Sedan | Asia | Front | $43,755 | $39,014 | 3.5 | 6.0 | 225.0 | 18.0 | 24.0 | 3880.0 | 115.0 | 197.0 |
5 | Acura | 3.5 RL w/Navigation 4dr | Sedan | Asia | Front | $46,100 | $41,100 | 3.5 | 6.0 | 225.0 | 18.0 | 24.0 | 3893.0 | 115.0 | 197.0 |
6 | Acura | NSX coupe 2dr manual S | Sports | Asia | Rear | $89,765 | $79,978 | 3.2 | 6.0 | 290.0 | 17.0 | 24.0 | 3153.0 | 100.0 | 174.0 |
7 | Audi | A4 1.8T 4dr | Sedan | Europe | Front | $25,940 | $23,508 | 1.8 | 4.0 | 170.0 | 22.0 | 31.0 | 3252.0 | 104.0 | 179.0 |
8 | Audi | A41.8T convertible 2dr | Sedan | Europe | Front | $35,940 | $32,506 | 1.8 | 4.0 | 170.0 | 23.0 | 30.0 | 3638.0 | 105.0 | 180.0 |
9 | Audi | A4 3.0 4dr | Sedan | Europe | Front | $31,840 | $28,846 | 3.0 | 6.0 | 220.0 | 20.0 | 28.0 | 3462.0 | 104.0 | 179.0 |
car_raw.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 432 entries, 0 to 431 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Make 428 non-null object 1 Model 428 non-null object 2 Type 428 non-null object 3 Origin 428 non-null object 4 DriveTrain 428 non-null object 5 MSRP 428 non-null object 6 Invoice 428 non-null object 7 EngineSize 428 non-null float64 8 Cylinders 426 non-null float64 9 Horsepower 428 non-null float64 10 MPG_City 428 non-null float64 11 MPG_Highway 428 non-null float64 12 Weight 428 non-null float64 13 Wheelbase 428 non-null float64 14 Length 428 non-null float64 dtypes: float64(8), object(7) memory usage: 50.8+ KB
First Task: Find all Null Values in the dataset. If there is any null value in any column, then fill it with the mean of that column.
for col in car_raw.columns:
print(col,"has |",car_raw[col].isnull().sum(), "|null values")
Make has | 4 |null values Model has | 4 |null values Type has | 4 |null values Origin has | 4 |null values DriveTrain has | 4 |null values MSRP has | 4 |null values Invoice has | 4 |null values EngineSize has | 4 |null values Cylinders has | 6 |null values Horsepower has | 4 |null values MPG_City has | 4 |null values MPG_Highway has | 4 |null values Weight has | 4 |null values Wheelbase has | 4 |null values Length has | 4 |null values
null_values = []
null_idx = []
for col in car_raw.columns:
null_idx.append(car_raw.index[car_raw[col].isnull() == True].tolist())
null_idx
[[30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173, 251, 252], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173], [30, 39, 161, 173]]
car_raw[car_raw["Cylinders"].isnull() == True]
Make | Model | Type | Origin | DriveTrain | MSRP | Invoice | EngineSize | Cylinders | Horsepower | MPG_City | MPG_Highway | Weight | Wheelbase | Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
30 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
39 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
161 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
173 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
251 | Mazda | RX-8 4dr automatic | Sports | Asia | Rear | $25,700 | $23,794 | 1.3 | NaN | 197.0 | 18.0 | 25.0 | 3053.0 | 106.0 | 174.0 |
252 | Mazda | RX-8 4dr manual | Sports | Asia | Rear | $27,200 | $25,179 | 1.3 | NaN | 238.0 | 18.0 | 24.0 | 3029.0 | 106.0 | 174.0 |
for col in car_raw.columns:
car_raw[col] = car_raw[col].fillna(method = "ffill")
Second Task: Check what are the different types of Make are there in our dataset. And, what is the count (occurrence) of each Make in the data ?
car_raw["Make"].value_counts()
Toyota 28 Chevrolet 27 Mercedes-Benz 26 Ford 23 BMW 22 Audi 19 Honda 18 Nissan 17 Chrysler 15 Volkswagen 15 Dodge 13 Mitsubishi 13 Hyundai 13 Jaguar 12 Volvo 12 Lexus 11 Subaru 11 Mazda 11 Kia 11 Pontiac 11 Buick 9 Lincoln 9 Mercury 9 Infiniti 8 GMC 8 Suzuki 8 Cadillac 8 Saturn 8 Acura 7 Saab 7 Porsche 7 Oldsmobile 3 Jeep 3 Land Rover 3 Isuzu 2 MINI 2 Scion 2 Hummer 1 Name: Make, dtype: int64
Third Task: Show all the records where Origin is Asia or Europe.
eurasia = car_raw[(car_raw["Origin"] == "Asia") | (car_raw["Origin"] == "Europe")]
eurasia
Make | Model | Type | Origin | DriveTrain | MSRP | Invoice | EngineSize | Cylinders | Horsepower | MPG_City | MPG_Highway | Weight | Wheelbase | Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Acura | MDX | SUV | Asia | All | $36,945 | $33,337 | 3.5 | 6.0 | 265.0 | 17.0 | 23.0 | 4451.0 | 106.0 | 189.0 |
1 | Acura | RSX Type S 2dr | Sedan | Asia | Front | $23,820 | $21,761 | 2.0 | 4.0 | 200.0 | 24.0 | 31.0 | 2778.0 | 101.0 | 172.0 |
2 | Acura | TSX 4dr | Sedan | Asia | Front | $26,990 | $24,647 | 2.4 | 4.0 | 200.0 | 22.0 | 29.0 | 3230.0 | 105.0 | 183.0 |
3 | Acura | TL 4dr | Sedan | Asia | Front | $33,195 | $30,299 | 3.2 | 6.0 | 270.0 | 20.0 | 28.0 | 3575.0 | 108.0 | 186.0 |
4 | Acura | 3.5 RL 4dr | Sedan | Asia | Front | $43,755 | $39,014 | 3.5 | 6.0 | 225.0 | 18.0 | 24.0 | 3880.0 | 115.0 | 197.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
427 | Volvo | C70 LPT convertible 2dr | Sedan | Europe | Front | $40,565 | $38,203 | 2.4 | 5.0 | 197.0 | 21.0 | 28.0 | 3450.0 | 105.0 | 186.0 |
428 | Volvo | C70 HPT convertible 2dr | Sedan | Europe | Front | $42,565 | $40,083 | 2.3 | 5.0 | 242.0 | 20.0 | 26.0 | 3450.0 | 105.0 | 186.0 |
429 | Volvo | S80 T6 4dr | Sedan | Europe | Front | $45,210 | $42,573 | 2.9 | 6.0 | 268.0 | 19.0 | 26.0 | 3653.0 | 110.0 | 190.0 |
430 | Volvo | V40 | Wagon | Europe | Front | $26,135 | $24,641 | 1.9 | 4.0 | 170.0 | 22.0 | 29.0 | 2822.0 | 101.0 | 180.0 |
431 | Volvo | XC70 | Wagon | Europe | All | $35,145 | $33,112 | 2.5 | 5.0 | 208.0 | 20.0 | 27.0 | 3823.0 | 109.0 | 186.0 |
285 rows × 15 columns
Fourth Task: Remove all the records (rows) where Weight is above 4000.
car_df = car_raw.copy()
car_df = car_df.loc[car_df["Weight"] > 4000]
car_df.equals(car_raw)
False
Fifth Task: Increase all the values of 'MPG_City' column by 3.
car_raw["MPG_City"] = car_raw["MPG_City"] + 3
car_raw
Make | Model | Type | Origin | DriveTrain | MSRP | Invoice | EngineSize | Cylinders | Horsepower | MPG_City | MPG_Highway | Weight | Wheelbase | Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Acura | MDX | SUV | Asia | All | $36,945 | $33,337 | 3.5 | 6.0 | 265.0 | 20.0 | 23.0 | 4451.0 | 106.0 | 189.0 |
1 | Acura | RSX Type S 2dr | Sedan | Asia | Front | $23,820 | $21,761 | 2.0 | 4.0 | 200.0 | 27.0 | 31.0 | 2778.0 | 101.0 | 172.0 |
2 | Acura | TSX 4dr | Sedan | Asia | Front | $26,990 | $24,647 | 2.4 | 4.0 | 200.0 | 25.0 | 29.0 | 3230.0 | 105.0 | 183.0 |
3 | Acura | TL 4dr | Sedan | Asia | Front | $33,195 | $30,299 | 3.2 | 6.0 | 270.0 | 23.0 | 28.0 | 3575.0 | 108.0 | 186.0 |
4 | Acura | 3.5 RL 4dr | Sedan | Asia | Front | $43,755 | $39,014 | 3.5 | 6.0 | 225.0 | 21.0 | 24.0 | 3880.0 | 115.0 | 197.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
427 | Volvo | C70 LPT convertible 2dr | Sedan | Europe | Front | $40,565 | $38,203 | 2.4 | 5.0 | 197.0 | 24.0 | 28.0 | 3450.0 | 105.0 | 186.0 |
428 | Volvo | C70 HPT convertible 2dr | Sedan | Europe | Front | $42,565 | $40,083 | 2.3 | 5.0 | 242.0 | 23.0 | 26.0 | 3450.0 | 105.0 | 186.0 |
429 | Volvo | S80 T6 4dr | Sedan | Europe | Front | $45,210 | $42,573 | 2.9 | 6.0 | 268.0 | 22.0 | 26.0 | 3653.0 | 110.0 | 190.0 |
430 | Volvo | V40 | Wagon | Europe | Front | $26,135 | $24,641 | 1.9 | 4.0 | 170.0 | 25.0 | 29.0 | 2822.0 | 101.0 | 180.0 |
431 | Volvo | XC70 | Wagon | Europe | All | $35,145 | $33,112 | 2.5 | 5.0 | 208.0 | 23.0 | 27.0 | 3823.0 | 109.0 | 186.0 |
432 rows × 15 columns