haikal/clean_specs.py
2024-12-17 16:29:10 +03:00

31 lines
1.1 KiB
Python

import csv
import io

import pandas as pd
# Specify the path to your CSV file
file_path = 'car_specification_value.csv'
# Path the cleaned copy is written to
cleaned_file_path = 'car_specification_value_cleaned.csv'
# Rows with fewer fields than this are dropped; longer rows are truncated to it.
FIELD_COUNT = 8
# pandas.read_csv decodes UTF-8 by default, so the cleaned file is written as
# UTF-8 explicitly instead of relying on the platform locale encoding.
# NOTE(review): the source file is assumed to be UTF-8 as well -- confirm.
FILE_ENCODING = 'utf-8'


def clean_row(line, field_count=FIELD_COUNT):
    """Split one raw line into cleaned fields.

    The line is split on every comma; each piece is whitespace-stripped and
    then has all single quotes removed.

    Args:
        line: One raw text line from the source file (a trailing newline is
            allowed).
        field_count: Minimum number of fields required; also the number of
            fields kept.

    Returns:
        A list with the first ``field_count`` cleaned fields, or ``None``
        when the line has fewer than ``field_count`` fields.
    """
    columns = [col.strip().replace("'", "") for col in line.strip().split(',')]
    if len(columns) < field_count:
        return None
    return columns[:field_count]


def render_csv(rows):
    """Return ``rows`` as CSV text with every field double-quoted.

    ``csv.writer`` doubles any double quote embedded in a field, so a value
    such as ``5" rim`` is written as ``"5"" rim"`` and reads back unchanged.
    Each row is terminated with ``\\n``; an empty ``rows`` yields ``''``.

    Args:
        rows: Iterable of sequences of field strings.

    Returns:
        The CSV text as a single string.
    """
    buffer = io.StringIO()
    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator='\n')
    writer.writerows(rows)
    return buffer.getvalue()


# Read the file and fix inconsistencies
try:
    # Stream the source line by line, keeping only rows with enough fields
    with open(file_path, 'r', encoding=FILE_ENCODING) as file:
        fixed_rows = [row for row in map(clean_row, file) if row is not None]

    # Save the cleaned data to a new CSV file
    with open(cleaned_file_path, 'w', encoding=FILE_ENCODING) as file:
        file.write(render_csv(fixed_rows))

    # Load the cleaned data into a DataFrame
    cleaned_df = pd.read_csv(cleaned_file_path)
    print("Cleaned data preview:")
    print(cleaned_df.head())
    print(f"Cleaned file saved as: {cleaned_file_path}")
except Exception as e:
    # Top-level boundary of the script: report the failure instead of a traceback.
    print(f"An error occurred: {e}")