Create regression_autos

TheAlgorithms · Suja1212 · Feb 9, 2025 · Feb 9, 2025 · 7f8ce4ebfcd55f446b39df4acddb3e36e96cf532
commit 7f8ce4ebfcd55f446b39df4acddb3e36e96cf532
diff --git a/regression_autos b/regression_autos
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+"""Regression Autos
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1beRq-XbKLbs_4AOP_0nIX12uKUdTQW6U
+"""
+
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Load the dataset
+file_path = "/content/_autos - regression - autos.csv"  # Replace with your actual file path
+data = pd.read_csv(file_path)
+
+# Drop rows with no target instead of imputing it: a median-filled 'price' is a
+# fabricated label that would bias both the fit and the reported metrics.
+data = data.dropna(subset=['price'])
+
+# Encode categorical variables using one-hot encoding
+data = pd.get_dummies(data, drop_first=True)
+
+# Define the features (X) and target variable (y); 'price' is the target
+X = data.drop(columns=['price'])
+y = data['price']
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Fill missing numerical values with medians computed on the training split only,
+# so no statistic derived from the held-out rows leaks into the model.
+train_medians = X_train.median(numeric_only=True)
+X_train, X_test = X_train.fillna(train_medians), X_test.fillna(train_medians)
+
+# Train a linear regression model and predict on the test set
+model = LinearRegression()
+model.fit(X_train, y_train)
+y_pred = model.predict(X_test)
+
+# Evaluate the model and display the results
+mse = mean_squared_error(y_test, y_pred)
+r2 = r2_score(y_test, y_pred)
+print(f"Mean Squared Error: {mse}")
+print(f"R-squared: {r2}")
+
+# Optional: Display coefficients for interpretation
+coefficients = pd.DataFrame(model.coef_, X.columns, columns=['Coefficient'])
+print(coefficients)