Last active
May 25, 2025 21:54
-
-
Save plushycat/bb539e3497dfe60d31a77d53bf6c98cc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LinearRegression | |
from sklearn.preprocessing import PolynomialFeatures, StandardScaler | |
from sklearn.pipeline import make_pipeline | |
from sklearn.metrics import mean_squared_error, r2_score | |
def evaluate_and_plot(X_test, y_test, y_pred, xlabel, ylabel, title, is_poly=False):
    """Plot actual vs. predicted test targets, then print MSE and R² for them.

    Args:
        X_test: Feature values used as x-coordinates; handed to matplotlib as-is.
        y_test: Actual target values for ``X_test``.
        y_pred: Predicted target values for ``X_test``.
        xlabel: Label for the x-axis.
        ylabel: Label for the y-axis.
        title: Figure title; also printed as a header above the metrics.
        is_poly: If truthy, predictions are drawn as scattered points; otherwise
            they are drawn as a connected line.
    """
    plt.scatter(X_test, y_test, color="blue", label="Actual", alpha=0.7)

    # Both renderings of the predictions share colour and legend label.
    predicted_style = {"color": "red", "label": "Predicted"}
    if is_poly:
        # NOTE(review): points rather than a line, presumably because X_test is
        # not sorted and a connected curve would zig-zag -- confirm with callers.
        plt.scatter(X_test, y_pred, alpha=0.7, **predicted_style)
    else:
        plt.plot(X_test, y_pred, **predicted_style)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Metrics are reported only after plt.show() has returned.
    print(title)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse:.2f}")
    r2 = r2_score(y_test, y_pred)
    print(f"R² Score: {r2:.2f}\n")
def linear_regression_boston():
    """Fit a single-feature linear regression on Boston Housing and report it.

    Downloads the CSV, regresses ``medv`` on ``rm``, holds out 20% of the rows
    for testing (``random_state=42``), and passes the held-out predictions to
    ``evaluate_and_plot``.
    """
    data_url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
    housing = pd.read_csv(data_url)

    # Double brackets keep the feature as a one-column DataFrame.
    features, target = housing[["rm"]], housing["medv"]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    evaluate_and_plot(
        X_test,
        y_test,
        predictions,
        xlabel="Average number of rooms per dwelling (RM)",
        ylabel="Median value of homes ($1000s)",
        title="Linear Regression - Boston Housing",
    )
def polynomial_regression_auto_mpg():
    """Fit a degree-2 polynomial regression of MPG on displacement and report it.

    Downloads the seaborn ``mpg`` CSV, regresses ``mpg`` on ``displacement``
    through a PolynomialFeatures(2) -> StandardScaler -> LinearRegression
    pipeline, holds out 20% of the rows for testing (``random_state=42``), and
    passes the held-out predictions to ``evaluate_and_plot`` with
    ``is_poly=True``.
    """
    url2 = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv"
    feature_col, target_col = "displacement", "mpg"

    # Drop a row only when one of the two columns actually used is missing.
    # A blanket dropna() would also throw away rows whose only gap is in a
    # column this model never reads (NOTE(review): presumably `horsepower` in
    # this dataset -- confirm), shrinking the training data for no reason.
    # The retained row count, and therefore the split and printed metrics,
    # can differ from a blanket dropna().
    df = pd.read_csv(url2).dropna(subset=[feature_col, target_col])

    X = df[[feature_col]]
    y = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = make_pipeline(PolynomialFeatures(2), StandardScaler(), LinearRegression())
    model.fit(X_train, y_train)

    evaluate_and_plot(X_test, y_test, model.predict(X_test),
                      "Displacement", "Miles per gallon (MPG)",
                      "Polynomial Regression - Auto MPG", is_poly=True)
# Run both demos in order: the linear fit first, then the polynomial fit.
for run_demo in (linear_regression_boston, polynomial_regression_auto_mpg):
    run_demo()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment