# Pima Indians Diabetes - Logistic Regression Practice
This notebook is designed to practice Logistic Regression using the Pima Indians Diabetes Dataset.
## Dataset Overview
- Source: UCI Machine Learning Repository
- Problem: a binary classification problem, predicting diabetes based on health information of Pima Indian women.
- Number of Features: 8
- Target Variable: `Outcome`, the diabetes status (1 = Diabetes, 0 = No Diabetes)
## Data Download
The data can be downloaded from the link below:
https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database
Alternatively, you can use a locally saved CSV file named 07_2_diabetes.csv.
## Load Data
```python
import pandas as pd

# Load the dataset from the local CSV file
df = pd.read_csv('07_2_diabetes.csv')
df.head()
```
| | Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
| 1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
| 2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
| 3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
| 4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
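The preview above already shows zeros in columns such as SkinThickness and Insulin; in this dataset, zeros in several measurement columns are commonly read as missing values rather than true readings. A quick sanity check of the class balance and those zeros (an optional step, not part of the original notebook):

```python
# Class balance of the target variable
print(df['Outcome'].value_counts())

# Columns where a value of 0 is physiologically implausible and
# likely marks a missing measurement (a common reading of this dataset)
suspect_cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
print((df[suspect_cols] == 0).sum())
```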
## Split Train/Test Dataset
```python
from sklearn.model_selection import train_test_split

# Separate the features from the target
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```
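If the class-balance check above shows an imbalance, a stratified split keeps the Outcome ratio the same in both partitions. A minimal variant of the split, not what the notebook actually ran:

```python
# Stratified variant: preserves the class ratio in train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
```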
## Train the Logistic Regression Model
```python
from sklearn.linear_model import LogisticRegression

# max_iter is raised from the default of 100 so the solver
# converges on this unscaled data
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
```
LogisticRegression(max_iter=1000)
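The fitted coefficients are on the scale of the raw (unscaled) features, so their magnitudes are not directly comparable across features, but their signs still indicate the direction of association. A minimal inspection sketch, not part of the original notebook:

```python
import pandas as pd

# Coefficient per feature; positive values push predictions toward
# the positive (diabetes) class
coef = pd.Series(model.coef_[0], index=X.columns).sort_values()
print(coef)
print('Intercept:', model.intercept_[0])
```

Standardizing the features first (e.g. with StandardScaler in a Pipeline) would make the magnitudes comparable and usually lets the solver converge in far fewer iterations.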
## Prediction & Evaluation
```python
from sklearn.metrics import accuracy_score

# Predict on the held-out test set and compute accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
```
Accuracy: 0.75
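Accuracy alone can hide how the errors split between the two classes. A confusion matrix and per-class precision/recall give a fuller picture; this is an extra check, not in the original notebook:

```python
from sklearn.metrics import confusion_matrix, classification_report

# Rows are actual classes, columns are predicted classes
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['No Diabetes', 'Diabetes']))
```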
## Visualization (matplotlib)
```python
import matplotlib.pyplot as plt
import numpy as np

labels = ['No Diabetes', 'Diabetes']

# Count how many samples fall in each class, actual vs predicted
actual_counts = np.bincount(y_test)
predicted_counts = np.bincount(y_pred)

# Grouped bar chart: one pair of bars per class
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()
ax.bar(x - width/2, actual_counts, width, label='Actual')
ax.bar(x + width/2, predicted_counts, width, label='Predicted')
ax.set_xlabel('Outcome')
ax.set_ylabel('Count')
ax.set_title('Actual vs Predicted Counts')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
plt.tight_layout()
plt.show()
```
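The bar chart compares class counts only. The model's probability outputs can be summarized with a ROC curve and its AUC; a short sketch using predict_proba, added here as an optional extension:

```python
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, RocCurveDisplay

# Probability assigned to the positive (diabetes) class
y_proba = model.predict_proba(X_test)[:, 1]
print(f'ROC AUC: {roc_auc_score(y_test, y_proba):.2f}')

# ROC curve for the fitted model on the test set
RocCurveDisplay.from_predictions(y_test, y_proba)
plt.show()
```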