from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Estimate a classifier's accuracy with k-fold cross-validation: split the
# data into k folds, train on k-1 of them, score on the held-out fold, and
# average the per-fold accuracies.

# Sample dataset and labels.
# NOTE: `...` is a placeholder and cannot be unpacked as-is; replace it with
# real data before running. The indexing in the loop below assumes X and y
# accept integer-array indexing (e.g. NumPy arrays) -- TODO confirm for your
# data.
X, y = ...  # Your dataset and labels

# Define the number of folds
k = 5

# Initialize KFold cross-validation. shuffle=True with a fixed random_state
# is intended to make the fold assignment repeatable between runs.
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize a classifier (e.g., Logistic Regression)
clf = LogisticRegression()

# Accuracy score of each fold, in the order the splits are produced
accuracies = []

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X):
    # Select the rows belonging to this fold's training and test partitions
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the classifier on the training data
    clf.fit(X_train, y_train)

    # Predict on the test data
    y_pred = clf.predict(X_test)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy. Divide by the number of scores
# actually collected so the denominator always matches the list contents.
mean_accuracy = sum(accuracies) / len(accuracies)
print(f"Mean Accuracy: {mean_accuracy}")
# Comments (appears to be a section label left over from the page this snippet was copied from; not code)