# -*- coding: utf-8 -*-
"""
Created on Sun May 10 23:37:40 2020

Fitting classifiers for Digits dataset
Classifiers: logistic regression, naive Bayes, nearest neighbor, neural network

@author: Márton Ispány
"""

from sklearn import datasets as ds; # importing scikit-learn datasets
from sklearn import model_selection as ms; # importing model selection tools
from sklearn import linear_model as lm; #  importing linear models
from sklearn import naive_bayes as nb; #  importing naive Bayes classifiers
from sklearn import neighbors;    # importing nearest neighbor methods
from sklearn import neural_network as nn; # importing neural network models

# Load the Digits dataset shipped with scikit-learn
digits = ds.load_digits()
n, p = digits.data.shape  # n: number of records, p: number of attributes

# Hold out 30% of the records as a test set; the fixed seed keeps the
# shuffled split identical from run to run
X_train, X_test, y_train, y_test = ms.train_test_split(
    digits.data,
    digits.target,
    test_size=0.3,
    shuffle=True,
    random_state=2020,
)

# Fitting logistic regression
# max_iter is raised above the default of 100: on the raw (unscaled) pixel
# features the default iteration budget is typically used up before the
# solver converges, which triggers a ConvergenceWarning and leaves the
# coefficients short of the optimum.
logreg_classifier = lm.LogisticRegression(max_iter=10000)
logreg_classifier.fit(X_train, y_train)
score_logreg = logreg_classifier.score(X_test, y_test)  # mean accuracy on the test set
ypred_logreg = logreg_classifier.predict(X_test)  # predicted digit labels
yprobab_logreg = logreg_classifier.predict_proba(X_test)  # per-class prediction probabilities

# Gaussian naive Bayes classifier
# fit() returns the fitted estimator, so construction and training are chained
naive_bayes_classifier = nb.GaussianNB().fit(X_train, y_train)

# Evaluation on the held-out test set
yprobab_naive_bayes = naive_bayes_classifier.predict_proba(X_test)  # per-class prediction probabilities
ypred_naive_bayes = naive_bayes_classifier.predict(X_test)  # predicted digit labels
score_naive_bayes = naive_bayes_classifier.score(X_test, y_test)  # mean accuracy

# K-nearest-neighbor classifier
K = 5  # how many neighbors vote on each prediction
knn_classifier = neighbors.KNeighborsClassifier(n_neighbors=K).fit(X_train, y_train)

# Evaluation on the held-out test set
yprobab_knn = knn_classifier.predict_proba(X_test)  # per-class prediction probabilities
ypred_knn = knn_classifier.predict(X_test)  # predicted digit labels
score_knn = knn_classifier.score(X_test, y_test)  # mean accuracy

# Fitting neural network classifier
# A single hidden layer of 16 neurons. The size is written as a one-element
# tuple because "(16)" is just the integer 16, not a tuple of layer sizes.
# random_state reuses the seed of the train/test split: the network's weight
# initialisation and batch shuffling are random, so without a seed the test
# score changes from run to run and cannot be compared reliably with the
# other classifiers.
neural_classifier = nn.MLPClassifier(hidden_layer_sizes=(16,), random_state=2020)
neural_classifier.fit(X_train, y_train)
score_neural = neural_classifier.score(X_test, y_test)  # mean accuracy on the test set
ypred_neural = neural_classifier.predict(X_test)  # predicted digit labels
yprobab_neural = neural_classifier.predict_proba(X_test)  # per-class prediction probabilities

#  The best model based on test score is Nearest neighbor with 98.8%
