# -*- coding: utf-8 -*-
"""
Created on Mon Nov 30 14:44:21 2020

@author: Márton
"""

import numpy as np;  # importing numerical library
from sklearn.datasets import load_digits; # importing digits dataset
from sklearn.feature_selection import SelectKBest;
from sklearn.linear_model import LogisticRegression; # Class for logistic regression
from sklearn.preprocessing import Binarizer; # importing binarizer class

# Load the digits dataset and record its dimensions.
digits = load_digits()
n, p = digits.data.shape  # n = number of records, p = number of attributes

# Keep the 8 attributes ranked best by SelectKBest's default scoring function.
feature_selection = SelectKBest(k=8)
new_data = feature_selection.fit_transform(digits.data, digits.target)
mask = feature_selection.get_support()  # boolean mask of the kept attributes

# NOTE: every score below is computed on the same data the model was fitted
# on, so it is a training accuracy, not an estimate of generalisation.

# Logistic regression on the whole dataset (all attributes).
logreg = LogisticRegression(solver='liblinear')
logreg.fit(digits.data, digits.target)
score = logreg.score(digits.data, digits.target)

# Logistic regression on the selected attributes only.
logreg_partial = LogisticRegression(solver='liblinear')
logreg_partial.fit(new_data, digits.target)
score_partial = logreg_partial.score(new_data, digits.target)

# Binarize the attributes: values above the threshold become 1, the rest 0.
binarizer = Binarizer(threshold=8)
binarized_data = binarizer.fit_transform(digits.data)

# Logistic regression on the binarized attributes.
logreg_bin = LogisticRegression(solver='liblinear')
logreg_bin.fit(binarized_data, digits.target)
score_bin = logreg_bin.score(binarized_data, digits.target)