# -*- coding: utf-8 -*-
"""
Created on Mon Apr  1 11:05:53 2019

@author: Márton
"""

import numpy as np; 
import matplotlib.pyplot as plt;
from sklearn import datasets as ds;
from sklearn import model_selection as ms;
from sklearn import cluster, metrics;

# Load the digits dataset and record the dimensions of its data matrix.
digits = ds.load_digits()
n, p = digits.data.shape  # n = number of rows, p = number of columns

# Partition into training and test datasets; t_s is the share held out
# for testing. random_state is fixed so the split is reproducible.
t_s = 0.3
X_train, X_test, y_train, y_test = ms.train_test_split(
    digits.data, digits.target, test_size=t_s, random_state=2019
)

# K-means clustering, fitted on the training partition only.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in 1.4 (with a FutureWarning in 1.2-1.3), so leaving it implicit
# makes the result depend on the installed version even with a fixed
# random_state. 10 is the historical default.
# NOTE(review): the digits targets cover 10 classes while n_c is 4; kept
# as in the original -- confirm this cluster count is intentional.
n_c = 4
kmeans = cluster.KMeans(n_clusters=n_c, n_init=10, random_state=2019)
kmeans.fit(X_train)
digits_labels_train = kmeans.labels_  # cluster index of each training row
digits_centers = kmeans.cluster_centers_
sse = kmeans.inertia_  # sum of squared distances to the closest centre
digits_labels_test = kmeans.predict(X_test)  # clusters for the unseen rows

# Goodness of fit: cross-tabulate the true digit labels (first argument)
# against the assigned cluster labels (second argument) for each partition.
cm_train = metrics.cluster.contingency_matrix(y_train, digits_labels_train)
cm_test = metrics.cluster.contingency_matrix(y_test, digits_labels_test)