# -*- coding: utf-8 -*-
"""
Spyder Editor

Reproduction of some figures from the book
C.M. Bishop: Pattern Recognition and Machine Learning

Linear basis function models
Author: Márton Ispány
Each figure lists suggested parameter values in a comment.
"""
import numpy as np;
import matplotlib.pyplot as plt;

# Global parameters
N = 10;     # number of training points
sig = 0.2;  # standard deviation of the observation noise
M = 9;      # degree of the fitted polynomial
R = 1000;   # number of replications in the simulation
res = 1000; # resolution of functions, curves etc. in figures
NT = 100;   # number of test points
B = 10;     # number of basis functions

# Figure 1: Training dataset (without noise) and the sine function
# Example: N=10 res=1000
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
plt.figure(1);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green");  # sine curve
plt.scatter(x,f,color="blue");  # training points
plt.show();

# Figure 2: Training dataset (with noise) and the sine function
# Example: N=10 sig=0.1 res=1000
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
plt.figure(2);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.scatter(x,ferr,color="blue");  # training points
plt.show();

# Figure 3: Training dataset, sine function and fitted polynomial
# Example: N=10 sig=0.1 res=1000
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
coeff = np.polynomial.polynomial.polyfit(x,ferr,M); # fitting the polynomial with the built-in least-squares routine
plt.figure(3);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.plot(np.linspace(0,2*np.pi,res),
         np.polynomial.polynomial.polyval(np.linspace(0,2*np.pi,res),coeff),color="red"); # fitted polynomial
plt.scatter(x,ferr,color="blue");  # training points
plt.show();
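# Alternative fit (a sketch, commented out): the same least-squares polynomial via the
# Polynomial convenience class; .fit() works in a scaled window, so .convert() is
# needed to obtain coefficients comparable to the polyfit call above.
#poly = np.polynomial.Polynomial.fit(x,ferr,M);
#coeff_alt = poly.convert().coef;   # coefficients in the original variable, lowest degree first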

# Figure 4: Test dataset, sine function and fitted polynomial
# Example: N=10 NT=100 sig=0.1 res=1000
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
xt = np.random.uniform(0,2*np.pi,NT);  # test input points
ferrt = np.sin(xt) + np.random.normal(0,sig,NT);  # test target values with errors
coeff = np.polynomial.polynomial.polyfit(x,ferr,M);  # fitting the polynomial with the built-in least-squares routine
plt.figure(4);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.plot(np.linspace(0,2*np.pi,res),
         np.polynomial.polynomial.polyval(np.linspace(0,2*np.pi,res),coeff),color="red"); # fitted polynomial
plt.scatter(x,ferr,color="blue");  # training points
plt.scatter(xt,ferrt,color="magenta");  # test points
plt.show(); 

# Coefficients of polynomials for different degrees
# Example: N=10
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
C = np.zeros((M+1,M+1));  # coefficient matrix
for ind in range(M+1):
    C[ind] = np.append(np.polynomial.polynomial.polyfit(x,ferr,ind),np.zeros((M-ind)));
#endfor
C = np.transpose(C);
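# Optional inspection (a sketch, commented out): printing C column by column reproduces
# the usual table of polynomial coefficients, whose magnitudes blow up as the degree grows.
#np.set_printoptions(precision=2, suppress=True);
#print(C);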

# Coefficients in list instead of array
#C = list();
#for ind in range(M+1):
#    C.append(np.polynomial.polynomial.polyfit(x,ferr,ind).tolist());


# Figure 5: Training and test RMS for different degrees
# Example: N=10 NT=100 M=9 sig=0.1
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
rms = np.zeros((M+1));  # root mean square error (RMS)
xt = np.random.uniform(0,2*np.pi,NT);  # test input points
ferrt = np.sin(xt) + np.random.normal(0,sig,NT);  # test target values with errors
rmst = np.zeros((M+1));  # root mean square error (RMS) for test data
for ind in range(M+1):
    coeff = np.polynomial.polynomial.polyfit(x,ferr,ind);
    rms[ind] = np.sqrt(np.sum((ferr - np.polynomial.polynomial.polyval(x,coeff))**2)/N); # RMS for training data
    rmst[ind] = np.sqrt(np.sum((ferrt - np.polynomial.polynomial.polyval(xt,coeff))**2)/NT); # RMS for test data
#endfor
plt.figure(5);
plt.plot(rms,color="blue"); # RMS curve for training points
plt.plot(rmst,color="red"); # RMS curve for test points
plt.show();


# Figure 6: Distribution of parameters of the fitted polynomial
# Example: N=10 M=9 R=1000 sig=0.1
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
Chist = np.zeros((R,M+1));  # History of replications
for ind in range(R):
    ferr = f + np.random.normal(0,sig,N); # target values with errors
    Chist[ind,:] = np.polynomial.polynomial.polyfit(x,ferr,M); # parameters of fitted polynomial in a replication
#endfor
parind = 5;   # parameter index
mu_hist = np.mean(Chist[:,parind]); # mean of the estimated parameter with index parind
sig_hist = np.std(Chist[:,parind]);  # standard deviation of the estimated parameter with index parind
plt.figure(6);   # histogram of the estimated parameter with normal curve
count, bins, ignored = plt.hist(Chist[:,parind],30,density=True);
plt.plot(bins,1/(sig_hist*np.sqrt(2*np.pi))*np.exp(-(bins-mu_hist)**2/(2*sig_hist**2)),linewidth=2,color='red');   
plt.show();
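# The normal density overlay above can also be drawn with scipy (a sketch, commented out;
# assumes scipy is installed, it is not used elsewhere in this script):
#from scipy.stats import norm;
#plt.plot(bins,norm.pdf(bins,mu_hist,sig_hist),linewidth=2,color='red');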

    
# Figure 7: Fit with polynomial basis functions and the sine function
# Example: N=10 B=10 res=1000 sig=0.1    
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
Phi = np.zeros((N,B));   # design matrix
for ind in range(B):
    Phi[:,ind] = x**ind;    # column of the design matrix: the basis function evaluated at the input points
#endfor
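# The same polynomial design matrix without the loop (a sketch, commented out):
# np.vander with increasing=True returns the columns x**0, x**1, ..., x**(B-1).
#Phi = np.vander(x,B,increasing=True);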
A = np.dot(np.transpose(Phi),Phi);  # matrix of the normal equation
b = np.dot(np.transpose(Phi),ferr);  # vector of the normal equation
w = np.linalg.solve(A,b);   # solving the normal equation
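# A numerically safer alternative (a sketch, commented out): solve the least-squares
# problem directly with lstsq instead of forming Phi^T*Phi, which becomes badly
# conditioned for high-degree polynomial bases.
#w, residuals, rank, sv = np.linalg.lstsq(Phi,ferr,rcond=None);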
predf = np.zeros((res));   # predicted values
for ind in range(B):
    predf += w[ind]*(np.linspace(0,2*np.pi,res)**ind);
#endfor    
plt.figure(7);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.plot(np.linspace(0,2*np.pi,res),predf,color="red");  # predicted curve
plt.scatter(x,ferr,color="blue");  # training points
plt.show();    

# Figure 8: Training and test RMS for different numbers of basis functions
# Example: N=10 NT=100 B=10 sig=0.1
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
rms = np.zeros((B));  # root mean square error (RMS)
xt = np.random.uniform(0,2*np.pi,NT);  # test input points
ferrt = np.sin(xt) + np.random.normal(0,sig,NT);  # test target values with errors
rmst = np.zeros((B));  # root mean square error (RMS) for test data
Phi = np.zeros((N,B));   # design matrix
for ind in range(B):
    Phi[:,ind] = x**ind;    # column of the design matrix: the basis function evaluated at the input points
#endfor
for ind in range(B):
    Phi0 = Phi[:,0:ind];  # the first ind columns of the design matrix
    A = np.dot(np.transpose(Phi0),Phi0);  # matrix of the normal equation
    b = np.dot(np.transpose(Phi0),ferr);  # vector of the normal equation
    w = np.linalg.solve(A,b);   # solving the normal equation
    predf = np.zeros((N));   # predicted values for training points
    for iind in range(ind):
        predf += w[iind]*(x**iind);
    #endfor     
    rms[ind] = np.sqrt(np.sum((ferr - predf)**2)/N); # RMS for training data
    predf = np.zeros((NT));   # predicted values for test points
    for iind in range(ind):
        predf += w[iind]*(xt**iind);
    #endfor      
    rmst[ind] = np.sqrt(np.sum((ferrt - predf)**2)/NT); # RMS for test data
#endfor
plt.figure(8);
plt.plot(rms,color="blue");  # RMS curve for training points
plt.plot(rmst,color="red");  # RMS curve for test points
plt.show();

# Figure 9: Regularized fit with polynomial basis functions and the sine function
# Example: N=10 B=10 res=1000
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
Phi = np.zeros((N,B));   # design matrix
lamb = np.exp(0);   # regularization parameter
for ind in range(B):
    Phi[:,ind] = x**ind;    # column of the design matrix: the basis function evaluated at the input points
#endfor
A = np.dot(np.transpose(Phi),Phi) + lamb*np.eye((B));  # matrix of the normal equation
b = np.dot(np.transpose(Phi),ferr);  # vector of the normal equation
w = np.linalg.solve(A,b);   # solving the normal equation
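# The same regularized solution via an augmented least-squares system (a sketch, commented
# out): minimizing ||Phi*w - t||^2 + lamb*||w||^2 is equivalent to an ordinary least-squares
# fit with sqrt(lamb)*I appended to Phi and zeros appended to the targets.
#Phi_aug = np.vstack((Phi,np.sqrt(lamb)*np.eye(B)));
#t_aug = np.append(ferr,np.zeros(B));
#w, _, _, _ = np.linalg.lstsq(Phi_aug,t_aug,rcond=None);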
predf = np.zeros((res));   # predicted values
for ind in range(B):
    predf += w[ind]*(np.linspace(0,2*np.pi,res)**ind);
#endfor    
plt.figure(9);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.plot(np.linspace(0,2*np.pi,res),predf,color="red");  # predicted curve
plt.scatter(x,ferr,color="blue");  # training points
plt.show();   

# Global parameter
nrp = 1000;  # number of regularization parameter values

# Figure 10: Plot of RMS versus regularization parameter for training and test datasets
# Example: N=100 NT=100 B=10 sig=0.1 nrp=1000 
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
ferr = f + np.random.normal(0,sig,N);   # training target values with errors
xt = np.random.uniform(0,2*np.pi,NT);  # test input points
ferrt = np.sin(xt) + np.random.normal(0,sig,NT);  # test target values with errors
rms = np.zeros((nrp));  # root mean square error (RMS) for training data
rmst = np.zeros((nrp)); # root mean square error (RMS) for test data
Phi = np.zeros((N,B));   # design matrix
for ind in range(B):
    Phi[:,ind] = x**ind;    # column of the design matrix: the basis function evaluated at the input points
#endfor 
A0 = np.dot(np.transpose(Phi),Phi);
b = np.dot(np.transpose(Phi),ferr);  # vector of the normal equation
left = -20;   # left endpoint of the log-lambda grid
right = 0;    # right endpoint of the log-lambda grid
loglamb = np.zeros((nrp));   # log of regularization parameter
for rind in range(nrp):
    loglamb[rind] = (rind*(right - left))/nrp + left;
    lamb = np.exp(loglamb[rind]);   # regularization parameter
    A = A0 + lamb*np.eye((B));  # matrix of the normal equation
    w = np.linalg.solve(A,b);   # solving the normal equation
    predf = np.zeros((N));   # predicted values for training data
    for ind in range(B):
        predf += w[ind]*(x**ind);
    #endfor    
    rms[rind] = np.sqrt(np.sum((ferr - predf)**2)/N);  # RMS for training data
    predf = np.zeros((NT));   # predicted values for test data
    for ind in range(B):
        predf += w[ind]*(xt**ind);
    #endfor    
    rmst[rind] = np.sqrt(np.sum((ferrt - predf)**2)/NT);  # RMS for test data
#endfor
plt.figure(10);
plt.plot(loglamb,rms,color="blue");  # RMS curve for training points
plt.plot(loglamb,rmst,color="red");  # RMS curve for test points
plt.show();

# Bias-variance decomposition for Gaussian basis functions
# Example: N=100 B=50 R= sig=0.1 res=1000
# Figure 11: individual regularized fits with Gaussian basis functions and the sine function
# Figure 12: mean of the fitted curves and the sine function
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
plt.figure(11);
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
lamb = np.exp(-0.2);   # regularization parameter
bw = 0.1; # bandwidth parameter
Phi = np.zeros((N,B));   # design matrix
#Phi[:,0] = np.ones((N));
Rsum = np.zeros((res));   # running sum of the fitted curves over replications
for rep in range(R):
    ferr = f + np.random.normal(0,sig,N);   # training target values with errors
    # the design matrix does not depend on the replication, so it could also be built once before the loop
    for ind in range(B):
        Phi[:,ind] = (1/(bw*np.sqrt(2*np.pi)))*np.exp(-(x-((ind*2*np.pi)/(B-1)))**2/(2*bw**2));  # Gaussian basis function evaluated at the input points
    #endfor
    A = np.dot(np.transpose(Phi),Phi) + lamb*np.eye((B));  # matrix of the normal equation
    b = np.dot(np.transpose(Phi),ferr);  # vector of the normal equation
    w = np.linalg.solve(A,b);   # solving the normal equation
    predf = np.zeros((res));   # predicted values
    for ind in range(B):
        predf += w[ind]*(1/(bw*np.sqrt(2*np.pi)))*np.exp(-(np.linspace(0,2*np.pi,res)-((ind*2*np.pi)/(B-1)))**2/(2*bw**2));  # same centers as in the design matrix
    #endfor
    Rsum += predf;    
    plt.figure(11);
    plt.plot(np.linspace(0,2*np.pi,res),predf,color="red");  # predicted curve
#endfor    
plt.show();
Rsum = Rsum/R;   # average of the fitted curves over replications
plt.figure(12); 
plt.plot(np.linspace(0,2*np.pi,res),np.sin(np.linspace(0,2*np.pi,res)),color="green"); # sine curve
plt.plot(np.linspace(0,2*np.pi,res),Rsum,color="red");  # predicted curve
plt.show();
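# The Gaussian design matrix used above can also be built by broadcasting (a sketch,
# commented out; assumes the same centers 2*pi*ind/(B-1) and bandwidth bw):
#centers = np.linspace(0,2*np.pi,B);
#Phi = (1/(bw*np.sqrt(2*np.pi)))*np.exp(-(x[:,None]-centers[None,:])**2/(2*bw**2));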


# Figure 13: Bias-Variance versus regularization parameter plot
# Example: N=100 NT=1000 B=25 R=20 sig=0.2 nrp=1000 
x = np.linspace(0,2*np.pi,N);  # input points 
f = np.sin(x);    # target values
xt = np.random.uniform(0,2*np.pi,NT);  # test input points
ferrt = np.sin(xt) + np.random.normal(0,sig,NT);  # test target values with errors
mst = np.zeros((nrp)); # mean square error (MSE) for test data
bw = 0.1; # bandwidth parameter
Phi = np.zeros((N,B));   # design matrix
for ind in range(B):
    Phi[:,ind] = (1/(bw*np.sqrt(2*np.pi)))*np.exp(-(x-((ind*2*np.pi)/(B-1)))**2/(2*bw**2));  # Gaussian basis function evaluated at the input points
#endfor
left = -3;  # left endpoint of loglambda
right = 3;  # right endpoint of loglambda
loglamb = np.zeros((nrp));  # log of regularization parameter
first = np.zeros((N,nrp));   # first moment of prediction curve
second = np.zeros((N,nrp));  # second moment of prediction curve
for rind in range(nrp):
    loglamb[rind] = (rind*(right - left))/nrp + left;
#endfor    
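# The same log-lambda grid in a single call (a sketch, commented out):
#loglamb = np.linspace(left,right,nrp,endpoint=False);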
A0 = np.dot(np.transpose(Phi),Phi);
for ind in range(R):
    ferr = f + np.random.normal(0,sig,N);   # training target values with errors
    b = np.dot(np.transpose(Phi),ferr);  # vector of the normal equation
    for rind in range(nrp):
        lamb = np.exp(loglamb[rind]);   # regularization parameter
        A = A0 + lamb*np.eye((B));  # matrix of the normal equation
        w = np.linalg.solve(A,b);   # solving the normal equation
        predf = np.zeros((N));   # predicted values for training data
        for iind in range(B):
            predf += w[iind]*(1/(bw*np.sqrt(2*np.pi)))*np.exp(-(x-((iind*2*np.pi)/(B-1)))**2/(2*bw**2));
        #endfor  
        first[:,rind] += predf;
        second[:,rind] += predf**2;
        if ind == 0:
            predf = np.zeros((NT));   # predicted values for test data
            for iind in range(B):
                predf += w[iind]*(1/(bw*np.sqrt(2*np.pi)))*np.exp(-(xt-((iind*2*np.pi)/(B-1)))**2/(2*bw**2));
            #endfor    
            mst[rind] = np.sum((ferrt - predf)**2)/NT;  # MSE for test data
        #endif    
    #endfor        
#endfor
first = first/R;
second = second/R;
bias2 = np.zeros((nrp));  # squared bias of the prediction
var = np.zeros((nrp));    # variance of the prediction
mean2 = np.zeros((nrp));  # average over inputs of the squared mean prediction
for rind in range(nrp):
    bias2[rind] = np.sum((first[:,rind] - f)**2)/N;
    var[rind] = np.sum(second[:,rind])/N;
    mean2[rind] = np.sum(first[:,rind]**2)/N;
#endfor    
var = var - mean2;   # variance = E[pred**2] - (E[pred])**2, averaged over the input points
plt.figure(13);
plt.plot(loglamb,bias2,color="blue");  # squared-bias curve
plt.plot(loglamb,var,color="red");  # variance curve
plt.plot(loglamb,bias2+var,color="green");  # squared bias + variance curve
plt.plot(loglamb,mst,color="black");  # MSE curve for test data
plt.show();
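# For comparison with the test MSE curve, the irreducible noise sig**2 can be added to the
# squared bias plus variance (a sketch, commented out); in theory squared bias + variance
# + noise approximates the expected test error.
#plt.figure(13);
#plt.plot(loglamb,bias2+var+sig**2*np.ones(nrp),color="orange");
#plt.show();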

