from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()


from bokeh.layouts import column,row
from bokeh.models import CustomJS, ColumnDataSource, Slider

# Set up data
import numpy as np
N = 200
u = np.linspace(-4, 4, N)
p = np.abs(u)+0.5*np.power(u,2)

source = ColumnDataSource(data=dict(u=u, p=p))
opt = ColumnDataSource(data=dict(u=[0.0], p=[0.0]))

# Set up plot
plot = figure(height=400, width=400, title="stability of minimizers",
              x_range=[-4, 4], y_range=[-0.5, 12.5])

plot.line('u', 'p', source=source, line_width=3, line_alpha=0.6,legend_label="s_g")
plot.circle('u', 'p', source=opt, size = 5, color="black",legend_label=r"minimizer = prox_{\gamma g}(x)")
plot.legend.location = "top_left"



slider_gamma = Slider(start=0.01, end=4, value=1, step=.01, title="stepsize $$\gamma$$")
slider_x = Slider(start=-4.0, end=4.0, value=0.0, step=.1, title="input point $$x$$")

update_curve = CustomJS(args=dict(source=source, gamma=slider_gamma, xslider=slider_x ), code="""
    const g = gamma.value
    const x = xslider.value
    const u = source.data.u
    const p = Array.from(u, (u) => Math.abs(u) + 0.5*Math.pow(u-x, 2)/g)
    source.data = { u, p }
""")
slider_gamma.js_on_change('value', update_curve)
slider_x.js_on_change('value', update_curve)


update_opt = CustomJS(args=dict(opt=opt, gamma=slider_gamma, xslider=slider_x ), code="""
    const g = gamma.value
    const x = xslider.value
    const p = opt.data.p
    const u = Array.from(p, (p) => Math.sign(x)*Math.max(0,Math.abs(x)-g)) 
    opt.data = { u , p }
""")
slider_gamma.js_on_change('value', update_opt)
slider_x.js_on_change('value', update_opt)

show(row(column(slider_gamma,slider_x), plot))


import numpy as np

x0 = np.array([0,1,0,0.5,-0.9])
n = x0.size

m = 10

A = np.random.randn(m,n)

b = np.dot(A,x0) + 0.1*np.random.randn(m)

lam = 0.5


def lasso(x,A=A,b=b,lam=lam):
    return 0.5*np.linalg.norm(np.dot(A,x)-b) + lam*np.linalg.norm(x,1)


from scipy.optimize import minimize
x0 = np.zeros(n)
res = minimize(lasso, x0)


import matplotlib.cm as cm
import matplotlib.pyplot as plt

delta = 0.025
x = np.arange(-2.0, 2.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)

Z = np.copy(X)
p = res.x
for index,x in np.ndenumerate(X):
    p[0] = x
    p[1] = Y[index]
    Z[index] = lasso(p)


fig, ax = plt.subplots()
CS = ax.contour(X, Y, Z)
ax.clabel(CS, inline=True, fontsize=10)
ax.set_title('Level sets of the lasso problem')

Text(0.5, 1.0, 'Level sets of the lasso problem')


import numpy as np

n, m = 20, 50
prop_null = 0.5
null_coord = np.random.choice(n, int(prop_null*n), replace=False)

x0 = np.random.randn(n)
x0[null_coord] = 0.0


np.random.seed(42)

A   = np.random.randn(m,n)

b   = np.dot(A,x0) + 0.1*np.random.randn(m)

lam = 0.1*n

x0

array([ 0.        , -0.32051567,  0.16218066,  0.        , -2.53108861,
        0.        , -1.63263694,  0.        ,  1.14547535, -0.88881661,
        1.10114863, -1.00240739,  0.        ,  0.        ,  0.        ,
        0.26347435,  0.        ,  0.92921617,  0.        ,  0.        ])


def lasso(x,A=A,b=b,lam=lam):
    return 0.5*np.linalg.norm(np.dot(A,x)-b)**2 + lam*np.linalg.norm(x,1)


from scipy.optimize import minimize


res = minimize(lasso, np.zeros(n),  options={'gtol': 1e-5, 'disp': True})

Warning: Desired error not necessarily achieved due to precision loss.
         Current function value: 19.368645
         Iterations: 82
         Function evaluations: 4690
         Gradient evaluations: 223


xopt_GP = res.x


import cvxpy as cp

# Construct the problem.
x = cp.Variable(n)
objective = cp.Minimize(0.5*cp.sum_squares(A @ x - b)+lam*cp.norm(x,1))
prob = cp.Problem(objective)

# The optimal objective value is returned by `prob.solve()`.
result = prob.solve(verbose = True)
# The optimal value for x is stored in `x.value`.

===============================================================================
                                     CVXPY                                     
                                     v1.2.1                                    
===============================================================================
(CVXPY) Dec 07 10:30:38 AM: Your problem has 20 variables, 0 constraints, and 0 parameters.
(CVXPY) Dec 07 10:30:38 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Dec 07 10:30:38 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Dec 07 10:30:38 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Dec 07 10:30:38 AM: Compiling problem (target solver=OSQP).
(CVXPY) Dec 07 10:30:38 AM: Reduction chain: CvxAttr2Constr -> Qp2SymbolicQp -> QpMatrixStuffing -> OSQP
(CVXPY) Dec 07 10:30:38 AM: Applying reduction CvxAttr2Constr
(CVXPY) Dec 07 10:30:38 AM: Applying reduction Qp2SymbolicQp
(CVXPY) Dec 07 10:30:38 AM: Applying reduction QpMatrixStuffing
(CVXPY) Dec 07 10:30:38 AM: Applying reduction OSQP
(CVXPY) Dec 07 10:30:38 AM: Finished problem compilation (took 1.259e-02 seconds).
-------------------------------------------------------------------------------
                                Numerical solver                               
-------------------------------------------------------------------------------
(CVXPY) Dec 07 10:30:38 AM: Invoking solver OSQP  to obtain a solution.
-----------------------------------------------------------------
           OSQP v0.6.2  -  Operator Splitting QP Solver
              (c) Bartolomeo Stellato,  Goran Banjac
        University of Oxford  -  Stanford University 2021
-----------------------------------------------------------------
problem:  variables n = 90, constraints m = 90
          nnz(P) + nnz(A) = 1180
settings: linear system solver = qdldl,
          eps_abs = 1.0e-05, eps_rel = 1.0e-05,
          eps_prim_inf = 1.0e-04, eps_dual_inf = 1.0e-04,
          rho = 1.00e-01 (adaptive),
          sigma = 1.00e-06, alpha = 1.60, max_iter = 10000
          check_termination: on (interval 25),
          scaling: on, scaled_termination: off
          warm start: on, polish: on, time_limit: off

iter   objective    pri res    dua res    rho        time
   1  -3.2000e+02   8.00e+00   1.04e+04   1.00e-01   3.12e-04s
 100   1.9369e+01   6.18e-05   7.46e-06   1.30e+00   9.79e-04s
plsh   1.9369e+01   1.46e-15   8.38e-15   --------   1.19e-03s

status:               solved
solution polish:      successful
number of iterations: 100
optimal objective:    19.3686
run time:             1.19e-03s
optimal rho estimate: 2.13e+00

-------------------------------------------------------------------------------
                                    Summary                                    
-------------------------------------------------------------------------------
(CVXPY) Dec 07 10:30:38 AM: Problem status: optimal
(CVXPY) Dec 07 10:30:38 AM: Optimal value: 1.937e+01
(CVXPY) Dec 07 10:30:38 AM: Compilation took 1.259e-02 seconds
(CVXPY) Dec 07 10:30:38 AM: Solver (including time spent in interface) took 2.117e-03 seconds


xopt_CVXPY = x.value


from sklearn import linear_model

clf = linear_model.Lasso(alpha=lam/n,fit_intercept=False)
clf.fit(A,b)

Lasso(alpha=0.1, fit_intercept=False)

Lasso(alpha=0.1, fit_intercept=False)


xopt_sklearn = clf.coef_


def f(x,A=A,b=b):
    return 0.5*np.linalg.norm(np.dot(A,x)-b)

def grad_f(x,A=A,b=b):
    return np.dot(A.T,(np.dot(A,x)-b))


def g(x,lam=lam):
    return  lam*np.linalg.norm(x,1)

def prox_g(x,gamma,lam=lam):
    n = x.size
    p = np.zeros(n)
    for i in range(n):
        if x[i] > gamma*lam:
            p[i] = x[i] - gamma*lam
        elif x[i] < -gamma*lam:
            p[i] = x[i] + gamma*lam
    return p


x = np.zeros(n)

ITE_MAX = 10000

gamma = 1/np.linalg.norm(A)**2

for i in range(ITE_MAX):
    g = grad_f(x)
    x_prev = x.copy()
    
    x = prox_g(x-gamma*g,gamma)

    if np.linalg.norm(x-x_prev)<= 1e-5:
        
        break


xopt_PG = x.copy()


def f(x,A=A,b=b):
    return 0.5*np.linalg.norm(np.dot(A,x)-b)

def prox_f(x,gamma,A=A,b=b):
    return np.linalg.solve(np.dot(A.T,A)+np.eye(n)/gamma, np.dot(A.T,b) + x/gamma)


def g(x,lam=lam):
    return  lam*np.linalg.norm(x,1)

def prox_g(x,gamma,lam=lam):
    n = x.size
    p = np.zeros(n)
    for i in range(n):
        if x[i] > gamma*lam:
            p[i] = x[i] - gamma*lam
        elif x[i] < -gamma*lam:
            p[i] = x[i] + gamma*lam
    return p


x = np.zeros(n)
u = np.zeros(n)
z = np.zeros(n)

ITE_MAX = 10000

gamma = 1.0

for i in range(ITE_MAX):
    x = prox_g(u,gamma)
    z = prox_f(2*x-u,gamma)
    u = u + z - x


xopt_Split = x.copy()


print("         Gen.Purp. \t\t CVXPY     \t\t sklearn   \t\t Prox. Grad. \t\t Split.")
for i in range(n):
    print("{:3d} - \t{:10.3e} \t\t{:10.3e} \t\t{:10.3e} \t\t{:10.3e} \t\t{:10.3e}".format(i+1,xopt_GP[i],xopt_CVXPY[i],xopt_sklearn[i],xopt_PG[i],xopt_Split[i]))
    
print("")
print("obj.: \t{:7.3f} \t\t{:7.3f} \t\t{:7.3f} \t\t{:7.3f} \t\t{:7.3f}".format(lasso(xopt_GP),lasso(xopt_CVXPY),lasso(xopt_sklearn),lasso(xopt_PG),lasso(xopt_Split)))

         Gen.Purp. 		 CVXPY     		 sklearn   		 Prox. Grad. 		 Split.
  1 - 	-3.898e-09 		 1.459e-21 		 0.000e+00 		 0.000e+00 		 0.000e+00
  2 - 	-2.860e-01 		-2.860e-01 		-2.452e-01 		-2.861e-01 		-2.860e-01
  3 - 	 5.114e-02 		 5.114e-02 		-0.000e+00 		 5.100e-02 		 5.114e-02
  4 - 	 3.488e-09 		 2.204e-21 		 0.000e+00 		 0.000e+00 		 0.000e+00
  5 - 	-2.449e+00 		-2.449e+00 		-2.323e+00 		-2.449e+00 		-2.449e+00
  6 - 	-1.965e-08 		 2.374e-21 		-0.000e+00 		 0.000e+00 		 0.000e+00
  7 - 	-1.563e+00 		-1.563e+00 		-1.454e+00 		-1.563e+00 		-1.563e+00
  8 - 	-9.235e-09 		-1.785e-22 		 0.000e+00 		 0.000e+00 		 0.000e+00
  9 - 	 1.055e+00 		 1.055e+00 		 9.128e-01 		 1.055e+00 		 1.055e+00
 10 - 	-7.764e-01 		-7.764e-01 		-6.727e-01 		-7.763e-01 		-7.764e-01
 11 - 	 9.661e-01 		 9.661e-01 		 8.403e-01 		 9.662e-01 		 9.661e-01
 12 - 	-9.750e-01 		-9.750e-01 		-9.263e-01 		-9.749e-01 		-9.750e-01
 13 - 	-2.482e-09 		-1.038e-21 		-0.000e+00 		 0.000e+00 		 0.000e+00
 14 - 	-2.801e-07 		-1.201e-21 		-0.000e+00 		 0.000e+00 		 0.000e+00
 15 - 	 5.894e-09 		 1.064e-21 		 0.000e+00 		 0.000e+00 		 0.000e+00
 16 - 	 1.928e-01 		 1.928e-01 		 1.462e-01 		 1.927e-01 		 1.928e-01
 17 - 	 4.849e-09 		 2.146e-22 		-0.000e+00 		 0.000e+00 		 0.000e+00
 18 - 	 8.375e-01 		 8.375e-01 		 6.885e-01 		 8.370e-01 		 8.375e-01
 19 - 	 2.528e-05 		-1.338e-21 		 0.000e+00 		 5.931e-05 		 0.000e+00
 20 - 	 1.568e-02 		 1.568e-02 		 0.000e+00 		 1.597e-02 		 1.568e-02

obj.: 	 19.369 		 19.369 		 20.661 		 19.369 		 19.369


def non_null_coords(x,thres=1e-30):
    non_null = []
    for i,v in enumerate(x):
        if np.abs(v)>thres:
            non_null.append(i)
    return non_null


import matplotlib.pyplot as plt

fig = plt.figure()
ax1 = fig.add_subplot(111)

ax1.scatter(np.ones(np.size(non_null_coords(xopt_GP))),non_null_coords(xopt_GP),label="gen. purp.")
ax1.scatter(2*np.ones(np.size(non_null_coords(xopt_CVXPY))),non_null_coords(xopt_CVXPY),label="CVXPY")
ax1.scatter(3*np.ones(np.size(non_null_coords(xopt_sklearn))),non_null_coords(xopt_sklearn),label="sklearn")
ax1.scatter(4*np.ones(np.size(non_null_coords(xopt_PG))),non_null_coords(xopt_PG),label="prox. grad.")
ax1.scatter(5*np.ones(np.size(non_null_coords(xopt_Split))),non_null_coords(xopt_Split),label="Split.")
ax1.scatter(6*np.ones(np.size(non_null_coords(x0))),non_null_coords(x0),label="x0")
plt.legend(loc='upper left')
plt.show()


import numpy as np
import csv

#### File reading
dat_file = np.load('student.npz')
A = dat_file['A_learn']
final_grades = dat_file['b_learn']
m = final_grades.size
b = np.zeros(m)
for i in range(m):
    if final_grades[i]>11:
        b[i] = 1.0
    else:
        b[i] = -1.0

A_test = dat_file['A_test']
final_grades_test = dat_file['b_test']
m_test = final_grades_test.size
b_test = np.zeros(m_test)
for i in range(m_test):
    if final_grades_test[i]>11:
        b_test[i] = 1.0
    else:
        b_test[i] = -1.0


d = 27 # features
n = d+1 # with the intercept




lam2 = 0.1 # for the 2-norm regularization best:0.1
lam1 = 0.03 # for the 1-norm regularization best:0.03


L = 0.25*max(np.linalg.norm(A,2,axis=1))**2 + lam2


def f(x,A=A,b=b):
    l = 0.0
    for i in range(A.shape[0]):
        if b[i] > 0 :
            l += np.log( 1 + np.exp(-np.dot( A[i] , x ) ) ) 
        else:
            l += np.log( 1 + np.exp(np.dot( A[i] , x ) ) ) 
    return l/m + lam2/2.0*np.dot(x,x)

def grad_f(x,lam2=lam2):
    g = np.zeros(n)
    for i in range(A.shape[0]):
        if b[i] > 0:
            g += -A[i]/( 1 + np.exp(np.dot( A[i] , x ) ) ) 
        else:
            g += A[i]/( 1 + np.exp(-np.dot( A[i] , x ) ) ) 
    return g/m + lam2*x


def g(x,lam1=lam1):
    return lam1*np.linalg.norm(x,1)

def prox_g(x,gamma,lam1=lam1):
    p = np.zeros(n)
    for i in range(n):
        if x[i] < - lam1*gamma:
            p[i] = x[i] + lam1*gamma
        if x[i] > lam1*gamma:
            p[i] = x[i] - lam1*gamma
    return p


def F(x):
    return f(x) + g(x)


def prediction_train(w,PRINT=True):
    pred = np.zeros(A.shape[0])
    perf = 0
    for i in range(A.shape[0]):
        p = 1.0/( 1 + np.exp(-np.dot( A[i] , w ) ) )
        if p>0.5:
            pred[i] = 1.0
            if b[i]>0:
                correct = "True"
                perf += 1
            else:
                correct = "False"
            if PRINT:
                print("True class: {:d} \t-- Predicted: {} \t(confidence: {:.1f}%)\t{}".format(int(b[i]),1,(p-0.5)*200,correct))
        else:
            pred[i] = -1.0
            if b[i]<0:
                correct = "True"
                perf += 1
            else:
                correct = "False"
            if PRINT:
                print("True class: {:d} \t-- Predicted: {} \t(confidence: {:.1f}%)\t{}".format(int(b[i]),-1,100-(0.5-p)*200,correct))
    return pred,float(perf)/A.shape[0]

def prediction_test(w,PRINT=True):
    pred = np.zeros(A_test.shape[0])
    perf = 0
    for i in range(A_test.shape[0]):
        p = 1.0/( 1 + np.exp(-np.dot( A_test[i] , w ) ) )
        if p>0.5:
            pred[i] = 1.0
            if b_test[i]>0:
                correct = "True"
                perf += 1
            else:
                correct = "False"
            if PRINT:
                print("True class: {:d} \t-- Predicted: {} \t(confidence: {:.1f}%)\t{}".format(int(b[i]),1,(p-0.5)*200,correct))
        else:
            pred[i] = -1.0
            if b_test[i]<0:
                correct = "True"
                perf += 1
            else:
                correct = "False"
            if PRINT:
                print("True class: {:d} \t-- Predicted: {} \t(confidence: {:.1f}%)\t{}".format(int(b[i]),-1,100-(0.5-p)*200,correct))
    return pred,float(perf)/A_test.shape[0]


import numpy as np
import timeit

def proximal_gradient_algorithm(F , grad_f , prox_g , x0 , step , PREC , ITE_MAX , PRINT ):
    x = np.copy(x0)
    x_tab = np.copy(x)
    if PRINT:
        print("------------------------------------\n Proximal gradient algorithm\n------------------------------------\nSTART    -- stepsize = {:0}".format(step))
    t_s =  timeit.default_timer()
    for k in range(ITE_MAX):
        g = grad_f(x)
        x = prox_g(x - step*g , step, lam1=lam1)  #######  ITERATION

        x_tab = np.vstack((x_tab,x))


    t_e =  timeit.default_timer()
    if PRINT:
        print("FINISHED -- {:d} iterations / {:.6f}s -- final value: {:f}\n\n".format(k,t_e-t_s,F(x)))
    return x,x_tab


#### Parameter we give at our algorithm 
PREC    = 1e-5                     # Sought precision
ITE_MAX = 1000                      # Max number of iterations
x0      = np.zeros(n)              # Initial point
step    = 1.0/L

##### gradient algorithm
x,x_tab = proximal_gradient_algorithm(F , grad_f , prox_g , x0 , step , PREC, ITE_MAX , True)

------------------------------------
 Proximal gradient algorithm
------------------------------------
START    -- stepsize = 0.030334772813507393
FINISHED -- 999 iterations / 1.577216s -- final value: 0.438712


import matplotlib.pyplot as plt


Fval = []
for i in range(x_tab.shape[0]):
    Fval.append( F(x_tab[i])) 

plt.figure()
plt.plot( Fval, color="black", linewidth=1.0, linestyle="-")
plt.grid(True)
plt.show()


plt.figure()

for i in np.arange(0,x_tab.shape[0],int(x_tab.shape[0]/40)):
    for j in range(n):
        if np.abs(x_tab[i,j])>1e-14:
            plt.plot( i , j  , 'ko')

plt.grid(True)
plt.ylabel('Non-null Coordinates')
plt.xlabel('Nb. Iterations')
plt.ylim(-1,d+1)
plt.yticks(np.arange(0,d+1))
plt.show()


import matplotlib.pyplot as plt

import numpy as np


#### Parameter we give at our algorithm (see algoGradient.ipynb)
PREC    = 1e-5                     # Sought precision
ITE_MAX = 500                      # Max number of iterations
x0      = np.zeros(n)              # Initial point
step    = 1.0/L

reg_l1_tab = np.power( 2.0, np.arange(-10,1,1) )
lam2 = 1e-1

Perf_train = []
Perf_test = []
Pts = []
for reg_l1 in reg_l1_tab:
    lam1 = reg_l1
    x,x_tab = proximal_gradient_algorithm(F , grad_f , prox_g , x0 , step ,  PREC , ITE_MAX, False )
    for i in range(n):
        if np.abs(x[i])>1e-14:
            Pts.append([reg_l1,i])
            
    pred_train,perf_train = prediction_train(x,False)
    pred_test,perf_test = prediction_test(x,False)
    
    Perf_train.append(perf_train*100.0)
    Perf_test.append(perf_test*100.0)
    print("lambda1 = {:.2e} , lambda2 = {:.2e}, performance training: {:3.1f}%, performance test: {:3.1f}%".format(lam1,lam2,perf_train*100,perf_test*100))

lambda1 = 9.77e-04 , lambda2 = 1.00e-01, performance training: 92.3%, performance test: 86.3%
lambda1 = 1.95e-03 , lambda2 = 1.00e-01, performance training: 92.0%, performance test: 86.3%
lambda1 = 3.91e-03 , lambda2 = 1.00e-01, performance training: 92.0%, performance test: 88.4%
lambda1 = 7.81e-03 , lambda2 = 1.00e-01, performance training: 92.0%, performance test: 89.5%
lambda1 = 1.56e-02 , lambda2 = 1.00e-01, performance training: 91.7%, performance test: 89.5%
lambda1 = 3.12e-02 , lambda2 = 1.00e-01, performance training: 91.3%, performance test: 84.2%
lambda1 = 6.25e-02 , lambda2 = 1.00e-01, performance training: 90.7%, performance test: 82.1%
lambda1 = 1.25e-01 , lambda2 = 1.00e-01, performance training: 91.0%, performance test: 82.1%
lambda1 = 2.50e-01 , lambda2 = 1.00e-01, performance training: 91.0%, performance test: 82.1%
lambda1 = 5.00e-01 , lambda2 = 1.00e-01, performance training: 58.0%, performance test: 62.1%
lambda1 = 1.00e+00 , lambda2 = 1.00e-01, performance training: 58.0%, performance test: 62.1%


plt.figure()
for point in Pts:
    plt.plot( point[0], point[1] , 'ko')
plt.xscale('log')
plt.grid(True)
plt.xlim(min(reg_l1_tab/2.0),max(reg_l1_tab))
plt.ylim(-1,d+1)
plt.yticks(np.arange(0,d+1))
plt.ylabel('Non-null Coordinates')
plt.xlabel('value of lambda_1')
plt.show()


import matplotlib.pyplot as plt


plt.figure()
plt.plot( reg_l1_tab , Perf_train , color="blue", linewidth=1.0, linestyle="-", label="training")
plt.plot( reg_l1_tab , Perf_test , color="red", linewidth=1.0, linestyle="-", label="testing")
plt.grid(True)
plt.xscale('log')
plt.ylim([50,100])
plt.ylabel('Prediction accuracy')
plt.xlabel('value of lambda_1')
plt.legend()
plt.show()


plt.figure()
for point in Pts:
    plt.plot( point[0], point[1]*3 , 'ko')
plt.xscale('log')
plt.grid(True)
plt.ylabel('Non-null Coordinates')
plt.xlabel('value of lambda_1')
plt.plot( reg_l1_tab , Perf_train , color="blue", linewidth=1.0, linestyle="-", label="training")
plt.plot( reg_l1_tab , Perf_test , color="red", linewidth=1.0, linestyle="-", label="testing")
plt.ylim([-1,100])
plt.ylabel('Prediction accuracy')
plt.xlabel('value of lambda_1')
plt.legend()
plt.show()

Structure identification:

from nonsmooth optimization to data science

1 - Proximity operator and stability¶

1.a Stability in 1D¶

1.b Stability in nD¶

2 - Stucture identification & Optimization¶

2.1 General purpose minimization¶

2.2 Modelling software¶

2.3 Dedicated library¶

2.4 Proximal Gradient Method¶

2.5 Proximal Splitting methods: Douglas-Rachford, ADMM, etc.¶

2.6 Comparison¶

3 - a Practical example in Classification¶

Machine Learning as an Optimization problem¶

Binary Classification with Logisitic Regression¶

L1-regularization¶

Features signification¶

3.1 Objective definition¶

Oracles¶

Prediction Function¶

3.2 Proximal Gradient algorithm¶

3.3 Regularization path.¶

3.4 Bonus questions¶