meth_lab4/bfgs.py at main · imorda/meth_lab4 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import typing as ty

import numpy as np

from descent import wolfe_conditions


def rho_calc(y, s):
    return 1 / np.dot(y.T, s)


def bfgs(f: ty.Callable, df: ty.Callable, x_0, epochs, tol=1e-5, ls_tol=1e-5):
    i = 0
    E = np.eye(len(x_0))

    H_i = E  # reversed (^-1) hessian
    points = [x_0]
    x_i = x_0  # point
    df_x_i = df(f, x_0)  # derivative at x_i

    while i < epochs and np.linalg.norm(df_x_i) > tol:
        p_i = -H_i @ df_x_i
        t, f_calls, df_calls = wolfe_conditions(f, df, x_i, p_i, ls_tol)

        s = t * p_i
        x_i1 = x_i + s
        s = np.reshape(np.array([s]), (len(x_0), 1))
        y = np.reshape(np.array([df(f, x_i1) - df(f, x_i)]), (len(x_0), 1))

        rho = 1 / (y.T @ s)
        m1 = E - rho * (s @ y.T)
        m2 = E - rho * (y @ s.T)

        H_i = m1 @ H_i @ m2 + rho * (s @ s.T)
        x_i = x_i1[:]
        points.append(x_i)
        df_x_i = df(f, x_i)

        i += 1

    print(len(points))
    return points


# m --- is size of "batch" of precompiled data
def l_bfgs(f: ty.Callable, df: ty.Callable, x_0, epochs, m=5, tol=1e-9, ls_tol=1e-6):
    points = [x_0]
    x_i = x_0  # point
    df_x_i = df(f, x_0)  # derivative at x_i
    p = -df_x_i
    t, _, _ = wolfe_conditions(f, df, x_0, p, tol=ls_tol)

    ys = []
    ss = []

    ss.append(t * p)
    x_i1 = x_i + ss[-1]
    points.append(x_i1)
    df_x_i1 = df(f, x_i1)

    ys.append(df_x_i1 - df_x_i)
    df_x_i = df_x_i1
    x_i = x_i1

    def alpha_calc(j, q):
        return rho_calc(ys[len(ss) - 1 - j], ss[len(ss) - 1 - j]) * np.dot(
            ss[len(ss) - 1 - j].T, q
        )

    def calcualte_p():
        q = df(f, x_i)
        alphas = [None] * len(ss)

        for j in range(len(ss)):
            alphas[len(ss) - 1 - j] = alpha_calc(j, q)
            q = q - alphas[len(ss) - 1 - j] * ys[len(ss) - 1 - j]

        lambd = np.dot(ss[-1].T, ys[-1]) / np.dot(ys[-1].T, ys[-1])
        H_i = lambd * np.eye(len(ss[-1]))
        p_i = np.dot(H_i, q)

        for j in range(len(ss)):
            betta = rho_calc(ys[j], ss[j]) * np.dot(ys[j].T, p_i)
            p_i = p_i + ss[j] * (alphas[j] - betta)

        return -p_i

    i = 0
    # calculating reversed hessian recursively
    while i < epochs and np.linalg.norm(df_x_i) > tol:
        p_i = calcualte_p()

        t, f_calls, df_calls = wolfe_conditions(f, df, x_i, p_i, tol=1e-6)

        s = t * p_i
        x_i1 = x_i + s
        y = (df(f, x_i1) - df(f, x_i)).T

        ys.append(y)
        ss.append(s)

        if len(ss) == m + 1:
            ys.pop(0)
            ss.pop(0)

        x_i = x_i1
        points.append(x_i)
        df_x_i = df(f, x_i)

        i += 1

    return points