@hrayrhar
Last active August 20, 2019 05:55
Adam and vanilla SGD in numpy.
import numpy as np


class Adam:
    """Adam optimizer.

    Default parameters follow those provided in the original paper.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1: float, 0 < beta < 1. Generally close to 1.
        beta_2: float, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor.
        decay: float >= 0. Learning rate decay over each update.

    # References
        - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
    """

    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-8, decay=0., **kwargs):
        allowed_kwargs = {'clipnorm', 'clipvalue'}
        for k in kwargs:
            if k not in allowed_kwargs:
                raise TypeError('Unexpected keyword argument '
                                'passed to optimizer: ' + str(k))
        self.__dict__.update(kwargs)
        self.iterations = 0
        self.lr = lr
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.decay = decay
        self.epsilon = epsilon
        self.initial_decay = decay

    def get_update(self, params, grads):
        """params and grads are lists of numpy arrays."""
        original_shapes = [x.shape for x in params]
        params = [x.flatten() for x in params]
        grads = [x.flatten() for x in grads]

        # TODO: implement clipping
        # if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        #     norm = np.sqrt(sum([np.sum(np.square(g)) for g in grads]))
        #     grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
        # if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        #     grads = [np.clip(g, -self.clipvalue, self.clipvalue) for g in grads]

        # Optional time-based learning rate decay.
        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # Bias-corrected step size: lr * sqrt(1 - beta_2^t) / (1 - beta_1^t).
        t = self.iterations + 1
        lr_t = lr * (np.sqrt(1. - np.power(self.beta_2, t)) /
                     (1. - np.power(self.beta_1, t)))

        # Lazily initialize the first- and second-moment accumulators.
        if not hasattr(self, 'ms'):
            self.ms = [np.zeros(p.shape) for p in params]
            self.vs = [np.zeros(p.shape) for p in params]

        ret = [None] * len(params)
        for i, (p, g, m, v) in enumerate(zip(params, grads, self.ms, self.vs)):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g              # first moment
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * np.square(g)   # second moment
            p_t = p - lr_t * m_t / (np.sqrt(v_t) + self.epsilon)          # parameter step
            self.ms[i] = m_t
            self.vs[i] = v_t
            ret[i] = p_t
        self.iterations += 1

        # Restore the original parameter shapes.
        for i in range(len(ret)):
            ret[i] = ret[i].reshape(original_shapes[i])
        return ret
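

# Illustrative sketch (not part of the original gist): minimizing the toy
# quadratic ||w - target||^2 with the Adam class above. The helper name
# `_demo_adam` and the toy problem are assumptions made for this example only.
def _demo_adam():
    target = np.array([3.0, -2.0, 0.5])
    w = np.zeros(3)                          # parameter vector to optimize
    adam = Adam(lr=0.1)
    for _ in range(500):
        grad = 2.0 * (w - target)            # gradient of ||w - target||^2
        w = adam.get_update([w], [grad])[0]
    return w                                 # should be close to `target`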


class SGD:
    """SGD optimizer (vanilla gradient descent, no momentum).

    # Arguments
        lr: float >= 0. Learning rate.
    """

    def __init__(self, lr=0.001, **kwargs):
        allowed_kwargs = {'clipnorm', 'clipvalue'}
        for k in kwargs:
            if k not in allowed_kwargs:
                raise TypeError('Unexpected keyword argument '
                                'passed to optimizer: ' + str(k))
        self.__dict__.update(kwargs)
        self.lr = lr

    def get_update(self, params, grads):
        """params and grads are lists of numpy arrays."""
        original_shapes = [x.shape for x in params]
        params = [x.flatten() for x in params]
        grads = [x.flatten() for x in grads]

        # TODO: implement clipping
        # if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        #     norm = np.sqrt(sum([np.sum(np.square(g)) for g in grads]))
        #     grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
        # if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        #     grads = [np.clip(g, -self.clipvalue, self.clipvalue) for g in grads]

        ret = [None] * len(params)
        for i, (p, g) in enumerate(zip(params, grads)):
            ret[i] = p - self.lr * g  # plain gradient step

        # Restore the original parameter shapes.
        for i in range(len(ret)):
            ret[i] = ret[i].reshape(original_shapes[i])
        return ret
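

# Illustrative sketch (not part of the original gist): the same toy quadratic
# solved with the SGD class, plus a tiny driver. The names `_demo_sgd` and the
# printout are assumptions made for this example only.
def _demo_sgd():
    target = np.array([3.0, -2.0, 0.5])
    w = np.zeros(3)
    sgd = SGD(lr=0.1)
    for _ in range(500):
        grad = 2.0 * (w - target)            # gradient of ||w - target||^2
        w = sgd.get_update([w], [grad])[0]
    return w


if __name__ == '__main__':
    # Both should approach [3.0, -2.0, 0.5].
    print('Adam result:', _demo_adam())
    print('SGD result:', _demo_sgd())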