diff --git a/questions/146_momentum-optimizer/description.md b/questions/146_momentum-optimizer/description.md
new file mode 100644
index 00000000..3a2fdf77
--- /dev/null
+++ b/questions/146_momentum-optimizer/description.md
@@ -0,0 +1 @@
+Implement the momentum optimizer update step function. Your function should take the current parameter value, gradient, and velocity as inputs, and return the updated parameter value and new velocity. The function should handle both scalar and array inputs.
\ No newline at end of file
diff --git a/questions/146_momentum-optimizer/example.json b/questions/146_momentum-optimizer/example.json
new file mode 100644
index 00000000..c7d172e6
--- /dev/null
+++ b/questions/146_momentum-optimizer/example.json
@@ -0,0 +1,5 @@
+{
+    "input": "parameter = 1.0, grad = 0.1, velocity = 0.1",
+    "output": "(0.909, 0.091)",
+    "reasoning": "With the default learning_rate=0.01 and momentum=0.9, the new velocity is 0.9 * 0.1 + 0.01 * 0.1 = 0.091, and the new parameter is 1.0 - 0.091 = 0.909."
+}
diff --git a/questions/146_momentum-optimizer/learn.md b/questions/146_momentum-optimizer/learn.md
new file mode 100644
index 00000000..8f4b20e1
--- /dev/null
+++ b/questions/146_momentum-optimizer/learn.md
@@ -0,0 +1,58 @@
+# Implementing Momentum Optimizer
+
+## Introduction
+Momentum is a popular optimization technique that helps accelerate gradient descent in the relevant direction and dampens oscillations. It works by adding a fraction of the previous update vector to the current update.
+
+## Learning Objectives
+- Understand how momentum optimization works
+- Learn to implement momentum-based gradient updates
+- Understand the effect of momentum on optimization
+
+## Theory
+Momentum optimization uses a moving average of gradients to determine the direction of the update. The key equations are:
+
+$v_t = \gamma v_{t-1} + \eta \nabla_\theta J(\theta)$ (Velocity update)
+
+$\theta_t = \theta_{t-1} - v_t$ (Parameter update)
+
+Where:
+- $v_t$ is the velocity at time $t$
+- $\gamma$ is the momentum coefficient (typically 0.9)
+- $\eta$ is the learning rate
+- $\nabla_\theta J(\theta)$ is the gradient of the loss function
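+
+As a quick numeric check of these equations, take this question's example values with the default $\eta = 0.01$ and $\gamma = 0.9$: starting from $\theta_{t-1} = 1.0$, $v_{t-1} = 0.1$, and gradient $0.1$,
+
+$v_t = 0.9 \times 0.1 + 0.01 \times 0.1 = 0.091$
+
+$\theta_t = 1.0 - 0.091 = 0.909$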
+
+Read more at:
+
+1. Ruder, S. (2017). An overview of gradient descent optimization algorithms. [arXiv:1609.04747](https://arxiv.org/pdf/1609.04747)
+
+## Problem Statement
+Implement the momentum optimizer update step function. Your function should take the current parameter value, gradient, and velocity as inputs, and return the updated parameter value and new velocity.
+
+### Input Format
+The function should accept:
+- parameter: Current parameter value
+- grad: Current gradient
+- velocity: Current velocity
+- learning_rate: Learning rate (default=0.01)
+- momentum: Momentum coefficient (default=0.9)
+
+### Output Format
+Return a tuple: (updated_parameter, updated_velocity)
+
+## Example
+```python
+# Example usage:
+parameter = 1.0
+grad = 0.1
+velocity = 0.1
+
+new_param, new_velocity = momentum_optimizer(parameter, grad, velocity)
+```
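+
+Once your implementation works, it is worth watching how the velocity accumulates. The sketch below (assuming your `momentum_optimizer` matches the signature described above) applies the update repeatedly with a constant gradient:
+
+```python
+parameter, velocity = 1.0, 0.0  # velocity starts at zero
+
+for step in range(5):
+    parameter, velocity = momentum_optimizer(parameter, grad=0.1, velocity=velocity)
+    print(step, parameter, velocity)
+
+# The velocity grows each step (0.001, 0.0019, 0.00271, ...), approaching the
+# steady state learning_rate * grad / (1 - momentum) = 0.01, ten times the
+# plain gradient-descent step of learning_rate * grad = 0.001. This is the
+# acceleration in the relevant direction described in the introduction.
+```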
+
+## Tips
+- Initialize the velocity to zero for the first update step
+- Use numpy for numerical operations
+- Test with both scalar and array inputs
+
+---
diff --git a/questions/146_momentum-optimizer/meta.json b/questions/146_momentum-optimizer/meta.json
new file mode 100644
index 00000000..be6462a5
--- /dev/null
+++ b/questions/146_momentum-optimizer/meta.json
@@ -0,0 +1,17 @@
+{
+    "id": "146",
+    "title": "Momentum Optimizer",
+    "difficulty": "easy",
+    "category": "Deep Learning",
+    "video": "",
+    "likes": "0",
+    "dislikes": "0",
+    "contributor": [
+        {
+            "profile_link": "https://github.com/mavleo96",
+            "name": "Vijayabharathi Murugan"
+        }
+    ],
+    "tinygrad_difficulty": null,
+    "pytorch_difficulty": null
+}
diff --git a/questions/146_momentum-optimizer/solution.py b/questions/146_momentum-optimizer/solution.py
new file mode 100644
index 00000000..ed5fb9d1
--- /dev/null
+++ b/questions/146_momentum-optimizer/solution.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+def momentum_optimizer(parameter, grad, velocity, learning_rate=0.01, momentum=0.9):
+    """
+    Update parameters using the momentum optimizer.
+    Uses momentum to accelerate learning in relevant directions and dampen oscillations.
+
+    Args:
+        parameter: Current parameter value
+        grad: Current gradient
+        velocity: Current velocity/momentum term
+        learning_rate: Learning rate (default=0.01)
+        momentum: Momentum coefficient (default=0.9)
+
+    Returns:
+        tuple: (updated_parameter, updated_velocity)
+    """
+    assert learning_rate > 0, "Learning rate must be positive"
+    assert 0 <= momentum < 1, "Momentum must be in [0, 1)"
+
+    # Velocity update: v_t = gamma * v_{t-1} + eta * grad
+    velocity = momentum * velocity + learning_rate * grad
+
+    # Parameter update: theta_t = theta_{t-1} - v_t
+    parameter = parameter - velocity
+
+    return np.round(parameter, 5), np.round(velocity, 5)
diff --git a/questions/146_momentum-optimizer/starter_code.py b/questions/146_momentum-optimizer/starter_code.py
new file mode 100644
index 00000000..c9d5d240
--- /dev/null
+++ b/questions/146_momentum-optimizer/starter_code.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+def momentum_optimizer(parameter, grad, velocity, learning_rate=0.01, momentum=0.9):
+    """
+    Update parameters using the momentum optimizer.
+    Uses momentum to accelerate learning in relevant directions and dampen oscillations.
+
+    Args:
+        parameter: Current parameter value
+        grad: Current gradient
+        velocity: Current velocity/momentum term
+        learning_rate: Learning rate (default=0.01)
+        momentum: Momentum coefficient (default=0.9)
+
+    Returns:
+        tuple: (updated_parameter, updated_velocity)
+    """
+    # Your code here
+    return np.round(parameter, 5), np.round(velocity, 5)
diff --git a/questions/146_momentum-optimizer/tests.json b/questions/146_momentum-optimizer/tests.json
new file mode 100644
index 00000000..0546e0b6
--- /dev/null
+++ b/questions/146_momentum-optimizer/tests.json
@@ -0,0 +1,18 @@
+[
+    {
+        "test": "print(momentum_optimizer(1., 0.1, 0.5, 0.01, 0.9))",
+        "expected_output": "(0.549, 0.451)"
+    },
+    {
+        "test": "print(momentum_optimizer(np.array([1., 2.]), np.array([0.1, 0.2]), np.array([0.5, 1.0]), 0.01, 0.9))",
+        "expected_output": "(array([0.549, 1.098]), array([0.451, 0.902]))"
+    },
+    {
+        "test": "print(momentum_optimizer(np.array([1., 2.]), np.array([0.1, 0.2]), np.array([0.5, 1.0]), 0.01, 0.))",
+        "expected_output": "(array([0.999, 1.998]), array([0.001, 0.002]))"
+    },
+    {
+        "test": "print(momentum_optimizer(np.array([1., 2.]), np.array([0., 0.]), np.array([0.5, 0.5]), 0.01, 0.9))",
+        "expected_output": "(array([0.55, 1.55]), array([0.45, 0.45]))"
+    }
+]
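
For local verification, a minimal harness along these lines could replay the cases in `tests.json` against `solution.py` (a sketch with assumed file locations; this runner is not part of the diff above, and exact string matches depend on numpy's print formatting):

```python
import contextlib
import io
import json

import numpy as np

from solution import momentum_optimizer

# Load the test cases added in tests.json.
with open("tests.json") as f:
    cases = json.load(f)

for case in cases:
    buf = io.StringIO()
    # Each "test" entry is a print(...) snippet; execute it and capture stdout.
    with contextlib.redirect_stdout(buf):
        exec(case["test"], {"momentum_optimizer": momentum_optimizer, "np": np})
    actual = buf.getvalue().strip()
    status = "ok" if actual == case["expected_output"] else "FAIL"
    print(f"{status}: {case['test']}")
```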