Automatic Differentiation
Vibe Prompt
"Build a simple automatic differentiation engine from scratch that supports +, -, *, sin, cos, and computes the gradient of f(x,y)=sin(x)*cos(y)."
import math
class Var:
def __init__(self, val, children=()):
self.val = val
self.grad = 0
self._backward = lambda: None
self._prev = set(children)
def __add__(self, other):
other = other if isinstance(other, Var) else Var(other)
out = Var(self.val + other.val, (self, other))
def _backward():
self.grad += out.grad
other.grad += out.grad
out._backward = _backward
return out
def __mul__(self, other):
other = other if isinstance(other, Var) else Var(other)
out = Var(self.val * other.val, (self, other))
def _backward():
self.grad += other.val * out.grad
other.grad += self.val * out.grad
out._backward = _backward
return out
def __neg__(self):
return self * Var(-1)
def __sub__(self, other):
return self + (-other)
def sin(self):
out = Var(math.sin(self.val), (self,))
def _backward():
self.grad += math.cos(self.val) * out.grad
out._backward = _backward
return out
def cos(self):
out = Var(math.cos(self.val), (self,))
def _backward():
self.grad += -math.sin(self.val) * out.grad
out._backward = _backward
return out
def backward(self):
topo = []
visited = set()
def build(v):
if v not in visited:
visited.add(v)
for child in v._prev:
build(child)
topo.append(v)
build(self)
self.grad = 1
for v in reversed(topo):
v._backward()
# Test
x = Var(0.5)
y = Var(1.2)
f = (x.sin() * y.cos())
f.backward()
print(f"f = sin({x.val}) * cos({y.val}) = {f.val:.4f}")
print(f"df/dx = {x.grad:.4f} (theoretical: cos(x)*cos(y) = {math.cos(0.5)*math.cos(1.2):.4f})")
print(f"df/dy = {y.grad:.4f} (theoretical: -sin(x)*sin(y) = {-math.sin(0.5)*math.sin(1.2):.4f})")
Chapter Summary
- Understand the core concepts and theory
- Master implementation methods and techniques
- Learn common issues and their solutions
- Apply knowledge to real-world projects
Further Reading
- Official documentation and API references
- Open source projects on GitHub
- Related technical books and courses
- Community discussions and technical blogs
Implementation Examples
Basic Examples
# This section provides a complete implementation example
# to help you apply what you've learned to real projects
Steps
- Initialization: Set up the development environment and required tools
- Data Preparation: Collect and organize the required data
- Core Implementation: Implement the main functionality and logic
- Testing & Validation: Ensure the functionality works correctly
- Optimization: Tune performance and user experience
Common Errors
| Error Type | Possible Cause | Solution | |-----------|---------------|----------| | Compilation Error | Syntax issues | Check code syntax | | Runtime Error | Environment issues | Verify dependencies are installed | | Logic Error | Algorithm issues | Step-by-step debugging and testing | | Performance Issue | Efficiency issues | Use performance analysis tools |
Code Example
# Example code
import sys
def main():
# Main program logic
print("Hello, World!")
if __name__ == "__main__":
main()
Related Resources
- Official documentation
- API reference manuals
- Open source project examples
- Technical community discussions
Forward Mode AutoDiff
Forward mode computes both the value and the derivative simultaneously using dual numbers.
Dual Numbers
A dual number is $a + b\epsilon$ where $\epsilon^2 = 0$.
| Operation | Result | |-----------|--------| | Addition | $(a + b\epsilon) + (c + d\epsilon) = (a+c) + (b+d)\epsilon$ | | Multiplication | $(a + b\epsilon) \times (c + d\epsilon) = ac + (ad + bc)\epsilon$ | | Sin | $\sin(a + b\epsilon) = \sin(a) + b\cos(a)\epsilon$ | | Cos | $\cos(a + b\epsilon) = \cos(a) - b\sin(a)\epsilon$ |
Python Implementation
import math
class Dual:
def __init__(self, value, derivative=0.0):
self.value = value
self.derivative = derivative
def __add__(self, other):
if isinstance(other, Dual):
return Dual(
self.value + other.value,
self.derivative + other.derivative
)
return Dual(self.value + other, self.derivative)
def __mul__(self, other):
if isinstance(other, Dual):
return Dual(
self.value * other.value,
self.value * other.derivative + self.derivative * other.value
)
return Dual(self.value * other, self.derivative * other)
def __sub__(self, other):
if isinstance(other, Dual):
return Dual(
self.value - other.value,
self.derivative - other.derivative
)
return Dual(self.value - other, self.derivative)
# Define sin and cos for Dual numbers
def sin_dual(x: Dual) -> Dual:
return Dual(math.sin(x.value), x.derivative * math.cos(x.value))
def cos_dual(x: Dual) -> Dual:
return Dual(math.cos(x.value), -x.derivative * math.sin(x.value))
# Evaluate f(x,y) = sin(x) * cos(y)
def f(x_val, y_val):
x = Dual(x_val, 1.0) # seed = 1.0 means df/dx
y = Dual(y_val, 0.0) # seed = 0.0 means df/dy = 0
return sin_dual(x) * cos_dual(y)
result = f(2.0, 3.0)
print(f"f(2,3) = {result.value:.4f}")
print(f"df/dx at (2,3) = {result.derivative:.4f}")
# Now compute df/dy
def df_dy(x_val, y_val):
x = Dual(x_val, 0.0)
y = Dual(y_val, 1.0)
return sin_dual(x) * cos_dual(y)
result2 = df_dy(2.0, 3.0)
print(f"df/dy at (2,3) = {result2.derivative:.4f}")
Reverse Mode AutoDiff
Reverse mode computes gradients more efficiently for functions with many inputs and few outputs (the common case in deep learning).
Computation Graph
x ──┐
├── (*) ── sin ──┐
y ──┘ ├── (+) ── result
x ──┐ │
├── (*) ── cos ──┘
y ──┘
Forward Pass (compute values)
| Node | Value | |------|-------| | $x$ | 2.0 | | $y$ | 3.0 | | $x \times y$ | 6.0 | | $\sin(x \times y)$ | -0.279 | | $x \times y$ (again) | 6.0 | | $\cos(x \times y)$ | 0.960 | | result | 0.681 |
Backward Pass (compute gradients)
Starting from $\frac{\partial result}{\partial result} = 1$, propagate backward using the chain rule.
Summary
Automatic differentiation computes exact derivatives without symbolic differentiation or numerical approximation. Forward mode is best for few inputs, reverse mode for many inputs (deep learning).
Key takeaways: | Dual numbers: a + bε where ε² = 0, encode value + derivative | | Forward mode: computes one derivative per pass — good for few inputs | | Reverse mode: forward pass computes values, backward pass propagates gradients | | Reverse mode is the foundation of backpropagation in neural networks | | Frameworks (PyTorch, TensorFlow) use reverse mode AutoDiff | | Dual numbers support all operations: +, -, *, /, sin, cos, exp, log |
Next Chapter: Linear Regression
The next chapter covers linear regression as a gradient descent application.