SGD vs Batch vs Mini-Batch
Vibe Prompt
「幫我比較 Batch GD、SGD、Mini-Batch SGD 在線性回歸上的收斂速度,畫出損失曲線。」
import numpy as np
def batch_gd(X, y, lr=0.01, epochs=100):
    """Fit linear-regression weights with full-batch gradient descent.

    Minimizes the mean squared error ``mean((X @ w - y) ** 2)`` starting
    from ``w = 0``. Every step uses the gradient over all samples.

    Args:
        X: Feature matrix of shape ``(m, n)`` (array or nested sequence).
        y: Targets, one per sample. Shape ``(m,)``; a column or row vector
            such as ``(m, 1)`` is flattened to 1-D.
        lr: Learning rate (step size).
        epochs: Number of gradient steps to take.

    Returns:
        Tuple ``(w, losses)``: the final weight vector of shape ``(n,)`` and
        a list holding the loss measured before each update (one entry per
        step).
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: an (m, 1) target would otherwise broadcast against the 1-D
    # prediction into an (m, m) residual and break the weight update.
    y = np.asarray(y, dtype=float).reshape(-1)
    m, n = X.shape
    w = np.zeros(n)
    losses = []
    for _ in range(epochs):
        residual = X @ w - y
        losses.append(np.mean(residual ** 2))
        # Scale the n-vector rather than the whole (n, m) matrix.
        grad = (2 / m) * (X.T @ residual)
        w -= lr * grad
    return w, losses
def sgd(X, y, lr=0.01, epochs=100):
    """Fit linear-regression weights with single-sample stochastic gradient descent.

    Starts from ``w = 0``. Each iteration draws one sample index with
    ``np.random.randint`` (global NumPy random state) and steps along the
    squared-error gradient of that sample alone. Note that one "epoch" here
    is a single update, not a full pass over the data.

    Args:
        X: Feature matrix of shape ``(m, n)`` (array or nested sequence).
        y: Targets, one per sample. Shape ``(m,)``; a column or row vector
            such as ``(m, 1)`` is flattened to 1-D.
        lr: Learning rate (step size).
        epochs: Number of single-sample updates to perform.

    Returns:
        Tuple ``(w, losses)``: the final weight vector of shape ``(n,)`` and
        a list holding the full-dataset mean squared error measured before
        each update (one entry per update).
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: with an (m, 1) target the per-sample gradient would come out
    # as (n, 1) and the in-place update of the (n,) weights would fail.
    y = np.asarray(y, dtype=float).reshape(-1)
    m, n = X.shape
    w = np.zeros(n)
    losses = []
    for _ in range(epochs):
        idx = np.random.randint(m)
        xi, yi = X[idx], y[idx]
        # Track progress on the whole dataset, not just the sampled point.
        losses.append(np.mean((X @ w - y) ** 2))
        grad = 2 * (xi @ w - yi) * xi
        w -= lr * grad
    return w, losses
def minibatch_sgd(X, y, lr=0.01, epochs=100, batch_size=32):
    """Fit linear-regression weights with mini-batch stochastic gradient descent.

    Starts from ``w = 0``. Each iteration samples a batch of distinct rows
    with ``np.random.choice`` (global NumPy random state) and steps along
    the mean squared-error gradient of that batch. Note that one "epoch"
    here is a single batch update, not a full pass over the data.

    Args:
        X: Feature matrix of shape ``(m, n)`` (array or nested sequence).
        y: Targets, one per sample. Shape ``(m,)``; a column or row vector
            such as ``(m, 1)`` is flattened to 1-D.
        lr: Learning rate (step size).
        epochs: Number of mini-batch updates to perform.
        batch_size: Rows per batch. Values larger than the number of samples
            are clamped to it, so small datasets fall back to full-batch
            updates.

    Returns:
        Tuple ``(w, losses)``: the final weight vector of shape ``(n,)`` and
        a list holding the full-dataset mean squared error measured before
        each update (one entry per update).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``X`` has no rows.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y so an (m, 1) target does not broadcast into a 2-D residual.
    y = np.asarray(y, dtype=float).reshape(-1)
    m, n = X.shape
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if m == 0:
        raise ValueError("X must contain at least one sample")
    # Sampling without replacement cannot draw more rows than exist; clamp
    # instead of letting np.random.choice raise on small datasets.
    batch_size = min(batch_size, m)
    w = np.zeros(n)
    losses = []
    for _ in range(epochs):
        idx = np.random.choice(m, batch_size, replace=False)
        Xb, yb = X[idx], y[idx]
        # Track progress on the whole dataset, not just the sampled batch.
        losses.append(np.mean((X @ w - y) ** 2))
        grad = (2 / batch_size) * (Xb.T @ (Xb @ w - yb))
        w -= lr * grad
    return w, losses
# Test: fit noisy synthetic data with each optimizer and print the weights
# each one recovers next to the ground truth.
np.random.seed(42)  # fixed seed so the data and the sampling are reproducible
true_w = np.array([3, -2, 1, 0.5, -1])
X = np.random.randn(1000, 5)
noise = np.random.randn(1000) * 0.1
y = X @ true_w + noise
# Run the optimizers in a fixed order (they share the global random state);
# only the fitted weights are kept, the loss histories are discarded.
(w_batch, _), (w_sgd, _), (w_mini, _) = [
    fit(X, y) for fit in (batch_gd, sgd, minibatch_sgd)
]
print(f"真實權重: {true_w}")
print(f"Batch GD: {np.round(w_batch, 3)}")
print(f"SGD: {np.round(w_sgd, 3)}")
print(f"Mini-Batch:{np.round(w_mini, 3)}")