SGD vs Batch vs Mini-Batch

Vibe Prompt

「幫我比較 Batch GD、SGD、Mini-Batch SGD 在線性回歸上的收斂速度,並畫出損失曲線。」

import numpy as np

def batch_gd(X, y, lr=0.01, epochs=100):
    """Fit linear-regression weights with full-batch gradient descent.

    Every iteration uses all rows of ``X`` to compute the gradient of the
    mean squared error and takes one step of size ``lr`` against it.

    Args:
        X: 2-D feature matrix of shape (m, n).
        y: Target vector of length m.
        lr: Learning rate (step size).
        epochs: Number of full-batch updates to perform.

    Returns:
        A tuple ``(w, losses)``: the learned weight vector of length n
        (starting from zeros) and a list holding the mean squared error
        measured *before* each update.
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    losses = []
    for _ in range(epochs):
        residual = X @ w - y
        losses.append(np.mean(residual ** 2))
        # d/dw mean((Xw - y)^2) = (2/m) * X^T (Xw - y)
        w -= lr * ((2 / n_samples) * X.T @ residual)
    return w, losses

def sgd(X, y, lr=0.01, epochs=100):
    """Fit linear-regression weights with single-sample stochastic updates.

    Each iteration draws one row index uniformly at random (via the global
    ``np.random`` state) and steps along the squared-error gradient of that
    single sample. Note that ``epochs`` counts individual updates here, not
    full passes over the data.

    Args:
        X: 2-D feature matrix of shape (m, n).
        y: Target vector of length m.
        lr: Learning rate (step size).
        epochs: Number of single-sample updates to perform.

    Returns:
        A tuple ``(w, losses)``: the learned weight vector of length n
        (starting from zeros) and a list holding the mean squared error over
        the *whole* dataset, measured before each update.
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    losses = []
    for _ in range(epochs):
        pick = np.random.randint(n_samples)
        # A length-1 slice keeps the sample 2-D so the transpose below works.
        row = slice(pick, pick + 1)
        # The loss is tracked on the full data so curves are comparable
        # across optimizers, even though the step uses a single sample.
        losses.append(np.mean((X @ w - y) ** 2))
        sample_residual = X[row] @ w - y[row]
        w -= lr * (2 * X[row].T @ sample_residual)
    return w, losses

def minibatch_sgd(X, y, lr=0.01, epochs=100, batch_size=32):
    """Fit linear-regression weights with mini-batch stochastic updates.

    Each iteration samples a batch of distinct rows (via the global
    ``np.random`` state) and steps along the mean-squared-error gradient of
    that batch. Note that ``epochs`` counts individual batch updates here,
    not full passes over the data.

    Args:
        X: 2-D feature matrix of shape (m, n).
        y: Target vector of length m.
        lr: Learning rate (step size).
        epochs: Number of mini-batch updates to perform.
        batch_size: Rows drawn per update. Values larger than m are clamped
            to m, in which case every update uses the full dataset.

    Returns:
        A tuple ``(w, losses)``: the learned weight vector of length n
        (starting from zeros) and a list holding the mean squared error over
        the *whole* dataset, measured before each update.
    """
    m, n = X.shape
    # Sampling without replacement cannot draw more rows than exist:
    # np.random.choice raises ValueError for batch_size > m, so clamp it.
    effective_batch = min(batch_size, m)
    w = np.zeros(n)
    losses = []
    for _ in range(epochs):
        idx = np.random.choice(m, effective_batch, replace=False)
        Xb, yb = X[idx], y[idx]
        # The loss is tracked on the full data so curves are comparable
        # across optimizers, even though the step uses only the batch.
        losses.append(np.mean((X @ w - y) ** 2))
        grad = (2 / effective_batch) * Xb.T @ (Xb @ w - yb)
        w -= lr * grad
    return w, losses

# Demo: recover known weights from synthetic, lightly noisy linear data.
np.random.seed(42)  # fixed seed so the printed weights are reproducible
true_w = np.array([3, -2, 1, 0.5, -1])
X = np.random.randn(1000, true_w.size)
y = X @ true_w + 0.1 * np.random.randn(X.shape[0])

# Only the fitted weights are compared here; the loss histories returned as
# the second tuple element are not used.
w_batch = batch_gd(X, y)[0]
w_sgd = sgd(X, y)[0]
w_mini = minibatch_sgd(X, y)[0]

print(f"真實權重: {true_w}")
print(f"Batch GD: {np.round(w_batch, 3)}")
print(f"SGD:      {np.round(w_sgd, 3)}")
print(f"Mini-Batch:{np.round(w_mini, 3)}")

解鎖完整教學內容

本章為付費內容。加入專案即可解鎖超過 5000 字的深度解析,包含 10 個以上神級 Prompt 與真實 Source Code 範例!