部署推薦系統 API

在本課程的最後一章,我們將把推薦系統包裝成一個生產級的 API 服務,讓任何應用(前端網站、手機 App)都可以呼叫它來取得個人化推薦。

建立推薦系統引擎

首先,我們建立一個推薦引擎類別,封裝所有邏輯:

import pandas as pd
import numpy as np
import joblib
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer

class RecommendationEngine:
    """Movie recommendation engine bundling data loading, features and ranking.

    On construction the engine reads the movies and ratings CSV files,
    one-hot encodes the ``genres`` column, builds a movie-to-movie cosine
    similarity matrix and computes a per-movie popularity score.  The public
    methods return plain lists of dicts.

    Attributes:
        movies: DataFrame read from ``movies_path``; the ``movieId``,
            ``title`` and ``genres`` columns are used.
        ratings: DataFrame read from ``ratings_path``; the ``userId``,
            ``movieId`` and ``rating`` columns are used.
        genre_df: One-hot genre matrix indexed by ``movieId``.
        movie_similarity_df: Square cosine-similarity frame whose index and
            columns are both ``movieId``.
        popularity: Dict mapping ``movieId`` to ``rating_count * avg_rating``.
    """

    # Blend weights and limits used by hybrid_recommend().
    CONTENT_WEIGHT = 0.3
    POPULARITY_WEIGHT = 0.4
    TOP_FAVORITES = 5
    NEIGHBORS_PER_FAVORITE = 20
    # Minimum number of ratings for a movie to appear in popular_recommendations().
    MIN_RATING_COUNT = 10

    def __init__(self, movies_path='movies.csv', ratings_path='ratings.csv'):
        """Load both CSV files and precompute every lookup structure.

        Args:
            movies_path: Path of the movies CSV.
            ratings_path: Path of the ratings CSV.
        """
        # Load the raw data.
        self.movies = pd.read_csv(movies_path)
        self.ratings = pd.read_csv(ratings_path)

        # Precompute features, similarity and popularity (in that order:
        # the similarity matrix is built from the genre features).
        self._prepare_features()
        self._compute_similarity()
        self._compute_popularity()

        print("推薦引擎初始化完成:")
        print(f"  {len(self.movies)} 部電影")
        print(f"  {self.ratings['userId'].nunique()} 位使用者")
        print(f"  {len(self.ratings)} 筆評分")

    def _prepare_features(self):
        """Build the one-hot genre matrix ``self.genre_df``."""
        # Split the pipe-separated genre string.  A missing (non-string)
        # value becomes an empty genre list instead of a NaN that would make
        # the binarizer fail.
        self.movies['genres_list'] = self.movies['genres'].apply(
            lambda genres: genres.split('|') if isinstance(genres, str) else []
        )

        # One-hot encode the genre lists.
        mlb = MultiLabelBinarizer()
        genre_matrix = mlb.fit_transform(self.movies['genres_list'])
        self.genre_df = pd.DataFrame(
            genre_matrix,
            columns=mlb.classes_,
            index=self.movies['movieId']
        )

    def _compute_similarity(self):
        """Compute the movie-to-movie cosine similarity of the genre vectors."""
        self.movie_similarity = cosine_similarity(self.genre_df)
        self.movie_similarity_df = pd.DataFrame(
            self.movie_similarity,
            index=self.genre_df.index,
            columns=self.genre_df.index
        )

    def _compute_popularity(self):
        """Compute ``self.popularity`` as rating count times mean rating."""
        movie_stats = self.ratings.groupby('movieId').agg(
            rating_count=('rating', 'count'),
            avg_rating=('rating', 'mean')
        )
        movie_stats['popularity'] = movie_stats['rating_count'] * movie_stats['avg_rating']
        self.popularity = movie_stats['popularity'].to_dict()

    def _movie_entry(self, mid):
        """Return the base result dict for ``mid``, or None if it is unknown.

        Ratings may reference movie ids that are absent from ``self.movies``;
        callers skip those instead of failing on an empty selection.
        """
        rows = self.movies[self.movies['movieId'] == mid]
        if rows.empty:
            return None
        movie = rows.iloc[0]
        return {
            'movie_id': int(mid),
            'title': movie['title'],
            'genres': movie['genres'],
        }

    def content_based(self, movie_id, n=10):
        """Return up to ``n`` movies most similar to ``movie_id``.

        Args:
            movie_id: Id of the reference movie.
            n: Maximum number of results; a non-positive value yields ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres`` and
            ``score`` (cosine similarity rounded to 4 decimals), most similar
            first.  Empty when ``movie_id`` is not in the similarity matrix.
        """
        if n <= 0 or movie_id not in self.movie_similarity_df.index:
            return []

        # Exclude the movie itself, then keep the n highest similarities.
        scores = self.movie_similarity_df[movie_id].drop(movie_id).nlargest(n)

        results = []
        for mid, score in scores.items():
            entry = self._movie_entry(mid)
            if entry is None:
                continue
            entry['score'] = round(float(score), 4)
            results.append(entry)
        return results

    def hybrid_recommend(self, user_id, n=10):
        """Blend content-based and popularity scores for one user.

        Users without any rating receive ``popular_recommendations(n)``.
        Otherwise every unwatched movie gets
        ``CONTENT_WEIGHT * similarity`` summed over the nearest neighbours of
        the user's top-rated movies, plus ``POPULARITY_WEIGHT`` times its
        popularity normalised by the maximum popularity.

        Args:
            user_id: Id of the user to recommend for.
            n: Maximum number of results; a non-positive value yields ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres`` and
            ``score`` (rounded to 4 decimals), best first.  For users without
            ratings the dicts have the shape of ``popular_recommendations``.
        """
        if n <= 0:
            return []

        user_ratings = self.ratings[self.ratings['userId'] == user_id]
        if user_ratings.empty:
            # Cold start: nothing is known about this user.
            return self.popular_recommendations(n)

        watched = set(user_ratings['movieId'].tolist())

        # Content-based part.  Ids are read from the column rather than via
        # iterrows(), which would upcast them to float alongside the ratings.
        cb_scores = {}
        favorites = user_ratings.sort_values('rating', ascending=False).head(self.TOP_FAVORITES)
        for mid in favorites['movieId'].tolist():
            if mid not in self.movie_similarity_df.index:
                continue
            # Rank by similarity before truncating; taking the first rows of
            # the unsorted column would just pick the first movies in the
            # catalogue.
            neighbours = (
                self.movie_similarity_df[mid]
                .drop(mid)
                .nlargest(self.NEIGHBORS_PER_FAVORITE)
            )
            for sim_id, score in neighbours.items():
                if sim_id not in watched:
                    cb_scores[sim_id] = cb_scores.get(sim_id, 0) + score * self.CONTENT_WEIGHT

        # Popularity part, normalised by the maximum.  ``or 1`` covers both
        # an empty table and an all-zero one (avoids dividing by zero).
        max_pop = max(self.popularity.values(), default=0) or 1
        pop_scores = {
            mid: (pop / max_pop) * self.POPULARITY_WEIGHT
            for mid, pop in self.popularity.items()
            if mid not in watched
        }

        # Merge both score tables.
        final_scores = {
            mid: cb_scores.get(mid, 0) + pop_scores.get(mid, 0)
            for mid in set(cb_scores) | set(pop_scores)
        }

        # Best score first; ties are broken by movie id so the output is
        # deterministic.
        ranked = sorted(final_scores.items(), key=lambda item: (-item[1], item[0]))

        results = []
        for mid, score in ranked:
            entry = self._movie_entry(mid)
            if entry is None:
                # Rated movie that is missing from the movies table.
                continue
            entry['score'] = round(float(score), 4)
            results.append(entry)
            if len(results) == n:
                break
        return results

    def popular_recommendations(self, n=10):
        """Return the most popular movies (cold-start fallback).

        Only movies with at least ``MIN_RATING_COUNT`` ratings that exist in
        ``self.movies`` are considered; they are ranked by
        ``rating_count * avg_rating``.

        Args:
            n: Maximum number of results; a non-positive value yields ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres``,
            ``avg_rating`` (rounded to 2 decimals) and ``rating_count``.
        """
        if n <= 0:
            return []

        stats = self.ratings.groupby('movieId').agg(
            rating_count=('rating', 'count'),
            avg_rating=('rating', 'mean')
        )
        stats = stats[stats['rating_count'] >= self.MIN_RATING_COUNT]
        # Drop ids without metadata before truncating so they do not take up
        # result slots.
        stats = stats[stats.index.isin(self.movies['movieId'])]
        stats = stats.assign(score=stats['rating_count'] * stats['avg_rating'])
        top_movies = stats.sort_values('score', ascending=False).head(n)

        results = []
        for mid, row in top_movies.iterrows():
            entry = self._movie_entry(mid)
            if entry is None:
                continue
            entry['avg_rating'] = round(float(row['avg_rating']), 2)
            entry['rating_count'] = int(row['rating_count'])
            results.append(entry)
        return results

建立 FastAPI 推薦服務

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import time

# FastAPI application object; the route decorators below register on it.
app = FastAPI(title="推薦系統 API", version="1.0.0")

# Build the recommendation engine once, when this module is imported, so
# every request shares the same loaded data.
engine = RecommendationEngine()

# Simple in-process cache: key -> (recommendations, time.time() when stored).
cache = {}
cache_ttl = 300  # entry lifetime in seconds (5 minutes)

# Response schema of GET /recommend/{user_id}.
# (Documented with comments rather than a class docstring so the generated
# API schema stays unchanged.)
class RecommendResponse(BaseModel):
    recommendations: List[dict]  # one dict per recommended movie
    cached: bool  # True when the answer was served from the in-memory cache
    processing_time_ms: float  # engine time in milliseconds; 0 on a cache hit

# Response schema of GET /popular.
# (Documented with comments rather than a class docstring so the generated
# API schema stays unchanged.)
class PopularResponse(BaseModel):
    recommendations: List[dict]  # one dict per popular movie

@app.on_event("startup")
async def startup_event():
    """Print a short banner with helpful local URLs once the service starts."""
    banner_lines = (
        "推薦系統 API 已啟動!",
        "http://localhost:8000/docs 查看 API 文件",
        "http://localhost:8000/recommend/1 測試推薦",
    )
    for line in banner_lines:
        print(line)

@app.get("/")
def root():
    # Service index: name, version and a one-line description per endpoint.
    # (No docstring on purpose: FastAPI would publish it in the API docs.)
    endpoint_help = {
        "/recommend/{user_id}": "為使用者取得個人化推薦",
        "/popular": "取得熱門推薦",
        "/similar/{movie_id}": "取得與某電影相似的電影",
        "/health": "健康檢查",
    }
    return {
        "service": "推薦系統 API",
        "version": "1.0.0",
        "endpoints": endpoint_help,
    }

@app.get("/health")
def health_check():
    # Liveness probe that also reports the size of the loaded dataset.
    # (No docstring on purpose: FastAPI would publish it in the API docs.)
    movies = engine.movies
    ratings = engine.ratings

    payload = {"status": "healthy"}
    payload["movies_count"] = len(movies)
    payload["users_count"] = ratings['userId'].nunique()
    payload["ratings_count"] = len(ratings)
    return payload

@app.get("/recommend/{user_id}", response_model=RecommendResponse)
def recommend(user_id: int, n: int = 10):
    """為使用者取得個人化推薦"""
    # The docstring above is kept as-is because FastAPI publishes a path
    # operation's docstring in the generated API docs.
    #
    # Returns personalised recommendations for ``user_id`` (at most ``n``),
    # served from the in-memory cache while the entry is younger than
    # ``cache_ttl`` seconds.  Raises HTTP 422 when ``n`` is negative.
    if n < 0:
        # A negative n would otherwise be used as a slice/head() bound and
        # return nearly the whole catalogue.
        raise HTTPException(status_code=422, detail="參數 n 不可為負數")

    cache_key = f"recommend:{user_id}:{n}"
    now = time.time()

    # Serve from the cache while the entry is still fresh.
    entry = cache.get(cache_key)
    if entry is not None:
        cached_result, timestamp = entry
        if now - timestamp < cache_ttl:
            return {
                "recommendations": cached_result,
                "cached": True,
                "processing_time_ms": 0
            }

    # Miss or stale entry: evict everything that has expired so keys that are
    # never requested again do not accumulate.  Iterate over a snapshot since
    # the dict is modified inside the loop.
    for key, (_, stored_at) in list(cache.items()):
        if now - stored_at >= cache_ttl:
            cache.pop(key, None)

    # Compute the recommendations; perf_counter() is the clock intended for
    # measuring short durations.
    start = time.perf_counter()
    recommendations = engine.hybrid_recommend(user_id, n)
    elapsed = (time.perf_counter() - start) * 1000

    # Store the result together with the time it was produced.
    cache[cache_key] = (recommendations, time.time())

    return {
        "recommendations": recommendations,
        "cached": False,
        "processing_time_ms": round(elapsed, 2)
    }

@app.get("/popular", response_model=PopularResponse)
def popular(n: int = 10):
    """取得熱門推薦(給新使用者)"""
    # The docstring above is kept as-is because FastAPI publishes a path
    # operation's docstring in the generated API docs.
    #
    # Returns up to ``n`` popular movies.  Raises HTTP 422 when ``n`` is
    # negative: the value ends up in DataFrame.head(), where a negative
    # count means "all rows except the last |n|".
    if n < 0:
        raise HTTPException(status_code=422, detail="參數 n 不可為負數")

    return {
        "recommendations": engine.popular_recommendations(n)
    }

@app.get("/similar/{movie_id}")
def similar_movies(movie_id: int, n: int = 10):
    """取得與某電影相似的電影"""
    # The docstring above is kept as-is because FastAPI publishes a path
    # operation's docstring in the generated API docs.
    #
    # Returns up to ``n`` movies similar to ``movie_id``.
    # Raises HTTP 422 when ``n`` is negative and HTTP 404 when the movie id
    # is not part of the similarity matrix.
    if n < 0:
        raise HTTPException(status_code=422, detail="參數 n 不可為負數")

    # Check existence explicitly: an empty result alone (for example n=0)
    # does not mean the movie is unknown.
    if movie_id not in engine.movie_similarity_df.index:
        raise HTTPException(status_code=404, detail=f"找不到電影 ID: {movie_id}")

    results = engine.content_based(movie_id, n)
    return {"movie_id": movie_id, "similar_movies": results}

# Allow starting the service with `python <this file>`; uvicorn is imported
# only on this path.
if __name__ == "__main__":
    import uvicorn
    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)

測試 API

# Start the service (--reload restarts the server on code changes)
uvicorn recommend_api:app --host 0.0.0.0 --port 8000 --reload

# Personalised recommendations
# (URLs with "?" are quoted so shells such as zsh do not treat it as a glob)
curl "http://localhost:8000/recommend/1?n=5"

# Popular recommendations
curl "http://localhost:8000/popular?n=10"

# Similar movies
curl "http://localhost:8000/similar/1"

# Health check
curl "http://localhost:8000/health"

部署到 Docker

FROM python:3.11-slim

# Do not buffer stdout/stderr so print() output appears in the container logs
# immediately.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Install dependencies first so this layer is cached until requirements.txt
# changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and the data files.
COPY . .

# Run the service as an unprivileged user instead of root.
RUN useradd --system --no-create-home appuser
USER appuser

# Port the API listens on.
EXPOSE 8000

# Probe the /health endpoint. The slim image ships no curl, so use Python;
# urlopen() raises (non-zero exit) on connection errors and HTTP error codes.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=3)"]

# Start the API server.
CMD ["uvicorn", "recommend_api:app", "--host", "0.0.0.0", "--port", "8000"]

requirements.txt

# Runtime dependencies of the recommendation API.
# NOTE(review): versions are unpinned, so each image build installs whatever
# is latest at build time — consider pinning for reproducible builds.
fastapi
uvicorn
pandas
numpy
scikit-learn
joblib
pydantic

使用 Vibe Coding 部署 API

🔥 【推薦 API 部署詠唱範例】 「請幫我將推薦系統打包成 Docker 服務: 1. 使用 Python 3.11-slim 作為基底。 2. 安裝 fastapi、uvicorn、pandas、numpy、scikit-learn。 3. 複製推薦引擎程式碼與資料集。 4. 使用 uvicorn 啟動,監聽 8000 埠。 5. 設定 Docker HEALTHCHECK,定期呼叫 /health 端點確認服務正常。 6. 寫一個 docker-compose.yml 包含此服務。」

本日總結

在本章中,你學到了:

  1. 推薦引擎封裝:將所有推薦邏輯包裝成可重複使用的類別
  2. FastAPI 服務:建立三個推薦端點(個人化、熱門、相似)
  3. 快取層:使用記憶體快取減少重複計算
  4. 錯誤處理:使用者不存在或電影 ID 不正確時的處理
  5. Docker 部署:將推薦 API 容器化,方便部署到雲端

恭喜你完成了整個 推薦系統引擎 課程!

你現在已經具備了:

  • 🎯 內容為本推薦(Content-Based Filtering)
  • 👥 協同過濾(User-based / Item-based CF)
  • 🔮 SVD 矩陣分解(Netflix Prize 演算法)
  • 🚀 混合式推薦(結合多種方法)
  • ❄️ 冷啟動解決方案
  • 📊 推薦系統評估(Precision/Recall/Diversity/Novelty)
  • 🌐 推薦 API 部署(FastAPI + Docker)

這些技能無論是在電商平台建立「猜你喜歡」功能、為內容平台打造個人化推薦、或是作為推薦系統工程師求職,都能讓你擁有巨大的競爭優勢!

解鎖完整教學內容

本章為付費內容。加入專案即可解鎖超過 5000 字的深度解析,包含 10 個以上神級 Prompt 與真實 Source Code 範例!