部署推薦系統 API
在本課程的最後一章,我們將把推薦系統包裝成一個生產級的 API 服務,讓任何應用(前端網站、手機 App)都可以呼叫它來取得個人化推薦。
建立推薦系統引擎
首先,我們建立一個推薦引擎類別,封裝所有邏輯:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
class RecommendationEngine:
    """Movie recommendation engine bundling content-based, popular and hybrid logic.

    The constructor reads the movies and ratings CSV files, then precomputes
    a genre-based movie-to-movie cosine-similarity matrix and a per-movie
    popularity score. The public methods read those precomputed attributes.
    """

    def __init__(self, movies_path='movies.csv', ratings_path='ratings.csv'):
        """Load the data and precompute features, similarity and popularity.

        Args:
            movies_path: CSV providing the ``movieId``, ``title`` and
                ``genres`` columns used by this class.
            ratings_path: CSV providing the ``userId``, ``movieId`` and
                ``rating`` columns used by this class.
        """
        # Load the raw data.
        self.movies = pd.read_csv(movies_path)
        self.ratings = pd.read_csv(ratings_path)
        # Derived structures; order matters because the similarity matrix
        # is built from the genre features.
        self._prepare_features()
        self._compute_similarity()
        self._compute_popularity()
        print(f"推薦引擎初始化完成:")
        print(f" {len(self.movies)} 部電影")
        print(f" {self.ratings['userId'].nunique()} 位使用者")
        print(f" {len(self.ratings)} 筆評分")

    def _prepare_features(self):
        """Build ``self.genre_df``: one row per movieId, one column per genre."""
        # Genres are stored as a single pipe-delimited string per movie.
        self.movies['genres_list'] = self.movies['genres'].str.split('|')
        # One-hot encode the genre lists.
        mlb = MultiLabelBinarizer()
        genre_matrix = mlb.fit_transform(self.movies['genres_list'])
        self.genre_df = pd.DataFrame(
            genre_matrix,
            columns=mlb.classes_,
            index=self.movies['movieId'],
        )

    def _compute_similarity(self):
        """Build the movie-to-movie cosine-similarity matrix, labelled by movieId."""
        self.movie_similarity = cosine_similarity(self.genre_df)
        self.movie_similarity_df = pd.DataFrame(
            self.movie_similarity,
            index=self.genre_df.index,
            columns=self.genre_df.index,
        )

    def _compute_popularity(self):
        """Store ``self.popularity``: movieId -> rating_count * avg_rating."""
        movie_stats = self.ratings.groupby('movieId').agg(
            rating_count=('rating', 'count'),
            avg_rating=('rating', 'mean'),
        )
        movie_stats['popularity'] = movie_stats['rating_count'] * movie_stats['avg_rating']
        self.popularity = movie_stats['popularity'].to_dict()

    def _movie_record(self, movie_id):
        """Return ``{'movie_id', 'title', 'genres'}`` for a movie, or None.

        None is returned when ``movie_id`` has no row in ``self.movies``
        (for example a movie that was rated but is absent from the movies
        file), so callers can skip it instead of raising IndexError.
        """
        match = self.movies[self.movies['movieId'] == movie_id]
        if match.empty:
            return None
        row = match.iloc[0]
        return {
            'movie_id': int(movie_id),
            'title': row['title'],
            'genres': row['genres'],
        }

    def content_based(self, movie_id, n=10):
        """Return up to ``n`` movies most similar to ``movie_id``.

        Args:
            movie_id: Id of the reference movie.
            n: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres`` and
            ``score`` (similarity rounded to 4 decimals), best first.
            Empty when ``movie_id`` is not in the similarity matrix.
        """
        if n <= 0 or movie_id not in self.movie_similarity_df.index:
            return []
        # Exclude the movie itself, then keep the n highest similarities.
        scores = self.movie_similarity_df[movie_id].drop(movie_id).nlargest(n)
        results = []
        for mid, score in scores.items():
            record = self._movie_record(mid)
            if record is None:
                continue
            record['score'] = round(float(score), 4)
            results.append(record)
        return results

    def hybrid_recommend(self, user_id, n=10):
        """Return up to ``n`` recommendations blending similarity and popularity.

        Users without any rating get ``popular_recommendations(n)``.
        Otherwise, each unwatched movie is scored as
        ``0.3 * (summed similarity to the user's 5 best-rated movies)`` plus
        ``0.4 * (popularity / max popularity)``.

        Args:
            user_id: Id of the user to recommend for.
            n: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres`` and
            ``score`` (rounded to 4 decimals), best first. For users without
            ratings, the dicts have the ``popular_recommendations`` shape.
        """
        if n <= 0:
            return []
        user_ratings = self.ratings[self.ratings['userId'] == user_id]
        if user_ratings.empty:
            # Cold start: nothing to personalise on.
            return self.popular_recommendations(n)

        watched = set(user_ratings['movieId'])

        # Content-based part. Iterate the column directly: iterrows() would
        # upcast movieId to float because the row mixes int and float values.
        cb_scores = {}
        favorites = user_ratings.sort_values('rating', ascending=False).head(5)
        for fav_id in favorites['movieId']:
            if fav_id not in self.movie_similarity_df.index:
                continue
            # Take the 20 MOST similar movies; the column is ordered by
            # movieId, so a plain head(20) would ignore similarity entirely.
            neighbours = self.movie_similarity_df[fav_id].drop(fav_id).nlargest(20)
            for sim_id, similarity in neighbours.items():
                if sim_id not in watched:
                    cb_scores[sim_id] = cb_scores.get(sim_id, 0) + similarity * 0.3

        # Popularity part, normalised by the maximum; fall back to 1 when the
        # table is empty or its maximum is 0 to avoid dividing by zero.
        max_pop = (max(self.popularity.values()) if self.popularity else 1) or 1
        pop_scores = {
            mid: (pop / max_pop) * 0.4
            for mid, pop in self.popularity.items()
            if mid not in watched
        }

        # Merge both partial scores.
        final_scores = {
            mid: cb_scores.get(mid, 0) + pop_scores.get(mid, 0)
            for mid in cb_scores.keys() | pop_scores.keys()
        }
        ranked = sorted(final_scores.items(), key=lambda item: item[1], reverse=True)

        results = []
        for mid, score in ranked:
            record = self._movie_record(mid)
            if record is None:
                # Rated movie without metadata: skip and keep filling up to n.
                continue
            record['score'] = round(float(score), 4)
            results.append(record)
            if len(results) == n:
                break
        return results

    def popular_recommendations(self, n=10):
        """Return up to ``n`` popular movies (the cold-start fallback).

        Only movies with at least 10 ratings are considered; they are ranked
        by ``rating count * average rating``.

        Args:
            n: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            A list of dicts with ``movie_id``, ``title``, ``genres``,
            ``avg_rating`` (rounded to 2 decimals) and ``rating_count``.
        """
        if n <= 0:
            return []
        movie_stats = self.ratings.groupby('movieId').agg(
            count=('rating', 'count'),
            avg=('rating', 'mean'),
        )
        # copy() so the column assignment below targets an independent frame
        # rather than a filtered slice.
        movie_stats = movie_stats[movie_stats['count'] >= 10].copy()
        movie_stats['score'] = movie_stats['count'] * movie_stats['avg']
        ranked = movie_stats.sort_values('score', ascending=False)

        results = []
        for mid, row in ranked.iterrows():
            record = self._movie_record(mid)
            if record is None:
                continue
            record['avg_rating'] = round(float(row['avg']), 2)
            record['rating_count'] = int(row['count'])
            results.append(record)
            if len(results) == n:
                break
        return results
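建立 FastAPI 推薦服務
接著把以下程式碼存成 recommend_api.py,並將上面的 RecommendationEngine 類別放在同一個檔案中(或從引擎模組匯入),後面的 uvicorn 指令與 Dockerfile 都會用到這個模組名稱: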
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import time
# ASGI application object that the route decorators below register on.
app = FastAPI(title="推薦系統 API", version="1.0.0")
# Build the recommendation engine once, when this module is imported.
# With no arguments it reads movies.csv and ratings.csv.
engine = RecommendationEngine()
# Simple in-memory cache: maps a request key to a (recommendations, timestamp) tuple.
cache = {}
cache_ttl = 300 # 5 minutes (compared against time.time() differences, i.e. seconds)
# Response schema of the /recommend/{user_id} endpoint.
class RecommendResponse(BaseModel):
    # Recommended movies, one dict per movie.
    recommendations: List[dict]
    # True when the result was served from the in-memory cache.
    cached: bool
    # Time spent computing the recommendations, in milliseconds (0 on a cache hit).
    processing_time_ms: float
# Response schema of the /popular endpoint.
class PopularResponse(BaseModel):
    # Popular movies, one dict per movie.
    recommendations: List[dict]
@app.on_event("startup")
async def startup_event():
    """Print a startup banner with the docs URL and a sample request URL."""
    banner = (
        "推薦系統 API 已啟動!",
        "http://localhost:8000/docs 查看 API 文件",
        "http://localhost:8000/recommend/1 測試推薦",
    )
    for line in banner:
        print(line)
@app.get("/")
def root():
    """Describe the service: its name, version and available endpoints."""
    # Path -> short description of what the endpoint does.
    endpoint_docs = {
        "/recommend/{user_id}": "為使用者取得個人化推薦",
        "/popular": "取得熱門推薦",
        "/similar/{movie_id}": "取得與某電影相似的電影",
        "/health": "健康檢查",
    }
    return {
        "service": "推薦系統 API",
        "version": "1.0.0",
        "endpoints": endpoint_docs,
    }
@app.get("/health")
def health_check():
    """Report a healthy status together with the size of the loaded dataset."""
    ratings = engine.ratings
    payload = {"status": "healthy"}
    payload["movies_count"] = len(engine.movies)
    payload["users_count"] = ratings['userId'].nunique()
    payload["ratings_count"] = len(ratings)
    return payload
@app.get("/recommend/{user_id}", response_model=RecommendResponse)
def recommend(user_id: int, n: int = 10):
    """Return personalised recommendations for a user, served from cache when fresh.

    Args:
        user_id: Id of the user to recommend for.
        n: Number of recommendations requested.

    Returns:
        A dict with ``recommendations``, ``cached`` (True on a cache hit) and
        ``processing_time_ms`` (0 on a cache hit).
    """
    cache_key = f"recommend:{user_id}:{n}"

    # Single lookup instead of `in` followed by `[]`. The monotonic clock
    # keeps the TTL check valid even if the system clock is adjusted.
    entry = cache.get(cache_key)
    if entry is not None:
        cached_result, stored_at = entry
        if time.monotonic() - stored_at < cache_ttl:
            return {
                "recommendations": cached_result,
                "cached": True,
                "processing_time_ms": 0
            }

    # Cache miss or stale entry: compute, timing with the high-resolution clock.
    started = time.perf_counter()
    recommendations = engine.hybrid_recommend(user_id, n)
    elapsed = (time.perf_counter() - started) * 1000

    # Evict expired entries before storing the new one so keys that are never
    # requested again do not accumulate forever. Iterate over a snapshot
    # because the dict is modified during the loop.
    now = time.monotonic()
    for key, (_, stored_at) in list(cache.items()):
        if now - stored_at >= cache_ttl:
            cache.pop(key, None)
    cache[cache_key] = (recommendations, now)

    return {
        "recommendations": recommendations,
        "cached": False,
        "processing_time_ms": round(elapsed, 2)
    }
@app.get("/popular", response_model=PopularResponse)
def popular(n: int = 10):
    """Return the most popular movies (intended for new users)."""
    top_movies = engine.popular_recommendations(n)
    return {"recommendations": top_movies}
@app.get("/similar/{movie_id}")
def similar_movies(movie_id: int, n: int = 10):
    """Return movies similar to the given movie; respond 404 when there are none."""
    results = engine.content_based(movie_id, n)
    if results:
        return {"movie_id": movie_id, "similar_movies": results}
    # An empty result is reported as an unknown movie id.
    raise HTTPException(status_code=404, detail=f"找不到電影 ID: {movie_id}")
# Allow running the service directly with `python <this file>`.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn
    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
測試 API
# Start the service (--reload restarts it on code changes; development only)
uvicorn recommend_api:app --host 0.0.0.0 --port 8000 --reload
# Personalised recommendations
# (URLs are quoted so the shell does not treat "?" as a glob character)
curl "http://localhost:8000/recommend/1?n=5"
# Popular recommendations
curl "http://localhost:8000/popular?n=10"
# Similar movies
curl "http://localhost:8000/similar/1"
# Health check
curl "http://localhost:8000/health"
部署到 Docker
FROM python:3.11-slim
WORKDIR /app
# Install dependencies first so this layer is cached until requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code and the data files
COPY . .
# Run as an unprivileged user instead of root
RUN useradd --create-home --shell /usr/sbin/nologin appuser
USER appuser
# Port the API listens on
EXPOSE 8000
# Mark the container unhealthy when /health stops answering
# (the slim image ships without curl, so Python performs the request)
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=3)"]
# Start the API server
CMD ["uvicorn", "recommend_api:app", "--host", "0.0.0.0", "--port", "8000"]
requirements.txt:
fastapi
uvicorn
pandas
numpy
scikit-learn
joblib
pydantic
使用 Vibe Coding 部署 API
🔥 【推薦 API 部署詠唱範例】
「請幫我將推薦系統打包成 Docker 服務:1. 使用 Python 3.11-slim 作為基底。2. 安裝 fastapi、uvicorn、pandas、numpy、scikit-learn。3. 複製推薦引擎程式碼與資料集。4. 使用 uvicorn 啟動,8000 埠。5. 加入 HEALTHCHECK 指令,定期呼叫 /health 端點確認服務正常。6. 寫一個 docker-compose.yml 包含此服務。」
本日總結
在本章中,你學到了:
- ✅ 推薦引擎封裝:將所有推薦邏輯包裝成可重複使用的類別
- ✅ FastAPI 服務:建立三個推薦端點(個人化、熱門、相似)
- ✅ 快取層:使用記憶體快取減少重複計算
- ✅ 錯誤處理:使用者不存在或電影 ID 不正確時的處理
- ✅ Docker 部署:將推薦 API 容器化,方便部署到雲端
恭喜你完成了整個「推薦系統引擎」課程!
你現在已經具備了:
- 🎯 內容為本推薦(Content-Based Filtering)
- 👥 協同過濾(User-based / Item-based CF)
- 🔮 SVD 矩陣分解(Netflix Prize 演算法)
- 🚀 混合式推薦(結合多種方法)
- ❄️ 冷啟動解決方案
- 📊 推薦系統評估(Precision/Recall/Diversity/Novelty)
- 🌐 推薦 API 部署(FastAPI + Docker)
這些技能無論是在電商平台建立「猜你喜歡」功能、為內容平台打造個人化推薦、或是作為推薦系統工程師求職,都能讓你擁有巨大的競爭優勢!