Skip to content

Instantly share code, notes, and snippets.

@mascot6699
Last active January 30, 2025 18:58
Show Gist options
  • Select an option

  • Save mascot6699/6fd5abc0e873955884d922135e4ab43a to your computer and use it in GitHub Desktop.

Select an option

Save mascot6699/6fd5abc0e873955884d922135e4ab43a to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.preprocessing import StandardScaler
from typing import List, Dict, Tuple
import pandas as pd
class PredictionModel:
    """
    Predicts probability of user interactions with videos.

    The predictions are hand-written heuristics over a flat feature vector;
    no fitted estimator is used by this class.
    """

    # Column positions of the video features inside the vector produced by
    # prepare_features(). predict() reads columns through these names because
    # offsets counted from the end of the vector select the wrong features
    # (e.g. -3 is author_follower_count, not like_count).
    _DURATION_IDX = 4
    _LIKE_COUNT_IDX = 5
    _COMMENT_COUNT_IDX = 6
    _SHARE_COUNT_IDX = 7

    def __init__(self):
        # Instantiated here but never fitted or applied by this class.
        self.scaler = StandardScaler()

    def prepare_features(self, user_data: Dict, video_data: Dict) -> np.ndarray:
        """
        Prepares features for prediction from user and video data.

        Missing keys default to 0 (or an empty list for the list-valued keys).

        Returns:
            An array of shape (1, 11) laid out as:
            0 number of recently watched videos, 1 total_watch_time,
            2 like_rate, 3 comment_rate, 4 duration, 5 like_count,
            6 comment_count, 7 share_count, 8 author_follower_count,
            9 whether the user follows the author (1/0), 10 same_tag_today.
        """
        features = [
            # User features
            len(user_data.get('recently_watched', [])),
            user_data.get('total_watch_time', 0),
            user_data.get('like_rate', 0),
            user_data.get('comment_rate', 0),
            # Video features
            video_data.get('duration', 0),
            video_data.get('like_count', 0),
            video_data.get('comment_count', 0),
            video_data.get('share_count', 0),
            video_data.get('author_follower_count', 0),
            # Interaction features
            1 if video_data.get('author_id') in user_data.get('following_ids', []) else 0,
            video_data.get('same_tag_today', 0),
        ]
        return np.array(features).reshape(1, -1)

    def predict(self, user_data: Dict, video_data: Dict) -> Dict[str, float]:
        """
        Predicts probabilities of different user interactions.

        Returns:
            A dict with the keys 'like_probability', 'comment_probability',
            'share_probability' and 'completion_probability', each a float
            clipped to [0, 1].
        """
        row = self.prepare_features(user_data, video_data)[0]

        # Simplified prediction logic - in reality would use trained ML models
        like_prob = np.clip(row[self._LIKE_COUNT_IDX] / 100, 0, 1)  # Based on like_count
        comment_prob = np.clip(row[self._COMMENT_COUNT_IDX] / 50, 0, 1)  # Based on comment_count
        share_prob = np.clip(row[self._SHARE_COUNT_IDX] / 30, 0, 1)  # Based on share_count
        # Longer videos are less likely to be finished: start at 0.8 and lose
        # 1/300 per unit of duration.
        completion_prob = np.clip(0.8 - row[self._DURATION_IDX] / 300, 0, 1)  # Based on duration

        return {
            'like_probability': float(like_prob),
            'comment_probability': float(comment_prob),
            'share_probability': float(share_prob),
            'completion_probability': float(completion_prob),
        }
class ValueModel:
    """
    Calculates expected value of recommending a video to a user
    """

    def __init__(self):
        # Base values for different interactions
        self.base_values = {
            'like': 10.0,
            'comment': 5.0,
            'share': 15.0,
            'completion': 20.0,
        }

    def calculate_value(self, predictions: Dict[str, float], user_data: Dict, video_data: Dict) -> float:
        """
        Calculates total expected value based on predictions and context.

        Args:
            predictions: Must contain 'like_probability',
                'comment_probability', 'share_probability' and
                'completion_probability'.
            user_data: Read for 'following_ids' (defaults to no follows).
            video_data: Read for 'same_tag_today', 'is_high_quality',
                'author_id', 'is_like_bait', 'is_brand_safe' (defaults to
                True) and 'author_type'.

        Returns:
            The sum of the four per-interaction values after the platform
            and author multipliers have been applied.

        Raises:
            KeyError: If a required probability is missing from predictions.
        """
        # Decay for content fatigue: every same-tag video already seen today
        # removes 10% of the like value. Clamped at zero so that more than
        # ten same-tag views cannot turn an expected like into a negative
        # contribution.
        fatigue_factor = max(0.0, 1 - 0.1 * video_data.get('same_tag_today', 0))

        # User value components
        like_value = (
            self.base_values['like'] * predictions['like_probability'] *
            fatigue_factor
        )
        comment_value = (
            self.base_values['comment'] * predictions['comment_probability'] *
            (1.5 if video_data.get('is_high_quality', False) else 1.0)  # Boost for high-quality content
        )
        share_value = (
            self.base_values['share'] * predictions['share_probability'] *
            (1.2 if video_data.get('author_id') in user_data.get('following_ids', []) else 1.0)  # Boost for followed creators
        )
        completion_value = (
            self.base_values['completion'] * predictions['completion_probability'] *
            (0.8 if video_data.get('is_like_bait', False) else 1.0)  # Penalty for like-bait content
        )
        total_value = like_value + comment_value + share_value + completion_value

        # Platform value adjustments (videos are treated as brand safe
        # unless explicitly flagged otherwise)
        if video_data.get('is_brand_safe', True):
            total_value *= 1.1

        # Author value adjustments
        if video_data.get('author_type') == 'rising_creator':
            total_value *= 1.2

        return total_value
class RecommendationSystem:
    """
    Main recommendation system that combines prediction and value models
    """

    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def rank_videos(self, user_data: Dict, candidate_videos: List[Dict]) -> List[Tuple[Dict, float]]:
        """
        Scores every candidate for the user and orders them best-first.

        Returns:
            (video, value) pairs sorted by value in descending order;
            candidates with equal value keep their input order.
        """
        def expected_value(video):
            # Interaction predictions feed the value model for this video.
            predictions = self.prediction_model.predict(user_data, video)
            return self.value_model.calculate_value(predictions, user_data, video)

        scored = [(video, expected_value(video)) for video in candidate_videos]
        return sorted(scored, key=lambda pair: pair[1], reverse=True)

    def get_recommendations(self, user_data: Dict, candidate_videos: List[Dict], n_recommendations: int = 10) -> List[Dict]:
        """
        Picks up to n_recommendations videos from the ranked candidates.

        Diversity rules, applied while walking the ranking best-first:
        - a video is skipped when its 'author_id' was already picked;
        - a video is skipped when its 'main_tag' was already picked AND the
          number of distinct tags picked so far exceeds n_recommendations / 2.
        """
        picked = []
        authors_used = set()
        tags_used = set()

        for video, _value in self.rank_videos(user_data, candidate_videos):
            # Stop as soon as the quota is filled
            if len(picked) >= n_recommendations:
                break

            author = video.get('author_id')
            tag = video.get('main_tag')

            # Repeated author, or repeated tag once enough distinct tags exist
            if author in authors_used or (
                tag in tags_used and len(tags_used) > n_recommendations / 2
            ):
                continue

            picked.append(video)
            authors_used.add(author)
            tags_used.add(tag)

        return picked
# Example usage: one hand-written user and one hand-written candidate video
if __name__ == "__main__":
    # Initialize the recommendation system
    rec_system = RecommendationSystem()

    # Sample user
    user_data = {
        'user_id': '12345',
        'recently_watched': ['vid1', 'vid2', 'vid3'],
        'total_watch_time': 3600,
        'like_rate': 0.2,
        'comment_rate': 0.05,
        'following_ids': ['auth1', 'auth2'],
    }

    # Sample candidate: authored by someone the user follows
    sample_video = {
        'video_id': 'vid4',
        'duration': 120,
        'like_count': 1000,
        'comment_count': 200,
        'share_count': 150,
        'author_id': 'auth1',
        'author_follower_count': 10000,
        'main_tag': 'comedy',
        'same_tag_today': 2,
        'is_high_quality': True,
        'is_like_bait': False,
        'author_type': 'rising_creator',
    }
    candidate_videos = [
        sample_video,
        # Add more candidate videos here
    ]

    # Ask for the top five recommendations
    recommendations = rec_system.get_recommendations(
        user_data,
        candidate_videos,
        n_recommendations=5,
    )
import random
from dataclasses import dataclass
from typing import List, Dict
# =====================
# Data Structures
# =====================
@dataclass
class Video:
    """A candidate video together with the flags the ranking rules read."""

    # Identity
    video_id: str
    author_id: str
    tags: List[str]

    # Popularity counter; presumably a view count (not read by the ranking code)
    views: int = 0

    # Business-rule flags
    is_cold_start: bool = False      # earns a flat value boost in ValueModel
    is_friend_author: bool = False   # raises comment weight; capped in apply_rules
    is_like_bait: bool = False       # incurs a value penalty; may be filtered out
@dataclass
class User:
    """The viewer for whom recommendations are generated."""

    user_id: str

    # Ids of videos already watched (not read by the ranking code in this file)
    watched_videos: List[str]

    # Author ids the user follows; the mock predictor raises the like
    # probability for videos by these authors
    followed_authors: List[str]

    # Free-form location; empty when unknown
    location: str = ""
@dataclass
class Context:
    """Per-request settings that influence scoring and filtering."""

    # Names of the experiments the request is enrolled in, e.g.
    # "cold_start_boost_v2" or "no_like_bait"
    experiment_groups: List[str]

    # Network type of the client; defaults to "wifi"
    connection_type: str = "wifi"
# =====================
# Prediction Model (Mock)
# =====================
class PredictionModel:
    """Mock predictor that mixes a rule-based like bonus with random noise."""

    def predict(self, user: User, video: Video) -> Dict[str, float]:
        """Mock prediction model returning interaction probabilities"""
        boosted_tags = ["sports", "music"]

        # Like bonus: starts at 0.1, +0.2 for a followed author, +0.15 when
        # at least one tag is in the boosted list.
        like_bonus = 0.1
        if video.author_id in user.followed_authors:
            like_bonus += 0.2
        for tag in video.tags:
            if tag in boosted_tags:
                like_bonus += 0.15
                break

        # Random draws happen in the order like, comment, share, watch_duration
        like = random.uniform(0, 0.5) + like_bonus
        comment = random.uniform(0, 0.1)
        share = random.uniform(0, 0.05)
        watch_duration = random.uniform(0.5, 1.0)

        return {
            "like": like,
            "comment": comment,
            "share": share,
            "watch_duration": watch_duration,
        }
# =====================
# Value Model
# =====================
class ValueModel:
    """Turns interaction predictions into a single ranking score."""

    def calculate_value(self, preds: Dict[str, float], video: Video, context: Context) -> float:
        """Calculate video value based on predictions and business rules"""
        # Weighted interaction terms; comments count 5 instead of 3 when the
        # author is a friend (the bool is used as 0/1).
        like_term = 10 * preds['like']
        comment_weight = 3 + 2 * video.is_friend_author
        comment_term = comment_weight * preds['comment']
        share_term = 5 * preds['share']

        # Flat adjustments
        if video.is_cold_start:
            cold_start_boost = 2.0
        else:
            cold_start_boost = 0
        if video.is_like_bait:
            like_bait_penalty = -3.0
        else:
            like_bait_penalty = 0

        # Watch duration importance
        duration_term = 8 * preds['watch_duration']

        # Experiment: cold-start boost is 1.5x larger in this group
        in_boost_experiment = "cold_start_boost_v2" in context.experiment_groups
        if in_boost_experiment:
            cold_start_boost *= 1.5

        # Accumulate left to right: like, comment, share, duration, boost, penalty
        score = like_term + comment_term
        score = score + share_term
        score = score + duration_term
        score = score + cold_start_boost
        score = score + like_bait_penalty
        return score
# =====================
# Recommendation System
# =====================
class RecommendationSystem:
    """Three-stage pipeline: generate candidates, score them, apply rules."""

    # Maximum number of friend-authored videos allowed in one result list
    _MAX_FRIEND_VIDEOS = 2

    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def generate_candidates(self, user: User, context: Context) -> List[Video]:
        """Mock candidate generation from different recall sources.

        Returns 20 randomly populated videos; `user` and `context` are
        accepted but not consulted by this mock.
        """
        candidates = []
        # Play duration recall (mock 20 videos)
        for i in range(20):
            candidates.append(Video(
                video_id=f"v{i}",
                author_id=f"a{random.randint(1,100)}",
                tags=random.choices(["sports", "music", "tech", "food"], k=2),
                views=random.randint(0, 100000),
                is_cold_start=random.random() < 0.2,
                is_friend_author=random.random() < 0.3,
                is_like_bait=random.random() < 0.1
            ))
        return candidates

    def apply_rules(self, ranked_videos: List[Video], context: Context) -> List[Video]:
        """Apply business rules to final ranking.

        Rules, in order:
        1. In the "no_like_bait" experiment group, like-bait videos are
           dropped.
        2. At most two friend-authored videos are kept.

        The input order is preserved for the videos that survive.
        """
        drop_like_bait = "no_like_bait" in context.experiment_groups
        filtered = []
        friend_videos_shown = 0
        for video in ranked_videos:
            # Rule: No like-bait videos in experiment group. Checked first so
            # that a dropped video never uses up a friend slot.
            if drop_like_bait and video.is_like_bait:
                continue
            # Rule: Max 2 friend videos
            if video.is_friend_author:
                if friend_videos_shown >= self._MAX_FRIEND_VIDEOS:
                    continue
                friend_videos_shown += 1
            filtered.append(video)
        return filtered

    def recommend(self, user: User, context: Context, top_n: int = 10) -> List[Video]:
        """Main recommendation pipeline.

        Returns at most `top_n` videos, highest value first, after the
        business rules have been applied.
        """
        # Stage 1: Candidate Generation
        candidates = self.generate_candidates(user, context)

        # Stage 2: Scoring
        scored_videos = []
        for video in candidates:
            preds = self.prediction_model.predict(user, video)
            value = self.value_model.calculate_value(preds, video, context)
            scored_videos.append((value, video))

        # Sort by descending value; the key compares only the score, so
        # Video objects themselves are never compared.
        scored_videos.sort(reverse=True, key=lambda x: x[0])

        # Stage 3: Apply Rules
        ranked_videos = [v[1] for v in scored_videos]
        filtered_videos = self.apply_rules(ranked_videos, context)
        return filtered_videos[:top_n]
# =====================
# Example Usage
# =====================
if __name__ == "__main__":
    # Request context: enrolled in the stronger cold-start boost experiment
    context = Context(
        experiment_groups=["cold_start_boost_v2"],
        connection_type="wifi",
    )

    # Mock user
    user = User(
        user_id="u123",
        watched_videos=["v1", "v5", "v10"],
        followed_authors=["a5", "a10"],
        location="New York",
    )

    # Build the system and run the pipeline
    rec_system = RecommendationSystem()
    recommendations = rec_system.recommend(user, context)

    # Display results, numbered from 1
    print("Top Recommendations:")
    for position, video in enumerate(recommendations, start=1):
        tag_list = ', '.join(video.tags)
        print(f"{position}. Video {video.video_id} (Author: {video.author_id})")
        print(f" Tags: {tag_list}")
        print(f" Friend: {video.is_friend_author}, Cold Start: {video.is_cold_start}")
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment