Last active
January 30, 2025 18:58
-
-
Save mascot6699/6fd5abc0e873955884d922135e4ab43a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
| from typing import List, Dict, Tuple | |
| import pandas as pd | |
class PredictionModel:
    """
    Predicts probability of user interactions with videos.

    The feature vector built by prepare_features() is consumed positionally
    by predict(), so the relevant column offsets are kept as named class
    constants to prevent index drift.
    """

    # Column offsets into the feature vector built by prepare_features().
    _DURATION_IDX = 4
    _LIKE_COUNT_IDX = 5
    _COMMENT_COUNT_IDX = 6
    _SHARE_COUNT_IDX = 7

    def __init__(self):
        # Scaler is reserved for a future trained model; the heuristic
        # predict() below does not use it.
        self.scaler = StandardScaler()

    def prepare_features(self, user_data: Dict, video_data: Dict) -> np.ndarray:
        """
        Prepares features for prediction from user and video data.

        Returns a (1, 11) array: 4 user features, 5 video features,
        2 interaction features, in the order listed below.
        """
        features = [
            # User features
            len(user_data.get('recently_watched', [])),
            user_data.get('total_watch_time', 0),
            user_data.get('like_rate', 0),
            user_data.get('comment_rate', 0),
            # Video features
            video_data.get('duration', 0),
            video_data.get('like_count', 0),
            video_data.get('comment_count', 0),
            video_data.get('share_count', 0),
            video_data.get('author_follower_count', 0),
            # Interaction features
            1 if video_data.get('author_id') in user_data.get('following_ids', []) else 0,
            video_data.get('same_tag_today', 0)
        ]
        return np.array(features).reshape(1, -1)

    def predict(self, user_data: Dict, video_data: Dict) -> Dict[str, float]:
        """
        Predicts probabilities of different user interactions.

        Simplified heuristic logic — in reality this would use trained ML
        models. Each probability is clipped to [0, 1].
        """
        row = self.prepare_features(user_data, video_data)[0]
        # Bug fix: the original negative indices (-3/-4/-5) actually pointed
        # at author_follower_count, share_count and comment_count instead of
        # the intended like_count, comment_count and share_count columns.
        like_prob = np.clip(row[self._LIKE_COUNT_IDX] / 100, 0, 1)
        comment_prob = np.clip(row[self._COMMENT_COUNT_IDX] / 50, 0, 1)
        share_prob = np.clip(row[self._SHARE_COUNT_IDX] / 30, 0, 1)
        # Longer videos are assumed less likely to be watched to completion.
        completion_prob = np.clip(0.8 - row[self._DURATION_IDX] / 300, 0, 1)
        return {
            'like_probability': float(like_prob),
            'comment_probability': float(comment_prob),
            'share_probability': float(share_prob),
            'completion_probability': float(completion_prob)
        }
class ValueModel:
    """
    Calculates expected value of recommending a video to a user
    """

    def __init__(self):
        # Base values for different interactions
        self.base_values = {
            'like': 10.0,
            'comment': 5.0,
            'share': 15.0,
            'completion': 20.0
        }

    def calculate_value(self, predictions: Dict[str, float], user_data: Dict, video_data: Dict) -> float:
        """
        Calculates total expected value based on predictions and context.

        Each interaction's base value is weighted by its predicted
        probability and a context multiplier; platform and author
        adjustments are then applied to the sum.
        """
        follows_author = video_data.get('author_id') in user_data.get('following_ids', [])

        # Per-interaction context multipliers
        fatigue = 1 - 0.1 * video_data.get('same_tag_today', 0)           # decay for content fatigue
        quality = 1.5 if video_data.get('is_high_quality', False) else 1.0  # boost for high-quality content
        follow = 1.2 if follows_author else 1.0                            # boost for followed creators
        bait = 0.8 if video_data.get('is_like_bait', False) else 1.0       # penalty for like-bait content

        weights = {
            'like': predictions['like_probability'] * fatigue,
            'comment': predictions['comment_probability'] * quality,
            'share': predictions['share_probability'] * follow,
            'completion': predictions['completion_probability'] * bait,
        }
        total_value = sum(self.base_values[kind] * w for kind, w in weights.items())

        # Platform value adjustments
        if video_data.get('is_brand_safe', True):
            total_value *= 1.1
        # Author value adjustments
        if video_data.get('author_type') == 'rising_creator':
            total_value *= 1.2
        return total_value
class RecommendationSystem:
    """
    Main recommendation system that combines prediction and value models
    """

    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def rank_videos(self, user_data: Dict, candidate_videos: List[Dict]) -> List[Tuple[Dict, float]]:
        """
        Ranks candidate videos based on predicted value for the user,
        highest expected value first.
        """
        scored = [
            (
                video,
                self.value_model.calculate_value(
                    self.prediction_model.predict(user_data, video),
                    user_data,
                    video,
                ),
            )
            for video in candidate_videos
        ]
        # Descending by expected value
        return sorted(scored, key=lambda pair: pair[1], reverse=True)

    def get_recommendations(self, user_data: Dict, candidate_videos: List[Dict], n_recommendations: int = 10) -> List[Dict]:
        """
        Gets top N recommendations for a user, applying simple diversity
        rules: never repeat an author, and limit tag repetition once enough
        distinct tags have been shown.
        """
        picked: List[Dict] = []
        used_authors = set()
        used_tags = set()
        for video, _score in self.rank_videos(user_data, candidate_videos):
            # Stop once we have enough recommendations
            if len(picked) >= n_recommendations:
                break
            author = video.get('author_id')
            tag = video.get('main_tag')
            # Diversity: one video per author
            if author in used_authors:
                continue
            # Diversity: skip repeated tags once tag variety is high enough
            if tag in used_tags and len(used_tags) > n_recommendations / 2:
                continue
            picked.append(video)
            used_authors.add(author)
            used_tags.add(tag)
        return picked
# Example usage
if __name__ == "__main__":
    # Sample user profile
    demo_user = {
        'user_id': '12345',
        'recently_watched': ['vid1', 'vid2', 'vid3'],
        'total_watch_time': 3600,
        'like_rate': 0.2,
        'comment_rate': 0.05,
        'following_ids': ['auth1', 'auth2']
    }

    # Sample candidate pool
    demo_candidates = [
        {
            'video_id': 'vid4',
            'duration': 120,
            'like_count': 1000,
            'comment_count': 200,
            'share_count': 150,
            'author_id': 'auth1',
            'author_follower_count': 10000,
            'main_tag': 'comedy',
            'same_tag_today': 2,
            'is_high_quality': True,
            'is_like_bait': False,
            'author_type': 'rising_creator'
        },
        # Add more candidate videos here
    ]

    # Initialize and use recommendation system
    rec_system = RecommendationSystem()
    recommendations = rec_system.get_recommendations(demo_user, demo_candidates, n_recommendations=5)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| from dataclasses import dataclass | |
| from typing import List, Dict | |
| # ===================== | |
| # Data Structures | |
| # ===================== | |
@dataclass
class Video:
    """A candidate video plus the flags read by the value model and ranking rules."""
    video_id: str
    author_id: str
    tags: List[str]  # content tags, e.g. "sports", "music"
    views: int = 0  # view counter; not read by the mock models in this file
    is_cold_start: bool = False  # triggers the cold-start boost in ValueModel
    is_friend_author: bool = False  # boosts comment value; capped at 2 per feed in apply_rules
    is_like_bait: bool = False  # penalized in ValueModel; filtered under the "no_like_bait" experiment
@dataclass
class User:
    """Viewer profile used for personalization."""
    user_id: str
    watched_videos: List[str]  # ids of previously watched videos
    followed_authors: List[str]  # author ids; a followed author boosts like probability
    location: str = ""  # free-form location; not read by the models in this file
@dataclass
class Context:
    """Per-request context for a recommendation call."""
    experiment_groups: List[str]  # active experiment flags, e.g. "cold_start_boost_v2", "no_like_bait"
    connection_type: str = "wifi"  # e.g. "wifi"; not read by the models in this file
| # ===================== | |
| # Prediction Model (Mock) | |
| # ===================== | |
class PredictionModel:
    def predict(self, user: User, video: Video) -> Dict[str, float]:
        """Mock prediction model returning interaction probabilities"""
        # Deterministic affinity component on top of the random noise
        affinity = 0.1
        if video.author_id in user.followed_authors:
            affinity += 0.2  # boost for followed creators
        if any(t in ("sports", "music") for t in video.tags):
            affinity += 0.15  # boost for popular verticals
        return {
            "like": random.uniform(0, 0.5) + affinity,
            "comment": random.uniform(0, 0.1),
            "share": random.uniform(0, 0.05),
            "watch_duration": random.uniform(0.5, 1.0),
        }
| # ===================== | |
| # Value Model | |
| # ===================== | |
class ValueModel:
    def calculate_value(self, preds: Dict[str, float], video: Video, context: Context) -> float:
        """Calculate video value based on predictions and business rules"""
        # Base interaction values weighted by predicted probabilities;
        # friend authors earn an extra +2 on the comment weight.
        engagement = (
            10 * preds['like']
            + (3 + 2 * video.is_friend_author) * preds['comment']
            + 5 * preds['share']
            + 8 * preds['watch_duration']  # watch duration importance
        )
        # Cold start boost, amplified for the experiment group
        boost = 2.0 if video.is_cold_start else 0
        if "cold_start_boost_v2" in context.experiment_groups:
            boost *= 1.5
        # Like bait penalty
        penalty = -3.0 if video.is_like_bait else 0
        return engagement + boost + penalty
| # ===================== | |
| # Recommendation System | |
| # ===================== | |
class RecommendationSystem:
    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def generate_candidates(self, user: User, context: Context) -> List[Video]:
        """Mock candidate generation from different recall sources"""
        # Play duration recall (mock 20 videos); keyword order fixes the
        # sequence of random draws per video.
        return [
            Video(
                video_id=f"v{idx}",
                author_id=f"a{random.randint(1, 100)}",
                tags=random.choices(["sports", "music", "tech", "food"], k=2),
                views=random.randint(0, 100000),
                is_cold_start=random.random() < 0.2,
                is_friend_author=random.random() < 0.3,
                is_like_bait=random.random() < 0.1,
            )
            for idx in range(20)
        ]

    def apply_rules(self, ranked_videos: List[Video], context: Context) -> List[Video]:
        """Apply business rules to final ranking"""
        kept: List[Video] = []
        friend_count = 0
        for vid in ranked_videos:
            # Rule: max 2 friend videos
            if vid.is_friend_author:
                if friend_count >= 2:
                    continue
                # NOTE: counted even if the like-bait rule below drops the
                # video — preserved from the original ordering.
                friend_count += 1
            # Rule: no like-bait videos in the experiment group
            if vid.is_like_bait and "no_like_bait" in context.experiment_groups:
                continue
            kept.append(vid)
        return kept

    def recommend(self, user: User, context: Context, top_n: int = 10) -> List[Video]:
        """Main recommendation pipeline"""
        # Stage 1: candidate generation
        candidates = self.generate_candidates(user, context)
        # Stage 2: scoring, highest value first
        scored = sorted(
            (
                (self.value_model.calculate_value(
                    self.prediction_model.predict(user, vid), vid, context), vid)
                for vid in candidates
            ),
            key=lambda pair: pair[0],
            reverse=True,
        )
        # Stage 3: business rules, then truncate
        filtered = self.apply_rules([vid for _, vid in scored], context)
        return filtered[:top_n]
| # ===================== | |
| # Example Usage | |
| # ===================== | |
if __name__ == "__main__":
    # Initialize system
    rec_system = RecommendationSystem()

    # Mock user and request context
    demo_user = User(
        user_id="u123",
        watched_videos=["v1", "v5", "v10"],
        followed_authors=["a5", "a10"],
        location="New York",
    )
    demo_context = Context(
        experiment_groups=["cold_start_boost_v2"],
        connection_type="wifi",
    )

    # Generate and display recommendations
    recommendations = rec_system.recommend(demo_user, demo_context)
    print("Top Recommendations:")
    for rank, vid in enumerate(recommendations, 1):
        print(f"{rank}. Video {vid.video_id} (Author: {vid.author_id})")
        print(f" Tags: {', '.join(vid.tags)}")
        print(f" Friend: {vid.is_friend_author}, Cold Start: {vid.is_cold_start}")
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment