Last active
January 30, 2025 18:58
-
-
Save mascot6699/6fd5abc0e873955884d922135e4ab43a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
| from typing import List, Dict, Tuple | |
| import pandas as pd | |
class PredictionModel:
    """
    Predicts probability of user interactions with videos.

    The feature vector built by prepare_features() is consumed positionally
    by predict(), so the relevant column offsets are kept as named class
    constants to prevent index drift.
    """

    # Column offsets into the feature vector built by prepare_features().
    _DURATION_IDX = 4
    _LIKE_COUNT_IDX = 5
    _COMMENT_COUNT_IDX = 6
    _SHARE_COUNT_IDX = 7

    def __init__(self):
        # Scaler is reserved for a future trained model; the heuristic
        # predict() below does not use it.
        self.scaler = StandardScaler()

    def prepare_features(self, user_data: Dict, video_data: Dict) -> np.ndarray:
        """
        Prepares features for prediction from user and video data.

        Returns a (1, 11) array: 4 user features, 5 video features,
        2 interaction features, in the order listed below.
        """
        features = [
            # User features
            len(user_data.get('recently_watched', [])),
            user_data.get('total_watch_time', 0),
            user_data.get('like_rate', 0),
            user_data.get('comment_rate', 0),
            # Video features
            video_data.get('duration', 0),
            video_data.get('like_count', 0),
            video_data.get('comment_count', 0),
            video_data.get('share_count', 0),
            video_data.get('author_follower_count', 0),
            # Interaction features
            1 if video_data.get('author_id') in user_data.get('following_ids', []) else 0,
            video_data.get('same_tag_today', 0)
        ]
        return np.array(features).reshape(1, -1)

    def predict(self, user_data: Dict, video_data: Dict) -> Dict[str, float]:
        """
        Predicts probabilities of different user interactions.

        Simplified heuristic logic — in reality this would use trained ML
        models. Each probability is clipped to [0, 1].
        """
        row = self.prepare_features(user_data, video_data)[0]
        # Bug fix: the original negative indices (-3/-4/-5) actually pointed
        # at author_follower_count, share_count and comment_count instead of
        # the intended like_count, comment_count and share_count columns.
        like_prob = np.clip(row[self._LIKE_COUNT_IDX] / 100, 0, 1)
        comment_prob = np.clip(row[self._COMMENT_COUNT_IDX] / 50, 0, 1)
        share_prob = np.clip(row[self._SHARE_COUNT_IDX] / 30, 0, 1)
        # Longer videos are assumed less likely to be watched to completion.
        completion_prob = np.clip(0.8 - row[self._DURATION_IDX] / 300, 0, 1)
        return {
            'like_probability': float(like_prob),
            'comment_probability': float(comment_prob),
            'share_probability': float(share_prob),
            'completion_probability': float(completion_prob)
        }
class ValueModel:
    """
    Calculates expected value of recommending a video to a user
    """

    def __init__(self):
        # Base values for different interactions
        self.base_values = {
            'like': 10.0,
            'comment': 5.0,
            'share': 15.0,
            'completion': 20.0
        }

    def calculate_value(self, predictions: Dict[str, float], user_data: Dict, video_data: Dict) -> float:
        """
        Calculates total expected value based on predictions and context.

        Each interaction's base value is weighted by its predicted
        probability and a context multiplier; platform and author
        adjustments are then applied to the sum.
        """
        follows_author = video_data.get('author_id') in user_data.get('following_ids', [])

        # Per-interaction context multipliers
        fatigue = 1 - 0.1 * video_data.get('same_tag_today', 0)           # decay for content fatigue
        quality = 1.5 if video_data.get('is_high_quality', False) else 1.0  # boost for high-quality content
        follow = 1.2 if follows_author else 1.0                            # boost for followed creators
        bait = 0.8 if video_data.get('is_like_bait', False) else 1.0       # penalty for like-bait content

        weights = {
            'like': predictions['like_probability'] * fatigue,
            'comment': predictions['comment_probability'] * quality,
            'share': predictions['share_probability'] * follow,
            'completion': predictions['completion_probability'] * bait,
        }
        total_value = sum(self.base_values[kind] * w for kind, w in weights.items())

        # Platform value adjustments
        if video_data.get('is_brand_safe', True):
            total_value *= 1.1
        # Author value adjustments
        if video_data.get('author_type') == 'rising_creator':
            total_value *= 1.2
        return total_value
class RecommendationSystem:
    """
    Main recommendation system that combines prediction and value models
    """

    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def rank_videos(self, user_data: Dict, candidate_videos: List[Dict]) -> List[Tuple[Dict, float]]:
        """
        Ranks candidate videos based on predicted value for the user,
        highest expected value first.
        """
        scored = [
            (
                video,
                self.value_model.calculate_value(
                    self.prediction_model.predict(user_data, video),
                    user_data,
                    video,
                ),
            )
            for video in candidate_videos
        ]
        # Descending by expected value
        return sorted(scored, key=lambda pair: pair[1], reverse=True)

    def get_recommendations(self, user_data: Dict, candidate_videos: List[Dict], n_recommendations: int = 10) -> List[Dict]:
        """
        Gets top N recommendations for a user, applying simple diversity
        rules: never repeat an author, and limit tag repetition once enough
        distinct tags have been shown.
        """
        picked: List[Dict] = []
        used_authors = set()
        used_tags = set()
        for video, _score in self.rank_videos(user_data, candidate_videos):
            # Stop once we have enough recommendations
            if len(picked) >= n_recommendations:
                break
            author = video.get('author_id')
            tag = video.get('main_tag')
            # Diversity: one video per author
            if author in used_authors:
                continue
            # Diversity: skip repeated tags once tag variety is high enough
            if tag in used_tags and len(used_tags) > n_recommendations / 2:
                continue
            picked.append(video)
            used_authors.add(author)
            used_tags.add(tag)
        return picked
# Example usage
if __name__ == "__main__":
    # Sample user profile
    demo_user = {
        'user_id': '12345',
        'recently_watched': ['vid1', 'vid2', 'vid3'],
        'total_watch_time': 3600,
        'like_rate': 0.2,
        'comment_rate': 0.05,
        'following_ids': ['auth1', 'auth2']
    }

    # Sample candidate pool
    demo_candidates = [
        {
            'video_id': 'vid4',
            'duration': 120,
            'like_count': 1000,
            'comment_count': 200,
            'share_count': 150,
            'author_id': 'auth1',
            'author_follower_count': 10000,
            'main_tag': 'comedy',
            'same_tag_today': 2,
            'is_high_quality': True,
            'is_like_bait': False,
            'author_type': 'rising_creator'
        },
        # Add more candidate videos here
    ]

    # Initialize and use recommendation system
    rec_system = RecommendationSystem()
    recommendations = rec_system.get_recommendations(demo_user, demo_candidates, n_recommendations=5)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| from dataclasses import dataclass | |
| from typing import List, Dict | |
| # ===================== | |
| # Data Structures | |
| # ===================== | |
@dataclass
class Video:
    """A candidate video plus the flags read by the value model and ranking rules."""
    video_id: str
    author_id: str
    tags: List[str]  # content tags, e.g. "sports", "music"
    views: int = 0  # view counter; not read by the mock models in this file
    is_cold_start: bool = False  # triggers the cold-start boost in ValueModel
    is_friend_author: bool = False  # boosts comment value; capped at 2 per feed in apply_rules
    is_like_bait: bool = False  # penalized in ValueModel; filtered under the "no_like_bait" experiment
@dataclass
class User:
    """Viewer profile used for personalization."""
    user_id: str
    watched_videos: List[str]  # ids of previously watched videos
    followed_authors: List[str]  # author ids; a followed author boosts like probability
    location: str = ""  # free-form location; not read by the models in this file
@dataclass
class Context:
    """Per-request context for a recommendation call."""
    experiment_groups: List[str]  # active experiment flags, e.g. "cold_start_boost_v2", "no_like_bait"
    connection_type: str = "wifi"  # e.g. "wifi"; not read by the models in this file
| # ===================== | |
| # Prediction Model (Mock) | |
| # ===================== | |
class PredictionModel:
    def predict(self, user: User, video: Video) -> Dict[str, float]:
        """Mock prediction model returning interaction probabilities"""
        # Deterministic affinity component on top of the random noise
        affinity = 0.1
        if video.author_id in user.followed_authors:
            affinity += 0.2  # boost for followed creators
        if any(t in ("sports", "music") for t in video.tags):
            affinity += 0.15  # boost for popular verticals
        return {
            "like": random.uniform(0, 0.5) + affinity,
            "comment": random.uniform(0, 0.1),
            "share": random.uniform(0, 0.05),
            "watch_duration": random.uniform(0.5, 1.0),
        }
| # ===================== | |
| # Value Model | |
| # ===================== | |
class ValueModel:
    def calculate_value(self, preds: Dict[str, float], video: Video, context: Context) -> float:
        """Calculate video value based on predictions and business rules"""
        # Base interaction values weighted by predicted probabilities;
        # friend authors earn an extra +2 on the comment weight.
        engagement = (
            10 * preds['like']
            + (3 + 2 * video.is_friend_author) * preds['comment']
            + 5 * preds['share']
            + 8 * preds['watch_duration']  # watch duration importance
        )
        # Cold start boost, amplified for the experiment group
        boost = 2.0 if video.is_cold_start else 0
        if "cold_start_boost_v2" in context.experiment_groups:
            boost *= 1.5
        # Like bait penalty
        penalty = -3.0 if video.is_like_bait else 0
        return engagement + boost + penalty
| # ===================== | |
| # Recommendation System | |
| # ===================== | |
class RecommendationSystem:
    def __init__(self):
        self.prediction_model = PredictionModel()
        self.value_model = ValueModel()

    def generate_candidates(self, user: User, context: Context) -> List[Video]:
        """Mock candidate generation from different recall sources"""
        # Play duration recall (mock 20 videos); keyword order fixes the
        # sequence of random draws per video.
        return [
            Video(
                video_id=f"v{idx}",
                author_id=f"a{random.randint(1, 100)}",
                tags=random.choices(["sports", "music", "tech", "food"], k=2),
                views=random.randint(0, 100000),
                is_cold_start=random.random() < 0.2,
                is_friend_author=random.random() < 0.3,
                is_like_bait=random.random() < 0.1,
            )
            for idx in range(20)
        ]

    def apply_rules(self, ranked_videos: List[Video], context: Context) -> List[Video]:
        """Apply business rules to final ranking"""
        kept: List[Video] = []
        friend_count = 0
        for vid in ranked_videos:
            # Rule: max 2 friend videos
            if vid.is_friend_author:
                if friend_count >= 2:
                    continue
                # NOTE: counted even if the like-bait rule below drops the
                # video — preserved from the original ordering.
                friend_count += 1
            # Rule: no like-bait videos in the experiment group
            if vid.is_like_bait and "no_like_bait" in context.experiment_groups:
                continue
            kept.append(vid)
        return kept

    def recommend(self, user: User, context: Context, top_n: int = 10) -> List[Video]:
        """Main recommendation pipeline"""
        # Stage 1: candidate generation
        candidates = self.generate_candidates(user, context)
        # Stage 2: scoring, highest value first
        scored = sorted(
            (
                (self.value_model.calculate_value(
                    self.prediction_model.predict(user, vid), vid, context), vid)
                for vid in candidates
            ),
            key=lambda pair: pair[0],
            reverse=True,
        )
        # Stage 3: business rules, then truncate
        filtered = self.apply_rules([vid for _, vid in scored], context)
        return filtered[:top_n]
| # ===================== | |
| # Example Usage | |
| # ===================== | |
if __name__ == "__main__":
    # Initialize system
    rec_system = RecommendationSystem()

    # Mock user and request context
    demo_user = User(
        user_id="u123",
        watched_videos=["v1", "v5", "v10"],
        followed_authors=["a5", "a10"],
        location="New York",
    )
    demo_context = Context(
        experiment_groups=["cold_start_boost_v2"],
        connection_type="wifi",
    )

    # Generate and display recommendations
    recommendations = rec_system.recommend(demo_user, demo_context)
    print("Top Recommendations:")
    for rank, vid in enumerate(recommendations, 1):
        print(f"{rank}. Video {vid.video_id} (Author: {vid.author_id})")
        print(f" Tags: {', '.join(vid.tags)}")
        print(f" Friend: {vid.is_friend_author}, Cold Start: {vid.is_cold_start}")
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment