Created
December 4, 2025 10:02
-
-
Save zcaudate/55e0d7abac75a9e6899d89151a8bb296 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import time | |
| from typing import Dict, Any, List | |
# The real pipeline depends on the Google Cloud SDKs:
#   pip install google-cloud-aiplatform google-cloud-pubsub
# When they are not installed, a minimal stand-in for GenerativeModel is
# defined below so the rest of the module can still be imported and run.
try:
    from google.cloud import aiplatform
    import vertexai
    from vertexai.generative_models import GenerativeModel
except ImportError:
    print("Google Cloud SDK not found. This code requires 'google-cloud-aiplatform'.")

    class GenerativeModel:
        """Offline substitute that answers every prompt with a canned reply."""

        def __init__(self, model_name):
            # The model name is accepted for signature compatibility only.
            pass

        def generate_content(self, prompt):
            # Hand back an ad-hoc object whose `text` attribute holds a fixed
            # JSON document, regardless of the prompt.
            canned_attrs = {'text': '{"niche": "Travel", "is_influencer": true, "contact_intent": true}'}
            return type('obj', (object,), canned_attrs)
# Configuration
# Each setting can be overridden through an environment variable; an unset or
# empty variable falls back to the built-in default shown on the right.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or "your-project-id"
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION") or "us-central1"
MODEL_NAME = os.environ.get("VERTEX_MODEL_NAME") or "gemini-1.5-flash-001"
def init_vertex_ai():
    """Set up the Vertex AI SDK for the configured project and location.

    Best-effort: any exception raised while initializing is caught and
    reported on stdout instead of propagating to the caller.
    """
    try:
        sdk_settings = {"project": PROJECT_ID, "location": LOCATION}
        vertexai.init(**sdk_settings)
        print(f"Vertex AI initialized for project {PROJECT_ID}")
    except Exception as exc:
        print(f"Failed to initialize Vertex AI: {exc}")
def fetch_profile_data_via_api(
    user_id: str, *, simulated_latency: float = 0.5
) -> Dict[str, Any]:
    """Return a mock profile shaped like an Instagram Graph API response.

    No network request is made. In a production environment this would use
    the `requests` library to hit:
    GET graph.facebook.com/{api-version}/{user-id}?fields=biography,media_count,followers_count&access_token={token}

    Args:
        user_id: Account identifier. Its last two characters and its last
            character are parsed as integers to vary the mock counts.
        simulated_latency: Seconds to sleep before returning, imitating a
            network round trip. Values <= 0 skip the sleep entirely.

    Returns:
        A dict with the keys ``id``, ``username``, ``biography``,
        ``media_count`` and ``followers_count``.

    Raises:
        ValueError: If ``user_id`` does not end in characters that parse as
            integers (including the empty string).
    """
    print(f"Fetching data for User ID: {user_id} via Graph API...")

    # The trailing characters drive the per-user variation of the mock data.
    try:
        media_offset = int(user_id[-2:])
        follower_step = int(user_id[-1])
    except (ValueError, IndexError) as exc:
        raise ValueError(
            f"user_id must end in digits, got {user_id!r}"
        ) from exc

    # Mock response data representing a legitimate API payload
    mock_response = {
        "id": user_id,
        "username": f"user_{user_id}_travels",
        "biography": "Exploring the world one city at a time. ✈️ Photographer | Blogger. Contact for collabs.",
        "media_count": 450 + media_offset,
        "followers_count": 12000 + (follower_step * 500),
    }

    # Simulate network latency (time.sleep rejects negative durations).
    if simulated_latency > 0:
        time.sleep(simulated_latency)
    return mock_response
def analyze_profile_with_vertex(profile_data: Dict[str, Any]) -> Dict[str, Any]:
    """Categorize a profile with Vertex AI (Gemini) and attach the result.

    Args:
        profile_data: Profile record. The keys ``username``, ``biography``
            and ``followers_count`` are read to build the prompt.

    Returns:
        On success, a new dict holding every entry of ``profile_data`` plus
        ``ai_analysis`` (the model's reply parsed from JSON). If the reply is
        not valid JSON, a new dict with an ``error`` entry instead. If the
        model call fails for any other reason, ``profile_data`` itself is
        returned unchanged.

    Raises:
        KeyError: If ``profile_data`` lacks one of the keys listed above;
            they are read before the guarded model call.
    """
    print(f"Sending profile {profile_data['username']} to Vertex AI for analysis...")
    model = GenerativeModel(MODEL_NAME)

    # Construct a prompt for the AI. The profile lines must be f-strings:
    # a plain string would send the literal "{profile_data[...]}" placeholders
    # to the model instead of the actual profile values.
    prompt = (
        "\n"
        "Analyze the following Instagram profile data and output a valid JSON object (no markdown) with:\n"
        "1. 'niche': The likely niche of the user (e.g., Travel, Tech, Food).\n"
        "2. 'is_influencer': Boolean, based on follower count > 10000.\n"
        "3. 'contact_intent': Boolean, if they are open to business/collabs.\n"
        "Profile Data:\n"
        f"Username: {profile_data['username']}\n"
        f"Bio: {profile_data['biography']}\n"
        f"Followers: {profile_data['followers_count']}\n"
    )

    try:
        # Generate response
        response = model.generate_content(prompt)

        # The model may wrap its answer in a markdown code fence despite the
        # instructions; peel the fence off so json.loads sees bare JSON.
        raw_text = response.text.strip()
        for opening_fence in ("```json", "```"):
            if raw_text.startswith(opening_fence):
                raw_text = raw_text[len(opening_fence):]
        if raw_text.endswith("```"):
            raw_text = raw_text[:-3]

        # Parse the cleaned string into a real dictionary
        analysis = json.loads(raw_text)

        # Merge the AI insights with the original data
        return {**profile_data, "ai_analysis": analysis}
    except json.JSONDecodeError:
        print(f"Failed to parse JSON for {profile_data['username']}")
        return {**profile_data, "error": "AI response was not valid JSON"}
    except Exception as e:
        # Deliberate best-effort boundary: a failed model call must not stop
        # the pipeline, so the profile is passed through without enrichment.
        print(f"Error during Vertex AI analysis: {e}")
        return profile_data
def run_pipeline(user_ids: List[str]) -> List[Dict[str, Any]]:
    """Orchestrate the pipeline: Ingest -> Process -> Output.

    Args:
        user_ids: Account identifiers to process, in order.

    Returns:
        One record per user id, in input order. A record contains an
        ``ai_analysis`` entry only when the analysis step succeeded.
    """
    init_vertex_ai()
    processed_profiles = []

    for uid in user_ids:
        # Step 1: Ingest (Compliance: Using official API)
        raw_data = fetch_profile_data_via_api(uid)

        # Step 2: Process (Vertex AI)
        enriched_data = analyze_profile_with_vertex(raw_data)
        processed_profiles.append(enriched_data)

        # The analysis step reports failure by omitting "ai_analysis", so
        # only claim success when that key is actually present.
        if "ai_analysis" in enriched_data:
            print(f"Successfully processed {raw_data['username']}\n")
        else:
            print(f"Processed {raw_data['username']} without AI analysis\n")

    # Step 3: Storage (Mocking database insertion)
    print("--- Pipeline Summary ---")
    print(json.dumps(processed_profiles, indent=2))
    print("Data ready for BigQuery or Firestore insertion.")

    # Hand the records back so callers can persist or inspect them.
    return processed_profiles
if __name__ == "__main__":
    # Script entry point: run the pipeline over two sample user IDs.
    target_users = [
        "17841400000000001",
        "17841400000000002",
    ]
    run_pipeline(target_users)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment