@zcaudate
Created December 4, 2025 10:02
import os
import json
import time
from typing import Dict, Any, List
# In a real scenario, you would install these libraries:
# pip install google-cloud-aiplatform google-cloud-pubsub
# The imports below fall back to mock classes so the file can be read and run without the SDK installed
try:
    from google.cloud import aiplatform
    import vertexai
    from vertexai.generative_models import GenerativeModel
except ImportError:
    print("Google Cloud SDK not found. This code requires 'google-cloud-aiplatform'.")

    # Define dummy stand-ins to allow the code to be read/checked without crashing
    class GenerativeModel:
        def __init__(self, model_name): pass
        def generate_content(self, prompt):
            return type('obj', (object,), {'text': '{"niche": "Travel", "is_influencer": true, "contact_intent": true}'})

    class vertexai:  # minimal stub so init_vertex_ai() doesn't hit a NameError
        @staticmethod
        def init(project=None, location=None): pass
# Configuration
PROJECT_ID = "your-project-id"
LOCATION = "us-central1"
MODEL_NAME = "gemini-1.5-flash-001"
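# Optional, minimal sketch: allow environment overrides so deployments don't
# have to edit this file (assumption: the GCP_PROJECT / GCP_LOCATION variable
# names are a convention of this script, not something the SDK requires).
PROJECT_ID = os.environ.get("GCP_PROJECT", PROJECT_ID)
LOCATION = os.environ.get("GCP_LOCATION", LOCATION)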

def init_vertex_ai():
    """Initializes the Vertex AI SDK."""
    try:
        vertexai.init(project=PROJECT_ID, location=LOCATION)
        print(f"Vertex AI initialized for project {PROJECT_ID}")
    except Exception as e:
        print(f"Failed to initialize Vertex AI: {e}")

def fetch_profile_data_via_api(user_id: str) -> Dict[str, Any]:
    """
    Simulates fetching a user profile using the official Instagram Graph API.
    In a production environment, this would use the `requests` library to hit:
    GET graph.facebook.com/{api-version}/{user-id}?fields=biography,media_count,followers_count&access_token={token}
    """
    print(f"Fetching data for User ID: {user_id} via Graph API...")
    # Mock response data representing a legitimate API payload;
    # values vary with user_id so the demo output differs per profile
    mock_response = {
        "id": user_id,
        "username": f"user_{user_id}_travels",
        "biography": "Exploring the world one city at a time. ✈️ Photographer | Blogger. Contact for collabs.",
        "media_count": 450 + int(user_id[-2:]),
        "followers_count": 12000 + (int(user_id[-1]) * 500)
    }
    # Simulate network latency
    time.sleep(0.5)
    return mock_response
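
# A hedged sketch of the production fetch described in the docstring above,
# using `requests` against the Graph API. Assumptions (not in the original):
# the IG_ACCESS_TOKEN environment variable holds a valid token with the
# required permissions, and the v19.0 version string is illustrative.
def fetch_profile_data_live(user_id: str) -> Dict[str, Any]:
    import requests  # pip install requests

    url = f"https://graph.facebook.com/v19.0/{user_id}"
    params = {
        "fields": "username,biography,media_count,followers_count",
        "access_token": os.environ["IG_ACCESS_TOKEN"],
    }
    resp = requests.get(url, params=params, timeout=10)
    resp.raise_for_status()  # surface HTTP errors instead of failing silently
    return resp.json()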

def analyze_profile_with_vertex(profile_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Uses Vertex AI (Gemini) to categorize the profile based on the biography.
    """
    print(f"Sending profile {profile_data['username']} to Vertex AI for analysis...")
    model = GenerativeModel(MODEL_NAME)
    # Construct a prompt for the AI (an f-string, so the profile fields are interpolated)
    prompt = f"""
    Analyze the following Instagram profile data and output a valid JSON object (no markdown) with:
    1. 'niche': The likely niche of the user (e.g., Travel, Tech, Food).
    2. 'is_influencer': Boolean, based on follower count > 10000.
    3. 'contact_intent': Boolean, if they are open to business/collabs.

    Profile Data:
    Username: {profile_data['username']}
    Bio: {profile_data['biography']}
    Followers: {profile_data['followers_count']}
    """
    try:
        # Generate response
        response = model.generate_content(prompt)
        # Clean response text to ensure valid JSON parsing (strip markdown backticks)
        raw_text = response.text.strip()
        if raw_text.startswith("```json"):
            raw_text = raw_text[7:]
        if raw_text.startswith("```"):
            raw_text = raw_text[3:]
        if raw_text.endswith("```"):
            raw_text = raw_text[:-3]
        # Parse the cleaned string into a real dictionary
        analysis = json.loads(raw_text)
        # Merge the AI insights with the original data
        enriched_data = {
            **profile_data,
            "ai_analysis": analysis
        }
        return enriched_data
    except json.JSONDecodeError:
        print(f"Failed to parse JSON for {profile_data['username']}")
        return {**profile_data, "error": "AI response was not valid JSON"}
    except Exception as e:
        print(f"Error during Vertex AI analysis: {e}")
        return profile_data
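
# Note: newer Gemini models on Vertex AI can be asked for JSON output directly,
# which would remove the manual backtick-stripping above. A minimal sketch
# (assumption: the installed SDK version exposes GenerationConfig with
# response_mime_type):
#
#   from vertexai.generative_models import GenerationConfig
#   response = model.generate_content(
#       prompt,
#       generation_config=GenerationConfig(response_mime_type="application/json"),
#   )
#   analysis = json.loads(response.text)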

def run_pipeline(user_ids: List[str]):
    """
    Orchestrates the pipeline: Ingest -> Process -> Output.
    """
    init_vertex_ai()
    processed_profiles = []
    for uid in user_ids:
        # Step 1: Ingest (Compliance: Using official API)
        raw_data = fetch_profile_data_via_api(uid)
        # Step 2: Process (Vertex AI)
        enriched_data = analyze_profile_with_vertex(raw_data)
        processed_profiles.append(enriched_data)
        print(f"Successfully processed {raw_data['username']}\n")
    # Step 3: Storage (mocked here; see the store_in_bigquery sketch below)
    print("--- Pipeline Summary ---")
    print(json.dumps(processed_profiles, indent=2))
    print("Data ready for BigQuery or Firestore insertion.")

if __name__ == "__main__":
    # Example User IDs to process
    target_users = ["17841400000000001", "17841400000000002"]
    run_pipeline(target_users)