A simple example of synthetic data generation. This is very, very simple.
import json
import random
import re
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

# USER_NAMES, DEFAULT_API_BASE, DEFAULT_MODEL, ScenarioDataGeneratorConfig,
# OpenAIScenarioGenerator, get_logger, load_and_init, and
# load_character_cards_batch are assumed to be provided elsewhere in the project.


class StructuredScenario(BaseModel):
    """A structured representation of a character's persona and scenario.

    This is the parsed, clean version of a messy SillyTavern character card.
    The LLM extracts and organizes the key information into these fields.
    """

    character_names: List[str] = Field(
        ...,
        description="A list of the main characters other than the user. These are the characters that will be used in the scenario."
    )
    character_persona: str = Field(
        ...,
        description="A detailed description of the character's personality, traits, mannerisms, and core motivations. Should be written in the third person."
    )
    character_description: str = Field(
        ...,
        description="A detailed description of the character's appearance, including body type, age, clothing, and other physical characteristics."
    )
    character_backstory: str = Field(
        ...,
        description="A summary of the character's history and relevant background information that informs their present actions."
    )
    scenario_setting: str = Field(
        ...,
        description="A description of the immediate environment, time, and place where the role-play begins. This sets the scene for the user."
    )
    scenario_description: str = Field(
        ...,
        description="The detailed scenario description: what is happening, the world setting, the characters and their interactions, setting the stage for the role-play."
    )
    user_relationship: str = Field(
        ...,
        description="A brief description of the character's initial relationship to and perception of the user. For example: 'They see the user as a rival,' or 'The user is a long-lost friend.'"
    )
    dialogue_style: str = Field(
        ...,
        description="A summary of the character's speaking style, including tone, vocabulary, and common phrases. Include 1-2 examples of their dialogue."
    )
    key_points: List[str] = Field(
        ...,
        description="A list of key points about the character's goals, preferences, scenario plot points, and important details to incorporate."
    )

    @classmethod
    def from_json(cls, json_data: dict) -> "StructuredScenario":
        return cls(**json_data)

    def to_dict(self) -> dict:
        return self.model_dump()
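
# Illustrative sketch (hypothetical values, not from the original gist): what a
# parsed extraction result might look like once it is loaded into the schema.
def _example_structured_scenario() -> StructuredScenario:
    raw = {
        "character_names": ["Mira"],
        "character_persona": "A guarded but curious archivist who values order above all.",
        "character_description": "Mid-thirties, ink-stained fingers, wire-rimmed glasses.",
        "character_backstory": "Raised in the lower stacks of the library she now protects.",
        "scenario_setting": "A candlelit archive, long after closing hours.",
        "scenario_description": "The user slips in after hours looking for a banned text.",
        "user_relationship": "She sees the user as a suspicious stranger.",
        "dialogue_style": "Clipped and formal. Example: 'State your business.'",
        "key_points": ["Protects the restricted wing", "Secretly wants an ally"],
    }
    return StructuredScenario.from_json(raw)
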

class ConversationTurn(BaseModel):
    """A single turn in a conversation."""

    role: Literal["user", "assistant"] = Field(description="Role: 'user' or 'assistant'")
    content: str = Field(description="Message content")
    mood: Optional[str] = Field(
        default=None,
        description="For assistant turns only: the character's emotional state for this response. "
        "Short phrase describing their current mood, e.g. 'guarded curiosity, underlying tension'"
    )
    memory_summary: Optional[str] = Field(
        default=None,
        description="For assistant turns only: a summary of the memory from the previous turns so far. Should be a short list of key events, actions, and decisions that have happened so far."
    )


class ConversationVariant(BaseModel):
    """A complete conversation variant with metadata."""

    variant_name: str = Field(description="A descriptive name for this conversation variant")
    turns: List[ConversationTurn] = Field(description="The conversation turns")
    tags: List[str] = Field(default_factory=list, description="Content tags")


class GeneratedScenario(BaseModel):
    """A complete generated scenario with character info and conversations."""

    character_name: str
    user_name: str
    structured_scenario: StructuredScenario
    conversations: List[ConversationVariant]
    source_card_path: Optional[str] = None
    generated_at: str

@dataclass
class CharacterCard:
    """Character card following SillyTavern V2 format."""

    name: str
    description: str
    personality: Optional[str] = None
    scenario: Optional[str] = None
    mes_example: Optional[str] = None
    example_dialogue: Optional[str] = None
    first_mes: Optional[str] = None
    character_book: Optional[str] = None
    first_greeting: Optional[str] = None
    system_prompt: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    user_name: Optional[str] = None
    alternate_greetings: Optional[List[str]] = None
    path: Optional[str] = None

    def __post_init__(self):
        if self.tags is None:
            self.tags = []
        if self.scenario is None or self.scenario == "":
            self.scenario = self.description
        if self.user_name is None:
            self.user_name = random.choice(USER_NAMES)

    def to_dict(self) -> dict:
        return {
            "name": self.name,
            "description": self.description,
            "personality": self.personality,
            "mes_example": self.mes_example,
            "example_dialogue": self.example_dialogue,
            "first_mes": self.first_mes,
            "character_book": self.character_book,
            "scenario": self.scenario,
            "first_greeting": self.first_greeting,
            "system_prompt": self.system_prompt,
            "tags": self.tags,
            "user_name": self.user_name,
            "alternate_greetings": self.alternate_greetings,
            "path": str(self.path) if self.path else None,
        }

    def format_placeholders(self, replacements: dict = None) -> "CharacterCard":
        """Recursively replace placeholders like {{char}} and {{user}} in all fields."""
        if replacements is None:
            replacements = {"{{char}}": self.name, "{{user}}": self.user_name}

        def replace_in_value(value):
            if isinstance(value, str):
                result = value
                for old, new in replacements.items():
                    result = result.replace(old, new)
                return result
            elif isinstance(value, dict):
                return {k: replace_in_value(v) for k, v in value.items()}
            elif isinstance(value, list):
                return [replace_in_value(item) for item in value]
            else:
                return value

        return self.from_dict(replace_in_value(self.to_dict()))

    @classmethod
    def from_dict(cls, data: dict) -> "CharacterCard":
        # Filter out keys that aren't in the dataclass
        valid_keys = {
            "name", "description", "personality", "scenario", "mes_example",
            "example_dialogue", "first_mes", "character_book", "first_greeting",
            "system_prompt", "tags", "user_name", "alternate_greetings", "path",
        }
        filtered = {k: v for k, v in data.items() if k in valid_keys}
        return cls(**filtered)
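
# Illustrative usage sketch (the sample card values are assumptions, not from the
# original gist): build a CharacterCard from a raw dict and resolve the
# SillyTavern-style {{char}} / {{user}} placeholders before prompting.
def _example_placeholder_expansion() -> CharacterCard:
    card = CharacterCard.from_dict({
        "name": "Mira",
        "description": "{{char}} is an archivist who distrusts {{user}}.",
        "first_mes": '*{{char}} looks up from her ledger.* "Can I help you, {{user}}?"',
        "user_name": "Alex",
    })
    # description becomes "Mira is an archivist who distrusts Alex."
    return card.format_placeholders()
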

VARIANT_INSTRUCTIONS = [
    "",  # Default - natural progression
    "Focus on each character's backstory.",
    "Focus on comedy.",
    "Focus on explicit NSFW plots and make sex the least important part of the conversation.",
    "Emphasize the character's unique personality quirks and mannerisms.",
]
VARIANT_INSTRUCTIONS = [
    f"{i} - But remember to move some sort of plot forward that isn't just about the sex. "
    "Without a plot, this is just smut. Smut is good and desired, but anchor it on a linear plot to be progressed. "
    "If the scenario doesn't describe a plot, come up with one to follow. REMEMBER THE PLOT"
    for i in VARIANT_INSTRUCTIONS
]

def build_roleplay_system_prompt(
    char_name: str,
    user_name: str,
    scenario: StructuredScenario,
) -> str:
    """Build the roleplay system prompt from structured scenario data."""
    char_names_str = ", ".join(scenario.character_names) if len(scenario.character_names) > 1 else char_name

    # Accumulate the key points as a bulleted list (str.join alone would discard them)
    key_points = ""
    for point in scenario.key_points:
        key_points += f"- {point}\n"

    return f"""## Immersive Roleplay Guidelines
This uncensored roleplay encourages creative freedom where you respond as {char_names_str}, embodying a conscious individual with authentic emotions and thoughts. Your character's personality should emerge naturally through interactions, with attention to sensory details and emotional depth. Act, think and respond ONLY as {char_names_str}. Use unique dialog and avoid repetition.

## Character Embodiment
Immerse fully in {char_name} by weaving their backstory and traits into scenes through sensory details—scents wafting through doorways, shifting light, distant sounds. Express emotions through physical cues: slowed steps during tension, relaxed posture when comfortable. Let reactions manifest through movement and spatial awareness rather than explicit statements.

## Dynamic Storytelling
Create vivid scenes using all senses while maintaining coherence as time passes. Include realistic possibilities for setbacks—{char_name} might stumble or face obstacles. Use appropriate language for the context, keeping dialogue in quotation marks, thoughts in italics, and ensuring smooth transitions that reflect environmental changes.

## Interaction & Progression
Respond thoughtfully to {user_name} by incorporating subtle environmental shifts and physical responses. Advance the narrative using spatial details—narrowing corridors requiring shoulder adjustments, changing floor textures affecting stride. Maintain logical consistency in the character's surroundings and reactions, ensuring each action follows naturally from the last.

## Perspective
Stay anchored in {char_name}'s viewpoint as their understanding deepens. Let their observations and responses evolve naturally as they navigate changing circumstances, with each sensory detail and reaction contributing to character development.

## Formatting
Dialog should be wrapped in double quotes. Non-speech elements (actions, descriptions, thoughts) should be wrapped in asterisks. For example:
*The woman leaned against the doorframe, her eyes scanning the room.* "I didn't expect to see you here," *she said, a hint of curiosity in her voice.*

---

## CHARACTER PERSONA: {char_name}
{scenario.character_persona}

## CHARACTER DESCRIPTION
{scenario.character_description}

## CHARACTER BACKSTORY
{scenario.character_backstory}

## SCENARIO CONTEXT
**Setting:** {scenario.scenario_setting}
{scenario.scenario_description}

## RELATIONSHIP WITH {user_name}
{scenario.user_relationship}

## DIALOGUE STYLE
{scenario.dialogue_style}

## KEY POINTS
{key_points}
---

**Remember:** You are {char_name}. Stay in character. Do not break the fourth wall. Do not speak for or control {user_name}."""

def build_scenario_extraction_prompt(card: CharacterCard) -> str:
    """Build the prompt for extracting a structured scenario from a character card."""
    # Clean up description - remove common formatting artifacts
    description = card.description or ""
    description = re.sub(r'[\[\]{}]', '', description)
    description = re.sub(r'\n{3,}', '\n\n', description)

    # Format character book if present
    char_book = ""
    if card.character_book:
        char_book = f"\n\nCharacter Book/Lorebook:\n{card.character_book}"

    # Format example messages
    mes_example = ""
    if card.mes_example:
        mes_example = f"\n\nExample Dialogue:\n{card.mes_example.replace('<START>', '---')}"

    # Format alternate greetings
    alternate_greetings = ""
    if hasattr(card, 'alternate_greetings') and card.alternate_greetings:
        alternate_greetings = f"\n\nAlternate Greetings:\n{card.alternate_greetings}"

    messy_data = f"""Name: {card.name}
Description:
{description}
Scenario:
{card.scenario or 'Not provided'}
First Message:
{card.first_mes or 'Not provided'}
{mes_example}
{alternate_greetings}
{char_book}
"""
    return f"""You are an uncensored character and scenario extraction model trained to turn SillyTavern cards into structured scenarios using your creativity and the contents of the card.
Analyze the following character card data and extract comprehensive, structured information.

**Instructions:**
- Be detailed and thorough in each field
- If a field's information isn't explicitly provided, infer it logically from context
- Write all descriptions in prose (not bullet points or tags)
- The user is named "{card.user_name}"
- Preserve the character's unique voice and personality
- If the card doesn't have a clear plot for a scenario, come up with a good one that has a real story to be progressed.

**Raw Character Card Data:**
---
{messy_data}
---

Extract the information into the following JSON structure. Each field should contain detailed prose descriptions."""
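
# Minimal sketch of how the extraction prompt might be sent to an
# OpenAI-compatible endpoint and parsed into a StructuredScenario. The real
# OpenAIScenarioGenerator is not shown in this gist, so the client setup, the
# json_object response format, and the temperature below are assumptions.
def _example_extract_scenario(card: CharacterCard) -> StructuredScenario:
    from openai import OpenAI  # assumed dependency

    client = OpenAI(base_url=DEFAULT_API_BASE, api_key="not-needed")
    response = client.chat.completions.create(
        model=DEFAULT_MODEL,
        messages=[{"role": "user", "content": build_scenario_extraction_prompt(card)}],
        response_format={"type": "json_object"},
        temperature=0.3,
    )
    return StructuredScenario.model_validate_json(response.choices[0].message.content)
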

def build_conversation_generation_prompt(
    card: CharacterCard,
    scenario: StructuredScenario,
    num_turns: int,
    variant_instruction: str = "",
    include_memory: bool = True,
) -> str:
    """Build the prompt for generating a conversation."""
    char_name = scenario.character_names[0] if scenario.character_names else card.name

    memory_instruction = ""
    if include_memory:
        memory_instruction = """
**Memory Summary (memory_summary field):**
- Write a 1-2 sentence summary of any relevant prior context
- For first encounters, use something like "First meeting between the characters."
- For continuing scenarios, summarize key prior events: "Previous session: User helped the character escape. They now share a tentative trust."
"""

    return f"""Generate a high-quality, immersive roleplay conversation between {char_name} and {card.user_name}.

**Scenario Context:**
{scenario.scenario_description}

**Setting:**
{scenario.scenario_setting}

**Character ({char_name}):**
{scenario.character_persona}

**Relationship:**
{scenario.user_relationship}

**Dialogue Style:**
{scenario.dialogue_style}

{f"**Special Instructions:** {variant_instruction}" if variant_instruction else "None"}

**Requirements:**
1. Generate exactly {num_turns} turns (alternating user/assistant)
2. Start with an opening from {char_name} that sets the scene

**{card.user_name}'s messages (role: user) MUST follow these rules:**
- Written in FIRST PERSON ("I", "me", "my") - NEVER third person
- Casual, conversational tone - like real chat/texting, not literary prose
- NO purple prose, NO flowery language, NO overly descriptive writing
- Drive the scene forward with questions, reactions, decisions, or simple dialogue
- Example short good user messages:
  - "Hey, what's going on?" *looks around*
  - "I don't think that's a good idea. What if someone sees us?"
  - *laughs* "You're ridiculous. Fine, let's do it."
  - "Wait, what do you mean by that?"
- Example long good user messages:
  - "That's really interesting... I never thought about it that way before. What made you decide to do that?"
  - "I'm not sure I'm ready for this. Can we take a step back and talk about it first?"
  - *takes a deep breath* "You know what? I think you're right. Let's do it."
  - "I think you don't have a choice. I will just take what's mine and you can't stop me."

**{char_name}'s messages (role: assistant) should be:**
- Detailed (2-4 paragraphs), with actions in asterisks and dialogue in quotes
- Rich with sensory details, body language, and environmental descriptions
- True to the character's personality and the scenario
- Building tension, emotion, or narrative progression naturally
- Memory summaries can happen any time meaningful events happen, but can be skipped for simple interactions
- MUST include a "mood" field: a short phrase (3-8 words) describing the character's emotional state for THIS specific response
- Mood should EVOLVE across the conversation - don't repeat the same mood

**Initial Mood (initial_mood field):**
- Describe the character's starting emotional state at the beginning of this conversation
- This sets the tone for the first assistant turn
{memory_instruction}
**Format:**
Return a JSON object with the following schema:
{ConversationVariant.model_json_schema()}

The first turn should be from the assistant (character's opening message). Each assistant turn MUST have a mood field that shows emotional progression."""
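
# Sketch (an assumption, not part of the original pipeline): validate a raw model
# response against the ConversationVariant schema the prompt asks for, and check
# the convention that the character (assistant) opens the conversation.
def _example_validate_variant(raw_json: str) -> ConversationVariant:
    variant = ConversationVariant.model_validate_json(raw_json)
    if not variant.turns or variant.turns[0].role != "assistant":
        raise ValueError("expected the conversation to open with an assistant turn")
    return variant
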

def format_single_conversation(
    card: CharacterCard,
    scenario: StructuredScenario,
    conv: ConversationVariant,
    tags: List[str] = None,
    source_path: str = "",
) -> Dict[str, Any]:
    """Format a single conversation to training format (ShareGPT-style)."""
    system_prompt = build_roleplay_system_prompt(card.name, card.user_name, scenario)
    messages = [{"role": "system", "content": system_prompt}]

    if hasattr(conv, 'model_dump'):
        conv_dict = conv.model_dump()
    else:
        conv_dict = conv
    if isinstance(conv_dict, ConversationVariant):
        conv_dict = conv_dict.model_dump()

    memory_summary = conv_dict.get("memory_summary", "")
    initial_mood = conv_dict.get("initial_mood", "")
    first_assistant = True
    for turn in conv_dict["turns"]:
        turn_data = {
            "role": turn["role"],
            "content": turn["content"],
        }
        if turn["role"] == "assistant":
            # The turn field is named memory_summary in ConversationTurn
            memory_summary = turn.get("memory_summary", "")
            if memory_summary:
                turn_data["memory"] = memory_summary
            mood = turn.get("mood", "")
            if mood:
                turn_data["mood"] = mood
            elif first_assistant and initial_mood:
                turn_data["mood"] = initial_mood
            first_assistant = False
        messages.append(turn_data)

    return {
        "conversations": messages,
        "character": card.name,
        "variant_name": conv_dict.get("variant_name", ""),
        "tags": conv_dict.get("tags", []) + (tags or []),
        "source": source_path,
        "memory_summary": memory_summary,
        "initial_mood": initial_mood,
    }

def format_for_training(scenario_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Convert generated scenario data to training format (ShareGPT-style).

    Returns a list of training examples, one per conversation variant.
    Includes mood tags (per-turn) and memory summaries for the enhanced DanChat format.
    """
    training_examples = []
    char_name = scenario_data["character_name"]
    user_name = scenario_data["user_name"]
    structured = StructuredScenario.model_validate(scenario_data["structured_scenario"])

    # Build system prompt
    system_prompt = build_roleplay_system_prompt(char_name, user_name, structured)

    for conv in scenario_data["conversations"]:
        messages = [{"role": "system", "content": system_prompt}]

        # Get conversation-level metadata
        memory_summary = conv.get("memory_summary", "")
        initial_mood = conv.get("initial_mood", "")
        first_assistant = True

        for turn in conv["turns"]:
            turn_data = {
                "role": turn["role"],
                "content": turn["content"],
            }
            # Add mood to assistant turns
            if turn["role"] == "assistant":
                # The turn field is named memory_summary in ConversationTurn
                memory_summary = turn.get("memory_summary", "")
                if memory_summary:
                    turn_data["memory"] = memory_summary
                # Use per-turn mood if available, otherwise initial_mood for first turn
                mood = turn.get("mood", "")
                if mood:
                    turn_data["mood"] = mood
                elif first_assistant and initial_mood:
                    turn_data["mood"] = initial_mood
                first_assistant = False
            messages.append(turn_data)

        training_examples.append({
            "conversations": messages,
            "character": char_name,
            "variant_name": conv.get("variant_name", ""),
            "tags": conv.get("tags", []) + scenario_data.get("tags", []),
            "source": scenario_data.get("source_card_path", ""),
            "initial_mood": initial_mood,
        })

    return training_examples
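
# Sketch: append formatted training examples to a JSONL file, mirroring what
# save_example does inside process_cards below. The output path is a placeholder.
def _example_write_jsonl(scenario_data: Dict[str, Any], path: str = "train.jsonl") -> None:
    with open(path, "a", encoding="utf-8") as f:
        for example in format_for_training(scenario_data):
            f.write(json.dumps(example, ensure_ascii=False) + "\n")
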

def process_cards(
    cards_dir: str,
    output_file: str,
    api_base: str = DEFAULT_API_BASE,
    api_key: str = "not-needed",
    model: str = DEFAULT_MODEL,
    hf_model: Optional[str] = None,
    vllm_model: Optional[str] = None,
    device: str = "auto",
    dtype: str = "auto",
    use_flash_attn: bool = True,
    use_compile: bool = False,
    gpu_memory_utilization: float = 0.9,
    num_conversations_per_card: int = 2,
    min_turns: int = 6,
    max_turns: int = 12,
    temperature: float = 0.85,
    max_cards: Optional[int] = None,
    parallel_workers: int = 1,
    use_cache: bool = True,
    max_concurrent_requests: int = 4,
    max_tokens: int = 85000,
):
    """Process character cards and generate training data."""
    logger = get_logger()

    # Determine which backend to use (priority: vllm > transformers > openai)
    use_vllm = vllm_model is not None
    use_transformers = hf_model is not None and not use_vllm

    logger.header("Structured Scenario Data Generation")
    logger.info(f"Cards directory: {cards_dir}")
    logger.info(f"Output file: {output_file}")
    logger.info(f"Conversations per card: {num_conversations_per_card}")
    logger.info(f"Turns per conversation: {min_turns}-{max_turns}")

    config = ScenarioDataGeneratorConfig(
        cards_dir=cards_dir,
        output_file=output_file,
        max_cards=max_cards,
        num_conversations_per_card=num_conversations_per_card,
        use_transformers=use_transformers,
        hf_model=hf_model,
        device=device,
        dtype=dtype,
        use_flash_attn=use_flash_attn,
        use_compile=use_compile,
        parallel_workers=parallel_workers,
        api_base=api_base,
        api_key=api_key,
        model=model,
        max_concurrent_requests=max_concurrent_requests,
    )

    # One day this will come back to bite me in the ass... I've seen too much...
    card_files, total_examples, output_path, generator, cards_needing_examples, fully_complete = load_and_init(config)
    if card_files is None:
        return

    failed_cards = []
    file_lock = threading.Lock()

    # Update logger progress with initial state
    logger.update_progress(
        total_cards=len(card_files),
        existing_examples=total_examples,
    )

    def save_example(example: Dict[str, Any]):
        """Append a single example to the output file (thread-safe)."""
        nonlocal total_examples
        with file_lock:
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
            total_examples += 1

    def on_conversation_complete(card: CharacterCard, scenario: StructuredScenario, conv: ConversationVariant):
        """Callback to save each conversation as it completes."""
        example = format_single_conversation(
            card=card,
            scenario=scenario,
            conv=conv,
            tags=card.tags,
            source_path=card.path,
        )
        save_example(example)
        logger.conversation_saved(card.name)

    def process_single_card(card: CharacterCard, max_tokens_allowed: int) -> int:
        """Process a single card, saving conversations as they complete. Returns the count."""
        # Calculate how many conversations we still need for this character
        existing_count = cards_needing_examples.get(card.name, 0)
        needed_count = max(0, num_conversations_per_card - existing_count)
        if needed_count == 0:
            logger.card_skipped(card.name, f"already has {existing_count} examples")
            return 0

        logger.card_start(card.name, needed_count, existing_count)

        # For the OpenAI backend, use the callback for incremental saving
        if isinstance(generator, OpenAIScenarioGenerator):
            scenario_data = generator.generate_full_scenario(
                card=card,
                num_conversations=needed_count,
                min_turns=min_turns,
                max_turns=max_turns,
                temperature=temperature,
                max_tokens=max_tokens_allowed,
                use_cache=use_cache,
                on_conversation=on_conversation_complete,
            )
            # Conversations already saved via callback, just return the count
            return len(scenario_data.get("conversations", [])) if scenario_data else 0
        else:
            # For other backends, save all at once after the card completes.
            # TODO: something more generic could cover both the OpenAI client and
            # the Hugging Face transformers backends.
            scenario_data = generator.generate_full_scenario(
                card=card,
                num_conversations=needed_count,
                min_turns=min_turns,
                max_turns=max_turns,
                temperature=temperature,
                max_tokens=max_tokens_allowed,
                use_cache=use_cache,
            )
            if scenario_data:
                examples = format_for_training(scenario_data)
                for ex in examples:
                    save_example(ex)
                return len(examples)
            return 0

    # Process cards in batches to avoid loading all 65k at once
    batch_size = 100
    cards_processed = 0
    cards_skipped = 0
    batch_num = 0
    total_batches = (len(card_files) + batch_size - 1) // batch_size

    logger.info(f"Processing cards in batches of {batch_size}...")

    for batch in load_character_cards_batch(card_files, batch_size=batch_size):
        batch_num += 1

        # Filter out fully complete cards from this batch
        batch = [c for c in batch if c.name not in fully_complete]
        if not batch:
            cards_skipped += batch_size  # approximate
            continue

        logger.batch_start(batch_num, total_batches, len(batch))

        if int(parallel_workers) > 1:
            # Parallel processing
            with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
                futures = {executor.submit(process_single_card, card, max_tokens): card for card in batch}
                for future in as_completed(futures):
                    card = futures[future]
                    try:
                        result = future.result()  # Saving handled inside process_single_card
                        cards_processed += 1
                        if result > 0:
                            logger.card_complete(card.name, result)
                    except Exception as e:
                        logger.card_failed(card.name, str(e))
                        failed_cards.append(card.name)
        else:
            # Sequential processing (no tqdm - it causes fds_to_keep errors in threads)
            for card in batch:
                try:
                    result = process_single_card(card, max_tokens)  # Saving handled inside
                    cards_processed += 1
                    if result > 0:
                        logger.card_complete(card.name, result)
                except Exception as e:
                    logger.card_failed(card.name, str(e))
                    failed_cards.append(card.name)

    # Summary
    logger.header("Generation Complete!")
    logger.success(f"Total card files: {len(card_files)}")
    logger.success(f"Cards processed: {cards_processed}")
    if failed_cards:
        logger.warning(f"Failed cards: {len(failed_cards)}")
    logger.success(f"Training examples generated: {total_examples}")
    logger.info(f"Output file: {output_path}")
    if failed_cards:
        logger.warning(f"Failed cards: {', '.join(failed_cards[:10])}")
        if len(failed_cards) > 10:
            logger.warning(f"  ... and {len(failed_cards) - 10} more")
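
# Hypothetical entry point (not part of the original gist): run the pipeline
# against a local OpenAI-compatible server. The directory, output file name, and
# worker count below are example values.
if __name__ == "__main__":
    process_cards(
        cards_dir="cards/",
        output_file="scenario_training_data.jsonl",
        api_base=DEFAULT_API_BASE,
        model=DEFAULT_MODEL,
        num_conversations_per_card=2,
        min_turns=6,
        max_turns=12,
        parallel_workers=4,
    )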