whyisjake/gemini-3-test.php

## gemini-3-test.php
<?php
/**
 * Temporary: Gemini model A/B testing.
 * Delete this file after testing is complete.
 *
 * Run: wp gemini-compare --file=/path/to/file.pdf
 */
if ( defined( 'WP_CLI' ) && WP_CLI ) {
	/**
	 * Registers `wp gemini-compare --file=<path>`.
	 *
	 * Uploads the given PDF once, sends the same extraction prompt to every
	 * model listed in $models, and writes each model's text output to
	 * compare_<model>.md in the directory of the input file.
	 *
	 * @param array $args       Positional arguments (unused).
	 * @param array $assoc_args Associative arguments; `file` is the path to the PDF.
	 */
	WP_CLI::add_command( 'gemini-compare', function( $args, $assoc_args ) {
		$api_key = defined( 'GOOGLE_API_KEY' ) ? GOOGLE_API_KEY : '';
		$file    = $assoc_args['file'] ?? null;

		if ( empty( $api_key ) || ! $file ) {
			WP_CLI::error( 'Provide --file and ensure GOOGLE_API_KEY is defined' );
		}

		// file_exists() alone would also accept a directory; require a readable regular file.
		if ( ! is_file( $file ) || ! is_readable( $file ) ) {
			WP_CLI::error( "File not found or not readable: {$file}" );
		}

		// Upload once; every model below reuses the same file URI.
		WP_CLI::line( 'Uploading PDF to Files API...' );
		$upload_url = 'https://generativelanguage.googleapis.com/upload/v1beta/files?uploadType=media&key=' . $api_key;
		$body       = file_get_contents( $file );
		if ( false === $body ) {
			WP_CLI::error( "Could not read file: {$file}" );
		}

		$upload_resp = wp_remote_post( $upload_url, array(
			'headers' => array( 'Content-Type' => 'application/pdf', 'X-Goog-Upload-Protocol' => 'raw' ),
			'body'    => $body,
			'timeout' => 60,
		) );

		// Surface transport-level failures (DNS, timeout, TLS) with their real message.
		if ( is_wp_error( $upload_resp ) ) {
			WP_CLI::error( 'Upload failed: ' . $upload_resp->get_error_message() );
		}

		$upload_data = json_decode( wp_remote_retrieve_body( $upload_resp ), true );
		$file_uri    = $upload_data['file']['uri'] ?? null;
		if ( ! $file_uri ) {
			// Prefer the API's own error text; fall back to the HTTP status.
			$reason = $upload_data['error']['message'] ?? ( 'HTTP ' . wp_remote_retrieve_response_code( $upload_resp ) );
			WP_CLI::error( "Upload failed: {$reason}" );
		}
		WP_CLI::success( "Uploaded: {$file_uri}" );

		$prompt = <<<PROMPT
You are extracting text from a Pew Research Center survey topline document. This is a PDF containing survey questions, response options, and percentage data.

Please extract ALL text from this document and format it as clean, well-structured Markdown following these rules:

1. **Survey Questions/Variables**: Use the original label exactly as it appears in the PDF as the heading (e.g., Q15., FOLNEWS, SMUSE, INTREQ, etc.)
2. **Response Options**: Format as a table with columns for the response text and percentage values
3. **Sample Sizes**: Keep n= values inline with their context
4. **Instructions/Notes**: Format as blockquotes (> text)
5. **Section Headers**: Use appropriate heading levels (# for main sections, ## for subsections)
6. **Preserve ALL data**: Every percentage, every response option, every note must be included
7. **Tables**: Use proper Markdown table syntax with headers

Extract the complete content maintaining the logical structure of the survey document. Do not summarize or omit any content.
PROMPT;

		// Model name => extra generationConfig entries merged over the shared defaults.
		$models = array(
			'gemini-2.5-flash'        => array(
				'thinkingConfig' => array( 'thinkingBudget' => 8192 ),
			),
			'gemini-3-flash-preview'  => array(
				'thinkingConfig' => array( 'thinkingLevel' => 'medium' ),
			),
		);

		// Output files are written alongside the input PDF.
		$output_dir = dirname( $file );

		foreach ( $models as $model => $extra_config ) {
			WP_CLI::line( '' );
			WP_CLI::line( str_repeat( '=', 60 ) );
			WP_CLI::line( "Model: {$model}" );
			WP_CLI::line( str_repeat( '=', 60 ) );

			$url = "https://generativelanguage.googleapis.com/v1beta/models/{$model}:generateContent?key={$api_key}";

			$gen_config = array_merge(
				array(
					'temperature'     => 0.1,
					'topP'            => 0.8,
					'topK'            => 40,
					'maxOutputTokens' => 65536,
				),
				$extra_config
			);

			$gen_body = wp_json_encode( array(
				'contents' => array(
					array(
						'parts' => array(
							array(
								'file_data' => array(
									'mime_type' => 'application/pdf',
									'file_uri'  => $file_uri,
								),
							),
							array( 'text' => $prompt ),
						),
					),
				),
				'generationConfig' => $gen_config,
			) );

			// Time only the generation request itself.
			$start    = microtime( true );
			$gen_resp = wp_remote_post( $url, array(
				'headers' => array( 'Content-Type' => 'application/json' ),
				'body'    => $gen_body,
				'timeout' => 300,
			) );
			$elapsed = round( microtime( true ) - $start, 2 );

			if ( is_wp_error( $gen_resp ) ) {
				WP_CLI::warning( "FAIL ({$elapsed}s): " . $gen_resp->get_error_message() );
				continue;
			}

			$status = wp_remote_retrieve_response_code( $gen_resp );
			$data   = json_decode( wp_remote_retrieve_body( $gen_resp ), true );

			if ( isset( $data['error'] ) ) {
				WP_CLI::warning( "ERROR {$status}: " . ( $data['error']['message'] ?? wp_json_encode( $data['error'] ) ) );
				continue;
			}

			// A non-200 status or a body that is not a JSON object (e.g. a proxy error page)
			// must not be reported as a successful, empty extraction.
			if ( ! is_array( $data ) || 200 !== (int) $status ) {
				WP_CLI::warning( "ERROR {$status}: unexpected response from API" );
				continue;
			}

			// Extract text, skip thinking parts.
			$total_text = '';
			if ( isset( $data['candidates'][0]['content']['parts'] ) ) {
				foreach ( $data['candidates'][0]['content']['parts'] as $part ) {
					if ( ! empty( $part['thought'] ) ) {
						continue;
					}
					if ( isset( $part['text'] ) ) {
						$total_text .= $part['text'];
					}
				}
			}

			$finish = $data['candidates'][0]['finishReason'] ?? 'unknown';
			$usage  = $data['usageMetadata'] ?? array();

			// Save full output.
			$safe_name = str_replace( array( '-', '.' ), '_', $model );
			$out_file  = "{$output_dir}/compare_{$safe_name}.md";
			$written   = file_put_contents( $out_file, $total_text );

			// Count characters rather than bytes so the "chars" label is accurate for UTF-8 output.
			$summary = sprintf(
				'%d chars | %ss | finishReason=%s',
				mb_strlen( $total_text, 'UTF-8' ),
				$elapsed,
				$finish
			);

			// An empty extraction is not a success; flag it so it is not mistaken for a result.
			if ( '' === $total_text ) {
				WP_CLI::warning( "Empty response: {$summary}" );
			} else {
				WP_CLI::success( $summary );
			}

			if ( ! empty( $usage ) ) {
				WP_CLI::line( sprintf(
					'  Tokens — prompt: %s, output: %s, total: %s',
					$usage['promptTokenCount'] ?? '?',
					$usage['candidatesTokenCount'] ?? '?',
					$usage['totalTokenCount'] ?? '?'
				) );
			}

			// Only claim the file was saved when the write actually succeeded.
			if ( false === $written ) {
				WP_CLI::warning( "Could not write: {$out_file}" );
			} else {
				WP_CLI::line( "  Saved: {$out_file}" );
			}
		}

		WP_CLI::line( '' );
		WP_CLI::line( 'Done. Compare the .md files to evaluate output quality.' );
	} );
}
	<?php
	/**
	* Temporary: Gemini model A/B testing.
	* Delete this file after testing is complete.
	*
	* Run: wp gemini-compare --file=/path/to/file.pdf
	*/
	if ( defined( 'WP_CLI' ) && WP_CLI ) {
		/**
		 * Registers `wp gemini-compare --file=<path>`.
		 *
		 * Uploads the given PDF once, sends the same extraction prompt to every
		 * model listed in $models, and writes each model's text output to
		 * compare_<model>.md in the directory of the input file.
		 *
		 * @param array $args       Positional arguments (unused).
		 * @param array $assoc_args Associative arguments; `file` is the path to the PDF.
		 */
		WP_CLI::add_command( 'gemini-compare', function( $args, $assoc_args ) {
			$api_key = defined( 'GOOGLE_API_KEY' ) ? GOOGLE_API_KEY : '';
			$file    = $assoc_args['file'] ?? null;

			if ( empty( $api_key ) || ! $file ) {
				WP_CLI::error( 'Provide --file and ensure GOOGLE_API_KEY is defined' );
			}

			// file_exists() alone would also accept a directory; require a readable regular file.
			if ( ! is_file( $file ) || ! is_readable( $file ) ) {
				WP_CLI::error( "File not found or not readable: {$file}" );
			}

			// Upload once; every model below reuses the same file URI.
			WP_CLI::line( 'Uploading PDF to Files API...' );
			$upload_url = 'https://generativelanguage.googleapis.com/upload/v1beta/files?uploadType=media&key=' . $api_key;
			$body       = file_get_contents( $file );
			if ( false === $body ) {
				WP_CLI::error( "Could not read file: {$file}" );
			}

			$upload_resp = wp_remote_post( $upload_url, array(
				'headers' => array( 'Content-Type' => 'application/pdf', 'X-Goog-Upload-Protocol' => 'raw' ),
				'body'    => $body,
				'timeout' => 60,
			) );

			// Surface transport-level failures (DNS, timeout, TLS) with their real message.
			if ( is_wp_error( $upload_resp ) ) {
				WP_CLI::error( 'Upload failed: ' . $upload_resp->get_error_message() );
			}

			$upload_data = json_decode( wp_remote_retrieve_body( $upload_resp ), true );
			$file_uri    = $upload_data['file']['uri'] ?? null;
			if ( ! $file_uri ) {
				// Prefer the API's own error text; fall back to the HTTP status.
				$reason = $upload_data['error']['message'] ?? ( 'HTTP ' . wp_remote_retrieve_response_code( $upload_resp ) );
				WP_CLI::error( "Upload failed: {$reason}" );
			}
			WP_CLI::success( "Uploaded: {$file_uri}" );

			// The closing marker's one-tab indent is stripped from every heredoc line.
			$prompt = <<<PROMPT
	You are extracting text from a Pew Research Center survey topline document. This is a PDF containing survey questions, response options, and percentage data.

	Please extract ALL text from this document and format it as clean, well-structured Markdown following these rules:

	1. Survey Questions/Variables: Use the original label exactly as it appears in the PDF as the heading (e.g., Q15., FOLNEWS, SMUSE, INTREQ, etc.)
	2. Response Options: Format as a table with columns for the response text and percentage values
	3. Sample Sizes: Keep n= values inline with their context
	4. Instructions/Notes: Format as blockquotes (> text)
	5. Section Headers: Use appropriate heading levels (# for main sections, ## for subsections)
	6. Preserve ALL data: Every percentage, every response option, every note must be included
	7. Tables: Use proper Markdown table syntax with headers

	Extract the complete content maintaining the logical structure of the survey document. Do not summarize or omit any content.
	PROMPT;

			// Model name => extra generationConfig entries merged over the shared defaults.
			$models = array(
				'gemini-2.5-flash'       => array(
					'thinkingConfig' => array( 'thinkingBudget' => 8192 ),
				),
				'gemini-3-flash-preview' => array(
					'thinkingConfig' => array( 'thinkingLevel' => 'medium' ),
				),
			);

			// Output files are written alongside the input PDF.
			$output_dir = dirname( $file );

			foreach ( $models as $model => $extra_config ) {
				WP_CLI::line( '' );
				WP_CLI::line( str_repeat( '=', 60 ) );
				WP_CLI::line( "Model: {$model}" );
				WP_CLI::line( str_repeat( '=', 60 ) );

				$url = "https://generativelanguage.googleapis.com/v1beta/models/{$model}:generateContent?key={$api_key}";

				$gen_config = array_merge(
					array(
						'temperature'     => 0.1,
						'topP'            => 0.8,
						'topK'            => 40,
						'maxOutputTokens' => 65536,
					),
					$extra_config
				);

				$gen_body = wp_json_encode( array(
					'contents' => array(
						array(
							'parts' => array(
								array(
									'file_data' => array(
										'mime_type' => 'application/pdf',
										'file_uri'  => $file_uri,
									),
								),
								array( 'text' => $prompt ),
							),
						),
					),
					'generationConfig' => $gen_config,
				) );

				// Time only the generation request itself.
				$start    = microtime( true );
				$gen_resp = wp_remote_post( $url, array(
					'headers' => array( 'Content-Type' => 'application/json' ),
					'body'    => $gen_body,
					'timeout' => 300,
				) );
				$elapsed = round( microtime( true ) - $start, 2 );

				if ( is_wp_error( $gen_resp ) ) {
					WP_CLI::warning( "FAIL ({$elapsed}s): " . $gen_resp->get_error_message() );
					continue;
				}

				$status = wp_remote_retrieve_response_code( $gen_resp );
				$data   = json_decode( wp_remote_retrieve_body( $gen_resp ), true );

				if ( isset( $data['error'] ) ) {
					WP_CLI::warning( "ERROR {$status}: " . ( $data['error']['message'] ?? wp_json_encode( $data['error'] ) ) );
					continue;
				}

				// A non-200 status or a body that is not a JSON object (e.g. a proxy error page)
				// must not be reported as a successful, empty extraction.
				if ( ! is_array( $data ) || 200 !== (int) $status ) {
					WP_CLI::warning( "ERROR {$status}: unexpected response from API" );
					continue;
				}

				// Extract text, skip thinking parts.
				$total_text = '';
				if ( isset( $data['candidates'][0]['content']['parts'] ) ) {
					foreach ( $data['candidates'][0]['content']['parts'] as $part ) {
						if ( ! empty( $part['thought'] ) ) {
							continue;
						}
						if ( isset( $part['text'] ) ) {
							$total_text .= $part['text'];
						}
					}
				}

				$finish = $data['candidates'][0]['finishReason'] ?? 'unknown';
				$usage  = $data['usageMetadata'] ?? array();

				// Save full output.
				$safe_name = str_replace( array( '-', '.' ), '_', $model );
				$out_file  = "{$output_dir}/compare_{$safe_name}.md";
				$written   = file_put_contents( $out_file, $total_text );

				// Count characters rather than bytes so the "chars" label is accurate for UTF-8 output.
				// Plain pipes here: a backslash inside a single-quoted string would be printed literally.
				$summary = sprintf(
					'%d chars | %ss | finishReason=%s',
					mb_strlen( $total_text, 'UTF-8' ),
					$elapsed,
					$finish
				);

				// An empty extraction is not a success; flag it so it is not mistaken for a result.
				if ( '' === $total_text ) {
					WP_CLI::warning( "Empty response: {$summary}" );
				} else {
					WP_CLI::success( $summary );
				}

				if ( ! empty( $usage ) ) {
					WP_CLI::line( sprintf(
						' Tokens — prompt: %s, output: %s, total: %s',
						$usage['promptTokenCount'] ?? '?',
						$usage['candidatesTokenCount'] ?? '?',
						$usage['totalTokenCount'] ?? '?'
					) );
				}

				// Only claim the file was saved when the write actually succeeded.
				if ( false === $written ) {
					WP_CLI::warning( "Could not write: {$out_file}" );
				} else {
					WP_CLI::line( " Saved: {$out_file}" );
				}
			}

			WP_CLI::line( '' );
			WP_CLI::line( 'Done. Compare the .md files to evaluate output quality.' );
		} );
	}
No results found