Created
November 11, 2025 12:07
-
-
Save gordinmitya/769647b364ea3854ce14f2e57dea6140 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Docker Compose service running the llama.cpp HTTP server (CUDA image)
# with a Qwen3-VL model and its multimodal projector.
services:
  qwen3-vl:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    environment:
      # Number of layers to offload to the GPU; 999 is presumably "more than
      # the model has", i.e. offload everything - confirm against llama.cpp docs.
      - LLAMA_ARG_N_GPU_LAYERS=999
      # Multimodal projector file, read from the /models volume below.
      - LLAMA_ARG_MMPROJ=/models/mmproj-BF16.gguf
      # Model weights; the Python client uses this same path as its model name.
      - LLAMA_ARG_MODEL=/models/Qwen3-VL-4B-Instruct-UD-Q8_K_XL.gguf
    ports:
      # Host port 8080 -> container port 8080 (the client targets localhost:8080).
      - "8080:8080"
    volumes:
      # The GGUF files are expected in ./models next to this compose file.
      - ./models:/models
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import base64 | |
| from pathlib import Path | |
| import cv2 | |
| from openai import OpenAI | |
| from pydantic import BaseModel, Field | |
| # pip install openai opencv-python pydantic | |
# Structured-output schema describing one person seen in an image.
# VisionClient.analyze_image passes this model as ``response_format``.
# Documented with comments rather than a class docstring so the JSON schema
# generated from the model stays exactly as it was.
class Person(BaseModel):
    # Both fields are required: no default value is supplied.
    age: int = Field(description="The person's age in years")
    gender: str = Field(description="The person's gender. Male or female")
class VisionClient:
    """Client for a vision model served over an OpenAI-compatible HTTP API.

    An image is sent together with a text prompt to the chat-completions
    endpoint, and the reply is constrained to the ``Person`` schema.
    """

    def __init__(
        self,
        base_url="http://localhost:8080",
        model="/models/Qwen3-VL-4B-Instruct-UD-Q8_K_XL.gguf",
        api_key="any",
    ):
        """Create the underlying OpenAI client.

        Args:
            base_url: Root URL of the OpenAI-compatible server.
            model: Model identifier sent with every request.
            api_key: Placeholder key; the OpenAI client requires a value
                (presumably the local server does not check it - confirm).
        """
        self.client = OpenAI(
            base_url=base_url,
            api_key=api_key,
        )
        self.model = model

    def encode_image(self, image_path):
        """Return the image at *image_path* as base64-encoded JPEG text.

        JPEG files are passed through byte-for-byte; every other format is
        decoded with OpenCV and re-encoded as JPEG.

        Args:
            image_path: ``str`` or ``pathlib.Path`` pointing at the image.

        Returns:
            The JPEG bytes, base64-encoded, as a ``str``.

        Raises:
            ValueError: If OpenCV cannot read the file or cannot encode it.
            OSError: If a JPEG file cannot be read from disk.
        """
        path = Path(image_path)
        if path.suffix.lower() in {".jpg", ".jpeg"}:
            # Already JPEG: skip the lossy decode/re-encode round trip.
            data = path.read_bytes()
        else:
            # cv2.imread wants a plain string, so Path inputs are converted.
            image = cv2.imread(str(path))
            if image is None:
                raise ValueError(f"Could not read image: {image_path}")
            ok, buffer = cv2.imencode(".jpg", image)
            if not ok:
                raise ValueError("Could not encode image to JPEG")
            data = buffer.tobytes()
        return base64.b64encode(data).decode("utf-8")

    def analyze_image(self, image_path, prompt="what's in this image?"):
        """Send the image and *prompt* to the model and return its reply.

        Args:
            image_path: Path of the image to analyse (see ``encode_image``).
            prompt: Text instruction sent alongside the image.

        Returns:
            The raw message content of the first choice. Because the request
            uses ``response_format=Person``, this is expected to be a JSON
            document matching that schema.

        Raises:
            ValueError: Propagated from ``encode_image``.
        """
        base64_image = self.encode_image(image_path)
        image_part = {
            "type": "image_url",
            # The image travels inline as a data URL; encode_image always
            # yields JPEG bytes, hence the fixed MIME type.
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        }
        response = self.client.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        image_part,
                    ],
                }
            ],
            response_format=Person,
            temperature=0.7,
            top_p=0.8,
            presence_penalty=1.5,
            frequency_penalty=0.0,  # optional
            max_tokens=32768,
            seed=3407,
            # Sampler settings outside the OpenAI schema go through extra_body.
            # llama.cpp names the repetition penalty `repeat_penalty`; the
            # vLLM-style `repetition_penalty` key is not one it documents.
            extra_body={
                "top_k": 20,
                "repeat_penalty": 1.0,
            },
        )
        return response.choices[0].message.content
# Demo entry point: analyse "person.jpg" from the current directory and print
# the model's reply. Guarded so importing this module does not create a client
# or send a request.
if __name__ == "__main__":
    vision_client = VisionClient()
    print(vision_client.analyze_image("person.jpg"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment