Last active
November 11, 2025 17:54
-
-
Save EvilFreelancer/17cde1d37db8cc7427e6b2618d2715c3 to your computer and use it in GitHub Desktop.
mem-agent MCP setup with vLLM on RTX 4090
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose definition for the mem-agent MCP server.
# The image is built from the Dockerfile in this directory; container port
# 8081 is published on the same host port.
services:
  mem-agent:
    build:
      context: .
    ports:
      - "8081:8081"
    environment:
      # Inside the container "localhost" is the container itself, so it can
      # never reach a vLLM server running elsewhere. Target the Docker host
      # instead. NOTE(review): assumes vLLM is published on host port 8000;
      # if both services share one Compose project, the service name works too.
      VLLM_HOST: host.docker.internal
      VLLM_PORT: 8000
    extra_hosts:
      # Makes host.docker.internal resolvable on Linux engines as well.
      - "host.docker.internal:host-gateway"
    volumes:
      # Keep the agent's memory directory on the host across container restarts.
      - ./memory_data:/app/memory/mcp-server
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose definition for the vLLM server that serves driaforall/mem-agent.
# The model is loaded through bitsandbytes with load_in_8bit enabled.
services:
  vllm:
    image: vllm/vllm-openai:v0.11.0
    restart: always
    # Published on the same port number on the host.
    ports:
      - "8000:8000"
    environment:
      NCCL_IGNORE_DISABLED_P2P: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
    volumes:
      # Cache directory of the container's root user, kept on the host.
      - ./vllm_data:/root/.cache
    entrypoint: vllm
    # Arguments are given one per list item; values are quoted so they stay
    # strings.
    command:
      - serve
      - driaforall/mem-agent
      - --model-impl
      - transformers
      - --trust-remote-code
      - --quantization
      - bitsandbytes
      - --load-format
      - bitsandbytes
      - --model-loader-extra-config
      - '{"load_in_8bit":true,"load_in_4bit":false}'
      - --dtype
      - auto
      - --gpu-memory-utilization
      - "0.3"
      - --max-model-len
      - "4000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserves the NVIDIA GPU with device id 1.
            - driver: nvidia
              device_ids:
                - "1"
              capabilities:
                - gpu
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image for the mem-agent MCP server: Python 3.11 plus uv, with the
# mem-agent-mcp repository checked out in /app.
FROM python:3.11-slim

# Install system dependencies.
# apt-get is used instead of apt: apt's command-line interface is not meant
# for scripts and prints a warning when used non-interactively.
RUN apt-get update \
 && apt-get install -fy \
      git curl build-essential \
 && rm -rf /var/lib/apt/lists/*

# Install uv and add it to PATH
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Fetch the repository into the working directory.
# A Git URL as an ADD source requires a BuildKit-based build.
ADD https://github.com/firstbatchxyz/mem-agent-mcp.git .

# Install project dependencies
RUN uv sync

# Create memory directory (relative to /app)
RUN mkdir -pv memory/mcp-server

# Default environment variables
ENV PYTHONPATH=/app
ENV MEMORY_PATH=memory/mcp-server
ENV MCP_TRANSPORT=http
ENV MCP_PATH=/mcp/
ENV FASTMCP_LOG_LEVEL=INFO

# Model name and filters; MEMORY_PATH is already set above, so it is not
# repeated here.
ENV MLX_MODEL_NAME=driaforall/mem-agent
ENV FILTERS=
# Create the startup script.
# The heredoc delimiter is quoted, so nothing is expanded at build time; all
# variables are resolved when the container starts.
RUN cat > start.sh << 'EOF'
#!/bin/bash
# Entry point of the container: records the runtime configuration and then
# launches the server variant selected by MCP_TRANSPORT.
set -e

echo "Starting mem-agent-mcp server..."

# Write each setting to a dotfile in the working directory.
# NOTE(review): presumably these files are read by the project code - confirm.
echo "MLX_MODEL_NAME: ${MLX_MODEL_NAME}"
echo "${MLX_MODEL_NAME}" > .mlx_model_name

echo "MEMORY_PATH: ${MEMORY_PATH}"
echo "${MEMORY_PATH}" > .memory_path

echo "FILTERS: ${FILTERS}"
echo "${FILTERS}" > .filters

# exec replaces this shell with the selected process.
case "${MCP_TRANSPORT}" in
  "stdio")
    echo "Starting MCP server over STDIO..."
    exec uv run python -m mcp_server.server
    ;;
  "http")
    echo "Starting MCP HTTP server..."
    exec uv run python mcp_server/mcp_http_server.py
    ;;
  "sse")
    echo "Starting MCP SSE server..."
    exec uv run python mcp_server/mcp_sse_server.py
    ;;
  "chat-cli")
    echo "Starting chat CLI..."
    exec uv run python chat_cli.py
    ;;
  *)
    # Report the variable that was actually inspected; the previous message
    # printed $MODE, which is never set, so the offending value was hidden.
    echo "Unknown MCP_TRANSPORT: ${MCP_TRANSPORT}" >&2
    echo "Available modes: stdio, http, sse, chat-cli" >&2
    exit 1
    ;;
esac
EOF
RUN chmod +x start.sh

# Expose ports
EXPOSE 8081 8082

# Default command
CMD ["./start.sh"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment