Mirror of https://github.com/hoshikawa2/mdm_project.git, synced 2026-03-06 02:10:37 +00:00
first commit
README.md
@@ -107,26 +107,98 @@ conda activate mdm

### Step 2 — Configure Environment Variables

# Ollama Multi-GPU Setup on OCI A10

## Systemd Services

**/etc/systemd/system/ollama-gpu0.service**

```ini
[Unit]
Description=Ollama on GPU0 (A10 #0)
After=network.target

[Service]
User=opc
Group=opc
# <<< IMPORTANT: SAME MODEL FOLDER FOR BOTH >>>
Environment=OLLAMA_MODELS=/home/opc/.ollama/models
Environment=CUDA_VISIBLE_DEVICES=0
# On the server, do not use "http://"
Environment=OLLAMA_HOST=127.0.0.1:11434
Environment=OLLAMA_NUM_PARALLEL=4
# Keeps the model loaded between calls
Environment=OLLAMA_KEEP_ALIVE=5m
# Useful verbose logs (INFO/DEBUG)
Environment=OLLAMA_DEBUG=INFO
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=2s
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target
```

**/etc/systemd/system/ollama-gpu1.service**

```ini
[Unit]
Description=Ollama on GPU1 (A10 #1)
After=network.target

[Service]
User=opc
Group=opc
# <<< SAME MODEL FOLDER AS GPU0 >>>
Environment=OLLAMA_MODELS=/home/opc/.ollama/models
Environment=CUDA_VISIBLE_DEVICES=1
Environment=OLLAMA_HOST=127.0.0.1:11435
Environment=OLLAMA_NUM_PARALLEL=4
Environment=OLLAMA_KEEP_ALIVE=5m
Environment=OLLAMA_DEBUG=INFO
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=2s
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target
```
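
Note that both unit files point at the same `OLLAMA_MODELS` folder, so a model only needs to be pulled once to become visible to both servers. A minimal sketch, to be run once the services are up (next section); `llama3` is purely a placeholder model name, substitute whichever model the project actually uses:

```bash
# Pull through the GPU0 server; the shared OLLAMA_MODELS folder
# makes the model available to the GPU1 server as well.
# ("llama3" is a placeholder model name.)
OLLAMA_HOST=127.0.0.1:11434 ollama pull llama3
```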

## Ollama Activation

```bash
sudo systemctl daemon-reload
sudo systemctl enable --now ollama-gpu0 ollama-gpu1
journalctl -u ollama-gpu0 -f &
journalctl -u ollama-gpu1 -f &
```
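
Once both units are active, a quick smoke test (suggested here, not part of the original steps) is to hit each server's model-listing endpoint and confirm the GPU pinning with `nvidia-smi`:

```bash
# Each server should answer on its own port with the same model list
curl -s http://127.0.0.1:11434/api/tags
curl -s http://127.0.0.1:11435/api/tags

# Expect one "ollama" compute process per GPU
nvidia-smi --query-compute-apps=gpu_uuid,pid,process_name,used_memory --format=csv
```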

## CUDA Configuration Variables

```sh
# 2 endpoints for 2 GPUs
export OLLAMA_ENDPOINTS="http://127.0.0.1:11434,http://127.0.0.1:11435"

# Model-side settings (per server)
export NUM_CTX=8192
export NUM_BATCH=1024   # later try 1280 → 1536 → 2048 if you have VRAM
export NUM_GPU=999      # "all layers on GPU"
export NUM_THREAD=48    # ~ useful vCPUs, not 600

# App concurrency
export CONCURRENCY_NORMALIZE=24
export CONCURRENCY_ADDRESS=24

# Timeouts/logs
export REQUEST_TIMEOUT=180
export LOG_LEVEL=INFO
```
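
Since `OLLAMA_ENDPOINTS` is how the application fans requests out across the two GPUs, it is worth exercising each endpoint once before starting the app. A hedged sketch, again using `llama3` only as a placeholder model name:

```bash
# Send one generation to each endpoint; watching nvidia-smi meanwhile
# should show the load land on different GPUs.
for ep in ${OLLAMA_ENDPOINTS//,/ }; do   # split the comma-separated list
  curl -s "$ep/api/generate" \
    -d '{"model": "llama3", "prompt": "ping", "stream": false}'
  echo
done
```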

### Step 3 — Run FastAPI Application

```bash
uvicorn mdm_app.app:app --host 0.0.0.0 --port 8080 --workers 4
```
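
With the API up, records go to the `/mdm/process` endpoint (shown in the next diff hunk). The JSON body below is purely illustrative; the real request schema is defined by `mdm_app`:

```bash
# Placeholder payload; field names are hypothetical, not the app's schema
curl -X POST http://localhost:8080/mdm/process \
  -H "Content-Type: application/json" \
  -d '{"name": "ACME Corp.", "address": "123 Main St", "zipcode": "10001"}'
```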
@@ -164,3 +236,12 @@ curl -X POST http://localhost:8080/mdm/process -H "Content-Type: application/json"

---

✅ At this point, the project should be fully deployed, running on **OCI A10 GPUs**, and producing clean, standardized, and enriched master data records.

## Reference

- [Oracle Cloud GPU Instances](https://www.oracle.com/cloud/compute/gpu/)
- [Using NVidia GPU with Oracle Cloud Infrastructure](https://docs.oracle.com/pt-br/iaas/Content/Compute/References/ngcimage.htm)

## Acknowledgments

- **Author** - Cristiano Hoshikawa (Oracle LAD A-Team Solution Engineer)