runs:
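# Sweep vLLM's --max-num-seqs across 1, 2, 4, 8, 16, 32, and 256 while keeping
# the model, --max-num-batched-tokens (163840), and the workload fixed: a
# conversational_short benchmark with 100-token random inputs and outputs,
# 50 prompts, and a 10 req/s request rate. The benchmark keys appear to mirror
# vLLM's benchmarks/benchmark_serving.py flags (--dataset-name,
# --random-input-len, --num-prompts, --request-rate), assuming the runner
# forwards them unchanged.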
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 1"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
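# Each engine entry is roughly equivalent to launching, for example:
#   vllm serve NousResearch/Meta-Llama-3.1-8B \
#     --max-num-batched-tokens 163840 --max-num-seqs 1
# (an assumption: the exact launch command depends on how the runner
# interprets `args`).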
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 2"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 4"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 8"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 16"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 32"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10
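# 256 is historically vLLM's default max_num_seqs, so this final run likely
# serves as the engine-default baseline (an assumption; the default can vary
# by vLLM version).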
- engine:
  - type: vllm
    model: NousResearch/Meta-Llama-3.1-8B
    args: "--max-num-batched-tokens 163840 --max-num-seqs 256"
    env: {}
  benchmarks:
  - type: conversational_short
    dataset-name: random
    random-input-len: 100
    random-output-len: 100
    random-prefix-len: 0
    num-prompts: 50
    request-rate: 10