Disaggregated Encoder Shm¶
Source: https://github.com/vllm-project/vllm/tree/main/examples/online_serving/disaggregated_encoder_shm.
1E1P1D Proxy¶
#!/bin/bash
# 1E1P1D topology: proxy routes requests across separate encode (E),
# prefill (P) and decode (D) vLLM servers, each on its own port.
proxy_script="../disaggregated_encoder/disagg_epd_proxy.py"
proxy_args=(
  --encode-servers-urls "http://127.0.0.1:23001"
  --prefill-servers-urls "http://127.0.0.1:33001"
  --decode-servers-urls "http://127.0.0.1:43001"
  --host 127.0.0.1
  --port 8001
)
python "$proxy_script" "${proxy_args[@]}"
1E1Pd Proxy¶
#!/bin/bash
# 1E1Pd topology: prefill routing is turned off ("disable"); the single
# Pd server at :33001 handles both prefill and decode behind the proxy.
proxy_args=(
  --encode-servers-urls "http://127.0.0.1:23001"
  --prefill-servers-urls "disable"
  --decode-servers-urls "http://127.0.0.1:33001"
  --host 127.0.0.1
  --port 8001
)
python ../disaggregated_encoder/disagg_epd_proxy.py "${proxy_args[@]}"
Run D¶
#!/bin/bash
# Run the decode (D) vLLM server on GPU 2, listening on port 43001.
# It is the KV-cache consumer side of the NixlConnector pair (the
# prefill server is the producer).
MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
# FIX: the side-channel port must be exported (or prefixed to the command);
# a bare assignment only sets it in this shell and the child vllm process
# would never see it.
export VLLM_NIXL_SIDE_CHANNEL_PORT=5601
CUDA_VISIBLE_DEVICES=2 vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "43001" \
--enforce-eager \
--enable-request-id-headers \
--served-model-name model_name \
--max-model-len 32768 \
--max-num-seqs 128 \
--kv-transfer-config '{
"kv_connector": "NixlConnector",
"kv_role": "kv_consumer"
}'
Run E¶
#!/bin/bash
# Run the encode (E) vLLM server on GPU 0, port 23001.
# Runs the multimodal encoder only and publishes encoder caches via the
# shared-memory SHMConnector (ec_producer role).
MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"
# FIX: ${VAR:?} aborts if the path is unset/empty so an accidental
# empty expansion can never turn this into "rm -rf /"-style destruction.
rm -rf -- "${EC_SHARED_STORAGE_PATH:?}"
mkdir -p -- "$EC_SHARED_STORAGE_PATH"
# FIX: the flag was misspelled "--conver"; vLLM's option is "--convert".
CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL" \
--gpu-memory-utilization 0.01 \
--port "23001" \
--enforce-eager \
--convert "mm_encoder_only" \
--enable-request-id-headers \
--served-model-name model_name \
--no-enable-prefix-caching \
--max-num-batched-tokens 114688 \
--max-num-seqs 128 \
--ec-transfer-config '{
"ec_connector": "SHMConnector",
"ec_role": "ec_producer",
"ec_ip": "127.0.0.1",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
"listen_ports": [30161],
"engine_id": 0,
"producer_instances": 1,
"consumer_instances": 1,
"producer": {
"dp_size": 1,
"tp_size": 1
},
"consumer": {
"dp_size": 1,
"tp_size": 1
}
}
}'
Run P¶
#!/bin/bash
# Run the prefill (P) vLLM server on GPU 1, port 33001.
# Consumes encoder caches via SHMConnector (ec_consumer role); the KV side
# channel port pairs it with the decode server's NixlConnector.
MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"
# FIX: the side-channel port must be exported (or prefixed to the command);
# a bare assignment only sets it in this shell and the child vllm process
# would never see it.
export VLLM_NIXL_SIDE_CHANNEL_PORT=5600
CUDA_VISIBLE_DEVICES=1 vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "33001" \
--enforce-eager \
--enable-request-id-headers \
--served-model-name model_name \
--max-model-len 32768 \
--max-num-seqs 128 \
--ec-transfer-config '{
"ec_connector": "SHMConnector",
"ec_role": "ec_consumer",
"ec_ip": "127.0.0.1",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
"listen_ports": [30161],
"engine_id": 0,
"producer_instances": 1,
"consumer_instances": 1,
"producer": {
"dp_size": 1,
"tp_size": 1
},
"consumer": {
"dp_size": 1,
"tp_size": 1
}
}
}'
Run Pd¶
#!/bin/bash
# Run the combined prefill+decode (Pd) vLLM server on GPU 0, port 33001.
# Consumes encoder caches from shared memory (SHMConnector, ec_consumer)
# and produces KV caches for downstream consumers (NixlConnector).
MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"

# Connector configs are built up front so the serve command stays readable.
ec_config='{
"ec_connector": "SHMConnector",
"ec_role": "ec_consumer",
"ec_ip": "127.0.0.1",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
"listen_ports": [30161],
"engine_id": 0,
"producer_instances": 1,
"consumer_instances": 1,
"producer": {
"dp_size": 1,
"tp_size": 1
},
"consumer": {
"dp_size": 1,
"tp_size": 1
}
}
}'
kv_config='{
"kv_connector": "NixlConnector",
"kv_role": "kv_producer"
}'

CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "33001" \
--enforce-eager \
--enable-request-id-headers \
--served-model-name model_name \
--max-model-len 32768 \
--max-num-seqs 128 \
--ec-transfer-config "$ec_config" \
--kv-transfer-config "$kv_config"