Skip to content

Disaggregated Encoder Shm

Source: <https://github.com/vllm-project/vllm/tree/main/examples/online_serving/disaggregated_encoder_shm>

1E1P1D Proxy

#!/bin/bash
# Launch the EPD proxy for the 1E1P1D layout: one encode server (port 23001),
# one prefill server (port 33001) and one decode server (port 43001), all on
# 127.0.0.1. The proxy itself listens on 127.0.0.1:8001.
#
# The proxy implementation lives in the sibling "disaggregated_encoder"
# example directory. Resolve it relative to this script's own location rather
# than the caller's working directory, so the launcher works from anywhere.
# (When BASH_SOURCE is empty, e.g. the snippet is pasted into a shell, this
# falls back to the current directory, matching the previous behavior.)
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1
PROXY_SCRIPT="$SCRIPT_DIR/../disaggregated_encoder/disagg_epd_proxy.py"

python "$PROXY_SCRIPT" \
      --encode-servers-urls "http://127.0.0.1:23001" \
      --prefill-servers-urls "http://127.0.0.1:33001" \
      --decode-servers-urls "http://127.0.0.1:43001" \
      --host 127.0.0.1 \
      --port 8001

1E1Pd Proxy

#!/bin/bash
# Launch the EPD proxy for the 1E1Pd layout: one encode server (port 23001)
# and a single server on port 33001 that is registered as the decode target.
# The proxy itself listens on 127.0.0.1:8001.
#
# The prefill URL list is the literal value "disable".
# NOTE(review): presumably this tells the proxy to skip the separate prefill
# hop -- confirm against disagg_epd_proxy.py.
#
# The proxy implementation lives in the sibling "disaggregated_encoder"
# example directory. Resolve it relative to this script's own location rather
# than the caller's working directory, so the launcher works from anywhere.
# (When BASH_SOURCE is empty, e.g. the snippet is pasted into a shell, this
# falls back to the current directory, matching the previous behavior.)
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1
PROXY_SCRIPT="$SCRIPT_DIR/../disaggregated_encoder/disagg_epd_proxy.py"

python "$PROXY_SCRIPT" \
      --encode-servers-urls "http://127.0.0.1:23001" \
      --prefill-servers-urls "disable" \
      --decode-servers-urls "http://127.0.0.1:33001" \
      --host 127.0.0.1 \
      --port 8001

Run D

#!/bin/bash
# Start the decode (D) instance: `vllm serve` on port 43001, pinned to GPU 2
# via CUDA_VISIBLE_DEVICES, configured as a NixlConnector KV consumer.
#
# Environment:
#   MODEL  model to serve (default: Qwen/Qwen3-VL-2B-Instruct)

MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"

# Must be exported: a plain shell assignment is not placed in the environment
# of child processes, so `vllm serve` would never see the value. 5601 is
# distinct from the 5600 used by the prefill script in this example.
export VLLM_NIXL_SIDE_CHANNEL_PORT=5601

CUDA_VISIBLE_DEVICES=2 vllm serve "$MODEL" \
    --gpu-memory-utilization 0.7 \
    --port "43001" \
    --enforce-eager \
    --enable-request-id-headers \
    --served-model-name model_name \
    --max-model-len 32768  \
    --max-num-seqs 128 \
    --kv-transfer-config '{
        "kv_connector": "NixlConnector",
        "kv_role": "kv_consumer"
    }'

Run E

#!/bin/bash
# Start the encoder (E) instance: `vllm serve` on port 23001, pinned to GPU 0
# via CUDA_VISIBLE_DEVICES, configured as the SHMConnector encoder-cache
# producer. The connector's extra config (storage path, listen_ports,
# engine_id, instance counts and dp/tp sizes) uses the same values as the
# consumer scripts in this example.
#
# Environment:
#   MODEL                   model to serve (default: Qwen/Qwen3-VL-2B-Instruct)
#   EC_SHARED_STORAGE_PATH  encoder-cache directory (default: /user/ec_cache)
#
# WARNING: the storage directory is deleted and recreated on every start.

MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"

# Start from an empty cache directory. `:?` aborts if the path is ever empty,
# `--` stops option parsing for paths beginning with '-', and a failure to
# reset the directory stops the launch instead of serving with a bad path.
rm -rf -- "${EC_SHARED_STORAGE_PATH:?}" || exit 1
mkdir -p -- "$EC_SHARED_STORAGE_PATH" || exit 1

# The option is spelled out as `--convert`; the previous truncated `--conver`
# only worked through argparse prefix matching and would break as soon as
# another option sharing that prefix is added.
CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL" \
    --gpu-memory-utilization 0.01 \
    --port "23001" \
    --enforce-eager \
    --convert "mm_encoder_only" \
    --enable-request-id-headers \
    --served-model-name model_name \
    --no-enable-prefix-caching \
    --max-num-batched-tokens 114688 \
    --max-num-seqs 128 \
    --ec-transfer-config '{
        "ec_connector": "SHMConnector",
        "ec_role": "ec_producer",
        "ec_ip": "127.0.0.1",
        "ec_connector_extra_config": {
            "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
            "listen_ports": [30161],
            "engine_id": 0,
            "producer_instances": 1,
            "consumer_instances": 1,
            "producer": {
                "dp_size": 1,
                "tp_size": 1
            },
            "consumer": {
                "dp_size": 1,
                "tp_size": 1
            }
        }
    }'

Run P

#!/bin/bash
# Start the prefill (P) instance: `vllm serve` on port 33001, pinned to GPU 1
# via CUDA_VISIBLE_DEVICES. It consumes encoder caches through SHMConnector
# and produces KV caches through NixlConnector.
#
# Environment:
#   MODEL                   model to serve (default: Qwen/Qwen3-VL-2B-Instruct)
#   EC_SHARED_STORAGE_PATH  encoder-cache directory (default: /user/ec_cache);
#                           use the same value as the encoder instance

MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"

# Must be exported: a plain shell assignment is not placed in the environment
# of child processes, so `vllm serve` would never see the value. 5600 is
# distinct from the 5601 used by the decode script in this example.
export VLLM_NIXL_SIDE_CHANNEL_PORT=5600

# The decode instance of this example is a NixlConnector "kv_consumer", so the
# prefill side is started as the matching "kv_producer". Without this the
# NIXL side-channel port above is unused.
# NOTE(review): confirm this matches the intended 1E1P1D wiring.
CUDA_VISIBLE_DEVICES=1 vllm serve "$MODEL" \
    --gpu-memory-utilization 0.7 \
    --port "33001" \
    --enforce-eager \
    --enable-request-id-headers \
    --served-model-name model_name \
    --max-model-len 32768  \
    --max-num-seqs 128 \
    --ec-transfer-config '{
      "ec_connector": "SHMConnector",
      "ec_role": "ec_consumer",
      "ec_ip": "127.0.0.1",
      "ec_connector_extra_config": {
            "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
            "listen_ports": [30161],
            "engine_id": 0,
            "producer_instances": 1,
            "consumer_instances": 1,
            "producer": {
                "dp_size": 1,
                "tp_size": 1
            },
            "consumer": {
                "dp_size": 1,
                "tp_size": 1
            }
        }
    }' \
    --kv-transfer-config '{
      "kv_connector": "NixlConnector",
      "kv_role": "kv_producer"
    }'

Run Pd

#!/bin/bash
# Start the "Pd" instance: `vllm serve` on port 33001, pinned to GPU 0 via
# CUDA_VISIBLE_DEVICES. It is an SHMConnector encoder-cache consumer and is
# also given a NixlConnector KV-transfer config.
#
# Environment:
#   MODEL                   model to serve (default: Qwen/Qwen3-VL-2B-Instruct)
#   EC_SHARED_STORAGE_PATH  encoder-cache directory (default: /user/ec_cache)

MODEL="${MODEL:-Qwen/Qwen3-VL-2B-Instruct}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/user/ec_cache}"

# Encoder-cache transfer settings. The single quotes are closed around the
# path expansion so the shell substitutes it into the otherwise literal JSON.
ec_transfer_config='{
      "ec_connector": "SHMConnector",
      "ec_role": "ec_consumer",
      "ec_ip": "127.0.0.1",
      "ec_connector_extra_config": {
            "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'",
            "listen_ports": [30161],
            "engine_id": 0,
            "producer_instances": 1,
            "consumer_instances": 1,
            "producer": {
                "dp_size": 1,
                "tp_size": 1
            },
            "consumer": {
                "dp_size": 1,
                "tp_size": 1
            }
        }
    }'

# KV-cache transfer settings.
# NOTE(review): the role is "kv_producer" although no separate decode instance
# appears in the 1E1Pd proxy setup -- confirm this is intended.
kv_transfer_config='{
      "kv_connector": "NixlConnector",
      "kv_role": "kv_producer"
    }'

# Server options, collected in an array so each option sits on its own line
# without backslash continuations.
serve_args=(
    --gpu-memory-utilization 0.7
    --port "33001"
    --enforce-eager
    --enable-request-id-headers
    --served-model-name model_name
    --max-model-len 32768
    --max-num-seqs 128
    --ec-transfer-config "$ec_transfer_config"
    --kv-transfer-config "$kv_transfer_config"
)

CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL" "${serve_args[@]}"