vllm.entrypoints.pooling.score.utils

ScoreMultiModalParam

Bases: TypedDict

A specialized parameter type for scoring multimodal content

The reasons we don't reuse CustomChatCompletionMessageParam directly:

1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions.
2. Including chat-specific fields would confuse users about their purpose in scoring.
3. This is a more focused interface that only exposes what's needed for scoring.

Source code in vllm/entrypoints/pooling/score/utils.py
class ScoreMultiModalParam(TypedDict, total=False):
    """
    A specialized parameter type for scoring multimodal content

    The reasons we don't reuse `CustomChatCompletionMessageParam` directly:
    1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions
    2. Including chat-specific fields would confuse users about their purpose in scoring
    3. This is a more focused interface that only exposes what's needed for scoring
    """  # noqa: E501

    content: Required[list[ScoreContentPartParam]]
    """The multimodal contents"""

content instance-attribute

content: Required[list[ScoreContentPartParam]]

The multimodal contents
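
Since ScoreMultiModalParam is a TypedDict, instances are plain dictionaries. A minimal sketch, assuming content parts use the OpenAI-style image_url shape (the exact ScoreContentPartParam fields may differ):

param: ScoreMultiModalParam = {
    # Hypothetical content part; shape assumed, not taken from this module.
    "content": [
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
}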

_ensure_str

_ensure_str(content: list[ConversationMessage]) -> str

Extract a single string prompt from parsed conversation content.

Source code in vllm/entrypoints/pooling/score/utils.py
def _ensure_str(content: list[ConversationMessage]) -> str:
    """Extract a single string prompt from parsed conversation content."""
    assert len(content) == 1
    prompt = content[0]["content"]
    if isinstance(prompt, str):
        return prompt
    raise ValueError(f"Only string content is supported, but got {content}.")
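
An illustrative call, assuming ConversationMessage is a mapping with "role" and "content" keys:

_ensure_str([{"role": "user", "content": "How tall is the Eiffel Tower?"}])
# -> "How tall is the Eiffel Tower?"
# A list of more than one message fails the length assertion, and
# non-string content (e.g. a list of parts) raises ValueError.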

compress_token_type_ids

compress_token_type_ids(token_type_ids: list[int]) -> int

Return the position of the first 1, or the length of the list if no 1 is found.

Source code in vllm/entrypoints/pooling/score/utils.py
def compress_token_type_ids(token_type_ids: list[int]) -> int:
    """
    Return the position of the first 1, or the length of the list
    if no 1 is found.
    """
    first_one = len(token_type_ids)
    err_msg = (
        "Token type ids are expected to be a sequence"
        " of zeros followed by a sequence of ones"
    )
    for i, type_id in enumerate(token_type_ids):
        if type_id == 0 and first_one < i:
            raise ValueError(err_msg)
        elif type_id == 1 and first_one > i:
            first_one = i
        elif type_id > 1:
            raise ValueError(err_msg)

    return first_one
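
Usage follows directly from the zeros-then-ones contract; a short sketch:

from vllm.entrypoints.pooling.score.utils import compress_token_type_ids

# Three query tokens (type 0) followed by two document tokens (type 1):
assert compress_token_type_ids([0, 0, 0, 1, 1]) == 3
# No 1 present: the full length is returned.
assert compress_token_type_ids([0, 0, 0]) == 3
# Interleaved ids such as [0, 1, 0] violate the contract and raise ValueError.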

compute_maxsim_score

compute_maxsim_score(
    q_emb: Tensor, d_emb: Tensor
) -> Tensor

Compute ColBERT MaxSim score.

Parameters:

- q_emb (Tensor): Query token embeddings [query_len, dim]. Required.
- d_emb (Tensor): Document token embeddings [doc_len, dim]. Required.

Returns:

- Tensor: MaxSim score (sum over query tokens of max similarity to any doc token).

Source code in vllm/entrypoints/pooling/score/utils.py
def compute_maxsim_score(q_emb: torch.Tensor, d_emb: torch.Tensor) -> torch.Tensor:
    """
    Compute ColBERT MaxSim score.

    Args:
        q_emb: Query token embeddings [query_len, dim]
        d_emb: Document token embeddings [doc_len, dim]

    Returns:
        MaxSim score (sum over query tokens of max similarity to any doc token)
    """
    # [query_len, doc_len]
    token_scores = torch.matmul(q_emb, d_emb.T)
    # Max over document tokens, sum over query tokens
    return token_scores.amax(dim=-1).sum()
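
A self-contained sketch with random embeddings (ColBERT-style models typically L2-normalize token embeddings before this step):

import torch

from vllm.entrypoints.pooling.score.utils import compute_maxsim_score

q_emb = torch.randn(4, 128)   # 4 query tokens, dim 128
d_emb = torch.randn(30, 128)  # 30 document tokens, dim 128
score = compute_maxsim_score(q_emb, d_emb)
# Same computation inline: max over document tokens, sum over query tokens.
assert torch.allclose(score, torch.matmul(q_emb, d_emb.T).amax(dim=-1).sum())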

parse_score_data

parse_score_data(
    data_1: ScoreData,
    data_2: ScoreData,
    model_config: ModelConfig,
) -> tuple[
    str,
    str,
    MultiModalDataDict | None,
    MultiModalUUIDDict | None,
]

Parse a query-document pair into text prompts and shared multi-modal data.

Uses a single MultiModalItemTracker so that multi-modal items from both inputs are merged into one mm_data dict. This is the correct behaviour for cross-encoder scoring, where query and document are concatenated into a single model prompt.

Source code in vllm/entrypoints/pooling/score/utils.py
def parse_score_data(
    data_1: ScoreData,
    data_2: ScoreData,
    model_config: ModelConfig,
) -> tuple[str, str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
    """Parse a query-document pair into text prompts and shared multi-modal
    data.

    Uses a **single** :class:`MultiModalItemTracker` so that multi-modal
    items from both inputs are merged into one ``mm_data`` dict.  This is
    the correct behaviour for cross-encoder scoring, where query and
    document are concatenated into a single model prompt.
    """
    mm_tracker = MultiModalItemTracker(model_config)

    content_1 = _parse_score_content("query", data_1, mm_tracker)
    content_2 = _parse_score_content("document", data_2, mm_tracker)

    prompt_1 = _ensure_str(content_1)
    prompt_2 = _ensure_str(content_2)
    mm_items, mm_uuids = mm_tracker.resolve_items()

    return prompt_1, prompt_2, mm_items, mm_uuids
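
A hedged calling sketch, assuming model_config is an already-built vllm.config.ModelConfig for a multimodal scoring model and that image content parts use the OpenAI-style shape:

prompt_1, prompt_2, mm_data, mm_uuids = parse_score_data(
    "What is shown in the image?",  # query as plain text
    # Hypothetical multimodal document; content-part shape assumed.
    {"content": [{"type": "image_url",
                  "image_url": {"url": "https://example.com/cat.png"}}]},
    model_config,
)
# mm_data holds the items from *both* inputs, since the cross-encoder
# consumes query and document as one concatenated prompt.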

parse_score_data_single

parse_score_data_single(
    data: ScoreData, role: str, model_config: ModelConfig
) -> tuple[
    str,
    MultiModalDataDict | None,
    MultiModalUUIDDict | None,
]

Parse one ScoreData into a text prompt and its own multi-modal data.

Unlike parse_score_data, each call creates an independent MultiModalItemTracker so multi-modal items are kept separate. This is the correct behaviour for late-interaction scoring, where query and document are encoded independently.

Source code in vllm/entrypoints/pooling/score/utils.py
def parse_score_data_single(
    data: ScoreData,
    role: str,
    model_config: ModelConfig,
) -> tuple[str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
    """Parse **one** ScoreData into a text prompt and its own multi-modal
    data.

    Unlike :func:`parse_score_data`, each call creates an **independent**
    :class:`MultiModalItemTracker` so multi-modal items are kept separate.
    This is the correct behaviour for late-interaction scoring, where
    query and document are encoded independently.
    """
    mm_tracker = MultiModalItemTracker(model_config)
    content = _parse_score_content(role, data, mm_tracker)

    prompt = _ensure_str(content)
    mm_items, mm_uuids = mm_tracker.resolve_items()
    return prompt, mm_items, mm_uuids
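
By contrast, the single-input variant keeps each side's multi-modal items separate; a sketch where query_data and doc_data are placeholder ScoreData values:

prompt_q, mm_q, uuids_q = parse_score_data_single(query_data, "query", model_config)
prompt_d, mm_d, uuids_d = parse_score_data_single(doc_data, "document", model_config)
# mm_q and mm_d are independent: nothing from the document leaks into the
# query's multi-modal payload, as late-interaction scoring requires.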

post_process_tokens

post_process_tokens(
    model_config: ModelConfig, prompt: TokensPrompt
) -> None

Perform architecture-specific manipulations on the input tokens.

Note

This is an in-place operation.

Source code in vllm/entrypoints/pooling/score/utils.py
def post_process_tokens(
    model_config: ModelConfig,
    prompt: TokensPrompt,
) -> None:
    """
    Perform architecture-specific manipulations on the input tokens.

    Note:
        This is an in-place operation.
    """
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls

    model = get_model_cls(model_config)
    if supports_score_template(model):
        model.post_process_tokens(prompt)
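
A sketch of the in-place contract, using placeholder token ids in a TokensPrompt:

from vllm.inputs import TokensPrompt

prompt = TokensPrompt(prompt_token_ids=[101, 2023, 2003, 102])  # placeholder ids
post_process_tokens(model_config, prompt)
# If the resolved model class supports a score template,
# prompt["prompt_token_ids"] may now be modified; otherwise it is untouched.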

score_data_to_prompts

score_data_to_prompts(
    data_list: list[ScoreData],
    role: str,
    model_config: ModelConfig,
) -> list[PromptType]

Convert a list of ScoreData into PromptType objects.

For plain text inputs, returns the string directly. For multimodal inputs (list of content parts), parses them into a TextPrompt with attached multi_modal_data / multi_modal_uuids.

This is used by late-interaction scoring where each query/document is encoded independently.

Source code in vllm/entrypoints/pooling/score/utils.py
def score_data_to_prompts(
    data_list: list[ScoreData],
    role: str,
    model_config: ModelConfig,
) -> list[PromptType]:
    """Convert a list of ScoreData into PromptType objects.

    For plain text inputs, returns the string directly.
    For multimodal inputs (list of content parts), parses them into
    a :class:`TextPrompt` with attached ``multi_modal_data`` /
    ``multi_modal_uuids``.

    This is used by late-interaction scoring where each query/document
    is encoded independently.
    """
    prompts: list[PromptType] = []
    for data in data_list:
        if isinstance(data, str):
            prompts.append(data)
        else:
            text, mm_data, mm_uuids = parse_score_data_single(data, role, model_config)
            prompt: TextPrompt = TextPrompt(prompt=text)
            if mm_data is not None:
                prompt["multi_modal_data"] = mm_data
            if mm_uuids is not None:
                prompt["multi_modal_uuids"] = mm_uuids
            prompts.append(prompt)
    return prompts
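
A hedged sketch of the mixed-input case, with one text item and one multimodal item on the query side (content-part shape assumed, as above):

prompts = score_data_to_prompts(
    [
        "a plain text query",
        {"content": [{"type": "image_url",
                      "image_url": {"url": "https://example.com/cat.png"}}]},
    ],
    role="query",
    model_config=model_config,
)
# prompts[0] is the raw string; prompts[1] is a TextPrompt carrying
# multi_modal_data (and multi_modal_uuids when available).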