from functools import wraps

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import (
    set_data_normalized,
    normalize_message_roles,
    truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.tracing_utils import set_span_errored
from sentry_sdk.utils import (
    capture_internal_exceptions,
    event_from_exception,
    safe_serialize,
)

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
    from sentry_sdk.tracing import Span

try:
    try:
        from openai import NotGiven
    except ImportError:
        NotGiven = None

    try:
        from openai import Omit
    except ImportError:
        Omit = None

    from openai.resources.chat.completions import Completions, AsyncCompletions
    from openai.resources import Embeddings, AsyncEmbeddings

    if TYPE_CHECKING:
        from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
except ImportError:
    raise DidNotEnable("OpenAI not installed")

RESPONSES_API_ENABLED = True
try:
    # responses API support was introduced in v1.66.0
    from openai.resources.responses import Responses, AsyncResponses
    from openai.types.responses.response_completed_event import ResponseCompletedEvent
except ImportError:
    RESPONSES_API_ENABLED = False


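# A minimal usage sketch (assumes this module lives at
# sentry_sdk.integrations.openai and that a DSN is configured; the parameter
# values shown are illustrative):
#
#     import sentry_sdk
#     from sentry_sdk.integrations.openai import OpenAIIntegration
#
#     sentry_sdk.init(
#         dsn="...",
#         send_default_pii=True,  # prompts/responses are only recorded with PII enabled
#         integrations=[OpenAIIntegration(include_prompts=True)],
#     )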
class OpenAIIntegration(Integration):
    identifier = "openai"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True, tiktoken_encoding_name=None):
        # type: (OpenAIIntegration, bool, Optional[str]) -> None
        self.include_prompts = include_prompts

        self.tiktoken_encoding = None
        if tiktoken_encoding_name is not None:
            import tiktoken  # type: ignore

            self.tiktoken_encoding = tiktoken.get_encoding(tiktoken_encoding_name)

    @staticmethod
    def setup_once():
        # type: () -> None
        Completions.create = _wrap_chat_completion_create(Completions.create)
        AsyncCompletions.create = _wrap_async_chat_completion_create(
            AsyncCompletions.create
        )

        Embeddings.create = _wrap_embeddings_create(Embeddings.create)
        AsyncEmbeddings.create = _wrap_async_embeddings_create(AsyncEmbeddings.create)

        if RESPONSES_API_ENABLED:
            Responses.create = _wrap_responses_create(Responses.create)
            AsyncResponses.create = _wrap_async_responses_create(AsyncResponses.create)

    def count_tokens(self, s):
        # type: (OpenAIIntegration, str) -> int
        if self.tiktoken_encoding is not None:
            return len(self.tiktoken_encoding.encode_ordinary(s))
        return 0


def _capture_exception(exc, manual_span_cleanup=True):
    # type: (Any, bool) -> None
    # Close a possibly open span by hand, because we are not using the
    # start_span context manager here.
    current_span = sentry_sdk.get_current_span()
    set_span_errored(current_span)

    if manual_span_cleanup and current_span is not None:
        current_span.__exit__(None, None, None)

    event, hint = event_from_exception(
        exc,
        client_options=sentry_sdk.get_client().options,
        mechanism={"type": "openai", "handled": False},
    )
    sentry_sdk.capture_event(event, hint=hint)


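# Token-usage field names differ across APIs: the Chat Completions API reports
# prompt_tokens/completion_tokens, while the Responses API reports
# input_tokens/output_tokens. Callers pass the candidate names in order of
# preference and _get_usage returns the first integer attribute found.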
def _get_usage(usage, names):
    # type: (Any, List[str]) -> int
    for name in names:
        if hasattr(usage, name) and isinstance(getattr(usage, name), int):
            return getattr(usage, name)
    return 0


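# Prefer the usage object reported by the API; when it is absent or zero
# (e.g. for some streamed responses), fall back to counting tokens manually
# with the integration's optional tiktoken encoding, which yields 0 when no
# encoding was configured.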
def _calculate_token_usage(
    messages, response, span, streaming_message_responses, count_tokens
):
    # type: (Optional[Iterable[ChatCompletionMessageParam]], Any, Span, Optional[List[str]], Callable[..., Any]) -> None
    input_tokens = 0  # type: Optional[int]
    input_tokens_cached = 0  # type: Optional[int]
    output_tokens = 0  # type: Optional[int]
    output_tokens_reasoning = 0  # type: Optional[int]
    total_tokens = 0  # type: Optional[int]

    if hasattr(response, "usage"):
        input_tokens = _get_usage(response.usage, ["input_tokens", "prompt_tokens"])
        if hasattr(response.usage, "input_tokens_details"):
            input_tokens_cached = _get_usage(
                response.usage.input_tokens_details, ["cached_tokens"]
            )

        output_tokens = _get_usage(
            response.usage, ["output_tokens", "completion_tokens"]
        )
        if hasattr(response.usage, "output_tokens_details"):
            output_tokens_reasoning = _get_usage(
                response.usage.output_tokens_details, ["reasoning_tokens"]
            )

        total_tokens = _get_usage(response.usage, ["total_tokens"])

    # Manually count tokens
    if input_tokens == 0:
        for message in messages or []:
            if isinstance(message, dict) and "content" in message:
                input_tokens += count_tokens(message["content"])
            elif isinstance(message, str):
                input_tokens += count_tokens(message)

    if output_tokens == 0:
        if streaming_message_responses is not None:
            for message in streaming_message_responses:
                output_tokens += count_tokens(message)
        elif hasattr(response, "choices"):
            for choice in response.choices:
                if hasattr(choice, "message"):
                    output_tokens += count_tokens(choice.message)

    # Do not set token data if it is 0
    input_tokens = input_tokens or None
    input_tokens_cached = input_tokens_cached or None
    output_tokens = output_tokens or None
    output_tokens_reasoning = output_tokens_reasoning or None
    total_tokens = total_tokens or None

    record_token_usage(
        span,
        input_tokens=input_tokens,
        input_tokens_cached=input_tokens_cached,
        output_tokens=output_tokens,
        output_tokens_reasoning=output_tokens_reasoning,
        total_tokens=total_tokens,
    )


def _set_input_data(span, kwargs, operation, integration):
    # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None
    # Input messages (the prompt or data sent to the model)
    messages = kwargs.get("messages")
    if messages is None:
        messages = kwargs.get("input")

    if isinstance(messages, str):
        messages = [messages]

    if (
        messages is not None
        and len(messages) > 0
        and should_send_default_pii()
        and integration.include_prompts
    ):
        normalized_messages = normalize_message_roles(messages)
        scope = sentry_sdk.get_current_scope()
        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
        if messages_data is not None:
            # Use the appropriate field based on the operation type
            if operation == "embeddings":
                set_data_normalized(
                    span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False
                )
            else:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
                )

    # Input attributes: Common
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

    # Input attributes: Optional
    kwargs_keys_to_attributes = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
    for key, attribute in kwargs_keys_to_attributes.items():
        value = kwargs.get(key)

        if value is not None and _is_given(value):
            set_data_normalized(span, attribute, value)

    # Input attributes: Tools
    tools = kwargs.get("tools")
    if tools is not None and _is_given(tools) and len(tools) > 0:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
        )


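# Records response attributes on the span and handles the three response
# shapes this integration sees: Chat Completions objects ("choices"),
# Responses API objects ("output"), and streaming responses ("_iterator"),
# whose span is only finished once the stream has been fully consumed.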
def _set_output_data(span, response, kwargs, integration, finish_span=True):
    # type: (Span, Any, dict[str, Any], OpenAIIntegration, bool) -> None
    if hasattr(response, "model"):
        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)

    # Input messages (the prompt or data sent to the model),
    # used for the token usage calculation
    messages = kwargs.get("messages")
    if messages is None:
        messages = kwargs.get("input")

    if messages is not None and isinstance(messages, str):
        messages = [messages]

    if hasattr(response, "choices"):
        if should_send_default_pii() and integration.include_prompts:
            response_text = [
                choice.message.model_dump()
                for choice in response.choices
                if choice.message is not None
            ]
            if len(response_text) > 0:
                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text)

        _calculate_token_usage(messages, response, span, None, integration.count_tokens)

        if finish_span:
            span.__exit__(None, None, None)

    elif hasattr(response, "output"):
        if should_send_default_pii() and integration.include_prompts:
            output_messages = {
                "response": [],
                "tool": [],
            }  # type: dict[str, list[Any]]

            for output in response.output:
                if output.type == "function_call":
                    output_messages["tool"].append(output.dict())
                elif output.type == "message":
                    for output_message in output.content:
                        try:
                            output_messages["response"].append(output_message.text)
                        except AttributeError:
                            # Unknown output message type, just return the json
                            output_messages["response"].append(output_message.dict())

            if len(output_messages["tool"]) > 0:
                set_data_normalized(
                    span,
                    SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
                    output_messages["tool"],
                    unpack=False,
                )

            if len(output_messages["response"]) > 0:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
                )

        _calculate_token_usage(messages, response, span, None, integration.count_tokens)

        if finish_span:
            span.__exit__(None, None, None)

    elif hasattr(response, "_iterator"):
        data_buf: list[list[str]] = []  # one for each choice

        old_iterator = response._iterator

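        # Wrap the original stream: accumulate text chunks per choice as the
        # caller consumes it, then record response text and token usage and
        # close the span once the stream ends.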
        def new_iterator():
            # type: () -> Iterator[ChatCompletionChunk]
            count_tokens_manually = True
            for x in old_iterator:
                with capture_internal_exceptions():
                    # OpenAI chat completion API
                    if hasattr(x, "choices"):
                        choice_index = 0
                        for choice in x.choices:
                            if hasattr(choice, "delta") and hasattr(
                                choice.delta, "content"
                            ):
                                content = choice.delta.content
                                if len(data_buf) <= choice_index:
                                    data_buf.append([])
                                data_buf[choice_index].append(content or "")
                            choice_index += 1

                    # OpenAI responses API
                    elif hasattr(x, "delta"):
                        if len(data_buf) == 0:
                            data_buf.append([])
                        data_buf[0].append(x.delta or "")

                    # OpenAI responses API end of streaming response
                    if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                        _calculate_token_usage(
                            messages,
                            x.response,
                            span,
                            None,
                            integration.count_tokens,
                        )
                        count_tokens_manually = False

                yield x

            with capture_internal_exceptions():
                if len(data_buf) > 0:
                    all_responses = ["".join(chunk) for chunk in data_buf]
                    if should_send_default_pii() and integration.include_prompts:
                        set_data_normalized(
                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                        )
                    if count_tokens_manually:
                        _calculate_token_usage(
                            messages,
                            response,
                            span,
                            all_responses,
                            integration.count_tokens,
                        )

            if finish_span:
                span.__exit__(None, None, None)

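        # The async variant mirrors new_iterator; the stream is consumed with
        # "async for" instead of a plain for loop.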
        async def new_iterator_async():
            # type: () -> AsyncIterator[ChatCompletionChunk]
            count_tokens_manually = True
            async for x in old_iterator:
                with capture_internal_exceptions():
                    # OpenAI chat completion API
                    if hasattr(x, "choices"):
                        choice_index = 0
                        for choice in x.choices:
                            if hasattr(choice, "delta") and hasattr(
                                choice.delta, "content"
                            ):
                                content = choice.delta.content
                                if len(data_buf) <= choice_index:
                                    data_buf.append([])
                                data_buf[choice_index].append(content or "")
                            choice_index += 1

                    # OpenAI responses API
                    elif hasattr(x, "delta"):
                        if len(data_buf) == 0:
                            data_buf.append([])
                        data_buf[0].append(x.delta or "")

                    # OpenAI responses API end of streaming response
                    if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                        _calculate_token_usage(
                            messages,
                            x.response,
                            span,
                            None,
                            integration.count_tokens,
                        )
                        count_tokens_manually = False

                yield x

            with capture_internal_exceptions():
                if len(data_buf) > 0:
                    all_responses = ["".join(chunk) for chunk in data_buf]
                    if should_send_default_pii() and integration.include_prompts:
                        set_data_normalized(
                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                        )
                    if count_tokens_manually:
                        _calculate_token_usage(
                            messages,
                            response,
                            span,
                            all_responses,
                            integration.count_tokens,
                        )

            if finish_span:
                span.__exit__(None, None, None)

        # Install the matching wrapper; the type-name string check
        # distinguishes async generators from sync ones.
        if str(type(response._iterator)) == "<class 'async_generator'>":
            response._iterator = new_iterator_async()
        else:
            response._iterator = new_iterator()

    else:
        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
        if finish_span:
            span.__exit__(None, None, None)


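# The *_common helpers below are generators driven by the sync and async
# executors, so both paths share one instrumentation flow. As a sketch (the
# driver names refer to the wrappers defined further down):
#
#     gen = _new_chat_completion_common(f, *args, **kwargs)
#     f, args, kwargs = next(gen)   # generator yields the call to perform
#     result = f(*args, **kwargs)   # driver performs it (awaited when async)
#     gen.send(result)              # generator records output, returns result
#
# The span is entered manually here and closed later in _set_output_data (or
# in _capture_exception on error), because streaming responses outlive the
# create() call itself.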
def _new_chat_completion_common(f, *args, **kwargs):
    # type: (Any, Any, Any) -> Any
    integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
    if integration is None:
        return f(*args, **kwargs)

    if "messages" not in kwargs:
        # invalid call (in all versions of openai), let the call fail as usual
        return f(*args, **kwargs)

    try:
        iter(kwargs["messages"])
    except TypeError:
        # invalid call (in all versions), messages must be iterable
        return f(*args, **kwargs)

    model = kwargs.get("model")
    operation = "chat"

    span = sentry_sdk.start_span(
        op=consts.OP.GEN_AI_CHAT,
        name=f"{operation} {model}",
        origin=OpenAIIntegration.origin,
    )
    span.__enter__()

    _set_input_data(span, kwargs, operation, integration)

    response = yield f, args, kwargs

    _set_output_data(span, response, kwargs, integration, finish_span=True)

    return response


def _wrap_chat_completion_create(f):
    # type: (Callable[..., Any]) -> Callable[..., Any]
    def _execute_sync(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_chat_completion_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            return e.value

        try:
            try:
                result = f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    def _sentry_patched_create_sync(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None or "messages" not in kwargs:
            # no "messages" means invalid call (in all versions of openai),
            # let the call fail as usual
            return f(*args, **kwargs)

        return _execute_sync(f, *args, **kwargs)

    return _sentry_patched_create_sync


def _wrap_async_chat_completion_create(f):
    # type: (Callable[..., Any]) -> Callable[..., Any]
    async def _execute_async(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_chat_completion_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            # The generator bailed out early and returned the un-awaited
            # coroutine from calling f, so await it here.
            return await e.value

        try:
            try:
                result = await f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    async def _sentry_patched_create_async(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None or "messages" not in kwargs:
            # no "messages" means invalid call (in all versions of openai),
            # let the call fail as usual
            return await f(*args, **kwargs)

        return await _execute_async(f, *args, **kwargs)

    return _sentry_patched_create_async


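# Unlike the chat and responses paths, the embeddings span is managed by a
# "with" block below, so _set_output_data is called with finish_span=False
# and exceptions are captured with manual_span_cleanup=False.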
def _new_embeddings_create_common(f, *args, **kwargs):
    # type: (Any, Any, Any) -> Any
    integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
    if integration is None:
        return f(*args, **kwargs)

    model = kwargs.get("model")
    operation = "embeddings"

    with sentry_sdk.start_span(
        op=consts.OP.GEN_AI_EMBEDDINGS,
        name=f"{operation} {model}",
        origin=OpenAIIntegration.origin,
    ) as span:
        _set_input_data(span, kwargs, operation, integration)

        response = yield f, args, kwargs

        _set_output_data(span, response, kwargs, integration, finish_span=False)

        return response


def _wrap_embeddings_create(f):
    # type: (Any) -> Any
    def _execute_sync(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_embeddings_create_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            return e.value

        try:
            try:
                result = f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e, manual_span_cleanup=False)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    def _sentry_patched_create_sync(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None:
            return f(*args, **kwargs)

        return _execute_sync(f, *args, **kwargs)

    return _sentry_patched_create_sync


def _wrap_async_embeddings_create(f):
    # type: (Any) -> Any
    async def _execute_async(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_embeddings_create_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            return await e.value

        try:
            try:
                result = await f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e, manual_span_cleanup=False)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    async def _sentry_patched_create_async(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None:
            return await f(*args, **kwargs)

        return await _execute_async(f, *args, **kwargs)

    return _sentry_patched_create_async


def _new_responses_create_common(f, *args, **kwargs):
    # type: (Any, Any, Any) -> Any
    integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
    if integration is None:
        return f(*args, **kwargs)

    model = kwargs.get("model")
    operation = "responses"

    span = sentry_sdk.start_span(
        op=consts.OP.GEN_AI_RESPONSES,
        name=f"{operation} {model}",
        origin=OpenAIIntegration.origin,
    )
    span.__enter__()

    _set_input_data(span, kwargs, operation, integration)

    response = yield f, args, kwargs

    _set_output_data(span, response, kwargs, integration, finish_span=True)

    return response


def _wrap_responses_create(f):
    # type: (Any) -> Any
    def _execute_sync(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_responses_create_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            return e.value

        try:
            try:
                result = f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    def _sentry_patched_create_sync(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None:
            return f(*args, **kwargs)

        return _execute_sync(f, *args, **kwargs)

    return _sentry_patched_create_sync


def _wrap_async_responses_create(f):
    # type: (Any) -> Any
    async def _execute_async(f, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        gen = _new_responses_create_common(f, *args, **kwargs)

        try:
            f, args, kwargs = next(gen)
        except StopIteration as e:
            return await e.value

        try:
            try:
                result = await f(*args, **kwargs)
            except Exception as e:
                _capture_exception(e)
                raise e from None

            return gen.send(result)
        except StopIteration as e:
            return e.value

    @wraps(f)
    async def _sentry_patched_responses_async(*args, **kwargs):
        # type: (Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
        if integration is None:
            return await f(*args, **kwargs)

        return await _execute_async(f, *args, **kwargs)

    return _sentry_patched_responses_async


def _is_given(obj):
    # type: (Any) -> bool
    """
    Return True unless obj is one of openai's NotGiven/Omit sentinels.
    Checks safely across openai versions where either sentinel may not exist.
    """
    if NotGiven is not None and isinstance(obj, NotGiven):
        return False
    if Omit is not None and isinstance(obj, Omit):
        return False
    return True