Changed code to support older Python versions
parent eb92d2d36f
commit 582458cdd0
5027 changed files with 794942 additions and 4 deletions
@@ -0,0 +1,287 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import (
    get_start_span_function,
    set_data_normalized,
    truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm  # type: ignore[import-not-found]
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    """Get the metadata dictionary from the kwargs."""
    litellm_params = kwargs.setdefault("litellm_params", {})

    # we need this weird little dance, as metadata might be set but may be None initially
    metadata = litellm_params.get("metadata")
    if metadata is None:
        metadata = {}
        litellm_params["metadata"] = metadata
    return metadata


def _input_callback(kwargs):
    # type: (Dict[str, Any]) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

    if integration is None:
        return

    # Get key parameters
    full_model = kwargs.get("model", "")
    try:
        model, provider, _, _ = litellm.get_llm_provider(full_model)
    except Exception:
        model = full_model
        provider = "unknown"

    call_type = kwargs.get("call_type", None)
    if call_type == "embedding":
        operation = "embeddings"
    else:
        operation = "chat"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
    span.__enter__()
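    # The span is entered manually here; it is closed later in _success_callback
    # or _failure_callback once LiteLLM reports the outcome of the request.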

    # Store span for later
    _get_metadata_dict(kwargs)["_sentry_span"] = span

    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

    # Record input/messages if allowed
    if should_send_default_pii() and integration.include_prompts:
        if operation == "embeddings":
            # For embeddings, look for the 'input' parameter
            embedding_input = kwargs.get("input")
            if embedding_input:
                scope = sentry_sdk.get_current_scope()
                # Normalize to list format
                input_list = (
                    embedding_input
                    if isinstance(embedding_input, list)
                    else [embedding_input]
                )
                messages_data = truncate_and_annotate_messages(input_list, span, scope)
                if messages_data is not None:
                    set_data_normalized(
                        span,
                        SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
                        messages_data,
                        unpack=False,
                    )
        else:
            # For chat, look for the 'messages' parameter
            messages = kwargs.get("messages", [])
            if messages:
                scope = sentry_sdk.get_current_scope()
                messages_data = truncate_and_annotate_messages(messages, span, scope)
                if messages_data is not None:
                    set_data_normalized(
                        span,
                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
                        messages_data,
                        unpack=False,
                    )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)

    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
        "api_version": kwargs.get("api_version"),
        "custom_llm_provider": kwargs.get("custom_llm_provider"),
    }
    for key, value in litellm_params.items():
        if value is not None:
            set_data_normalized(span, f"gen_ai.litellm.{key}", value)


def _success_callback(kwargs, completion_response, start_time, end_time):
    # type: (Dict[str, Any], Any, datetime, datetime) -> None
    """Handle successful completion."""

    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    try:
        # Record model information
        if hasattr(completion_response, "model"):
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
            )

        # Record response content if allowed
        if should_send_default_pii() and integration.include_prompts:
            if hasattr(completion_response, "choices"):
                response_messages = []
                for choice in completion_response.choices:
                    if hasattr(choice, "message"):
                        if hasattr(choice.message, "model_dump"):
                            response_messages.append(choice.message.model_dump())
                        elif hasattr(choice.message, "dict"):
                            response_messages.append(choice.message.dict())
                        else:
                            # Fallback for basic message objects
                            msg = {}
                            if hasattr(choice.message, "role"):
                                msg["role"] = choice.message.role
                            if hasattr(choice.message, "content"):
                                msg["content"] = choice.message.content
                            if hasattr(choice.message, "tool_calls"):
                                msg["tool_calls"] = choice.message.tool_calls
                            response_messages.append(msg)

                if response_messages:
                    set_data_normalized(
                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
                    )

        # Record token usage
        if hasattr(completion_response, "usage"):
            usage = completion_response.usage
            record_token_usage(
                span,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                total_tokens=getattr(usage, "total_tokens", None),
            )

    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)


def _failure_callback(kwargs, exception, start_time, end_time):
    # type: (Dict[str, Any], Exception, datetime, datetime) -> None
    """Handle request failure."""
    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    try:
        # Capture the exception
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        # Always finish the span and clean up
        span.__exit__(type(exception), exception, None)


class LiteLLMIntegration(Integration):
    """
    LiteLLM integration for Sentry.

    This integration automatically captures LiteLLM API calls and sends them to Sentry
    for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
    supports, including OpenAI, Anthropic, Google, Cohere, and many others.

    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:

    ```python
    import litellm
    import sentry_sdk

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
        send_default_pii=True,
        integrations=[
            sentry_sdk.integrations.LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ]
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in spans.
      Defaults to True. Set to False to exclude potentially sensitive data.
    """

    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)