Tracing Chatbots

Best practices for tracing chatbot and conversational AI applications.

Overview

Chatbots have unique observability needs: multi-turn conversations, session management, and user context tracking. This guide covers best practices for tracing chatbot applications.

Session-Based Tracing

Creating Sessions

Use session IDs to group conversation turns:

import { getObservability } from '@transactional/observability';
 
class ChatBot {
  // One observability client and one session per bot instance; all turns
  // of this conversation share the same sessionId.
  private obs = getObservability();
  private sessionId: string;
  private userId: string;
  private messages: Message[] = [];

  constructor(userId: string) {
    this.userId = userId;
    // Timestamp keeps session IDs unique across repeat visits by one user.
    this.sessionId = `chat-${userId}-${Date.now()}`;
  }

  /**
   * Handles one user message: records the turn as a trace, the LLM call
   * as a generation, appends both sides to the history, and returns the
   * assistant's reply text. Errors are recorded on the trace and rethrown.
   */
  async sendMessage(content: string): Promise<string> {
    // Add user message
    this.messages.push({ role: 'user', content });

    // Create trace for this turn
    const trace = this.obs.trace({
      name: 'chat-turn',
      sessionId: this.sessionId,
      userId: this.userId,
      input: {
        message: content,
        turnNumber: this.messages.filter(m => m.role === 'user').length,
        historyLength: this.messages.length,
      },
      metadata: {
        channel: 'web',
        platform: 'chat-widget',
      },
    });

    // Create the generation before the try block so it can be closed in
    // the catch as well — previously a failed LLM call left it dangling.
    const generation = this.obs.generation({
      name: 'generate-response',
      modelName: 'gpt-4o',
      input: { messages: this.messages },
    });

    try {
      const response = await this.callLLM();

      await generation.end({
        output: response,
        promptTokens: response.usage.prompt_tokens,
        completionTokens: response.usage.completion_tokens,
      });

      // Add assistant message
      this.messages.push({
        role: 'assistant',
        content: response.content,
      });

      await trace.end({
        output: {
          response: response.content,
          messageCount: this.messages.length,
        },
      });

      return response.content;
    } catch (error) {
      // Close the generation span on failure too, mirroring the
      // success/failure pattern used for tool spans below.
      await generation.end({
        output: {
          success: false,
          error: error instanceof Error ? error.message : String(error),
        },
      });
      await trace.error(error as Error);
      throw error;
    }
  }
}

Session Lifecycle

Track session start and end:

class ChatSession {
  private sessionStartTrace: Trace;

  /** Emit a marker trace recording when this session began. */
  async startSession() {
    this.sessionStartTrace = this.obs.trace({
      name: 'session-start',
      sessionId: this.sessionId,
      userId: this.userId,
      input: { startTime: new Date().toISOString() },
    });
    // Marker traces carry no output — close immediately.
    await this.sessionStartTrace.end({});
  }

  /**
   * Emit a marker trace recording why the session ended, how many
   * turns it had, and how long it lasted (ms).
   */
  async endSession(reason: 'user' | 'timeout' | 'error') {
    const endTrace = this.obs.trace({
      name: 'session-end',
      sessionId: this.sessionId,
      userId: this.userId,
      input: {
        reason,
        turnCount: this.turnCount,
        duration: Date.now() - this.startTime,
      },
    });
    await endTrace.end({});
  }
}

Context Management

Tracking Context Window

Monitor context usage:

// Record context-window pressure on every turn so utilization can be
// charted and alerted on per model.
const trace = this.obs.trace({
  name: 'chat-turn',
  sessionId: this.sessionId,
  metadata: {
    contextTokens: this.estimateTokens(this.messages),
    contextLimit: 128000,  // gpt-4o limit
    contextUtilization: this.estimateTokens(this.messages) / 128000,  // fraction of window in use (0..1)
  },
});

Context Truncation

Track when context is truncated:

/**
 * Trims the conversation history to fit the token budget, keeping the
 * first message and discarding the oldest messages after it, one at a
 * time. Emits a 'context-truncation' span whenever anything was removed.
 */
async function prepareMessages(fullHistory: Message[]): Promise<Message[]> {
  const obs = getObservability();

  // Budget below the model limit to leave room for the response.
  const maxTokens = 100000;
  const trimmed = [...fullHistory];

  // Remove the message at index 1 (oldest after the first/system message)
  // until the history fits or only two messages remain.
  while (estimateTokens(trimmed) > maxTokens && trimmed.length > 2) {
    trimmed.splice(1, 1);
  }

  const removedCount = fullHistory.length - trimmed.length;
  if (removedCount > 0) {
    const span = obs.observation({
      type: 'SPAN',
      name: 'context-truncation',
      input: { originalLength: fullHistory.length },
    });

    await span.end({
      output: {
        finalLength: trimmed.length,
        removedMessages: removedCount,
      },
    });
  }

  return trimmed;
}

Intent Detection

Tracking Intents

Trace detected intents:

/**
 * Classifies a user message inside an 'intent-detection' span and
 * returns the detected intent's label.
 */
async function detectIntent(message: string): Promise<string> {
  const obs = getObservability();

  const span = obs.observation({
    type: 'SPAN',
    name: 'intent-detection',
    input: { message },
  });

  const classification = await classifyIntent(message);

  // Record both the full classification and its confidence score.
  await span.end({
    output: {
      intent: classification,
      confidence: classification.confidence,
    },
  });

  return classification.label;
}
 
async function handleMessage(message: string) {
  const trace = obs.trace({
    name: 'chat-turn',
    input: { message },
  });
 
  // Detect intent
  const intent = await detectIntent(message);
 
  // Route based on intent
  const generation = obs.generation({
    name: `handle-${intent}`,
    modelName: 'gpt-4o',
    metadata: { intent },
  });
 
  // ... handle message
}

Tool/Function Calls

Tracking Tool Usage

Monitor when tools are called:

/**
 * Executes one tool call inside a SPAN observation, recording success
 * or failure. Failures are traced and rethrown to the caller.
 */
async function handleToolCall(toolCall: ToolCall) {
  const obs = getObservability();

  const span = obs.observation({
    type: 'SPAN',
    name: `tool-${toolCall.name}`,
    input: {
      tool: toolCall.name,
      // Tool arguments arrive from the model as a JSON string.
      arguments: JSON.parse(toolCall.arguments),
    },
  });

  try {
    const result = await executeToolCall(toolCall);

    await span.end({
      output: {
        success: true,
        result,
      },
    });

    return result;
  } catch (error) {
    // FIX: catch variables are 'unknown' under strict mode — narrow
    // before reading .message (and handle non-Error throws).
    await span.end({
      output: {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
    });
    throw error;
  }
}

Multi-Step Tool Usage

Track tool chains:

/**
 * Runs the tool-calling loop: lets the model request tools, executes
 * them, feeds the results back, and returns the final assistant text.
 * Each model round-trip is traced as its own generation.
 *
 * @throws Error if the model keeps requesting tools past maxIterations.
 */
async function processWithTools(messages: Message[]) {
  const obs = getObservability();
  const maxIterations = 5;  // hard stop against infinite tool loops

  for (let i = 0; i < maxIterations; i++) {
    const generation = obs.generation({
      name: 'tool-iteration',
      modelName: 'gpt-4o',
      metadata: { iteration: i },
      input: { messages },
    });

    const response = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages,
      tools,
    });

    const choice = response.choices[0];

    if (choice.finish_reason === 'tool_calls') {
      const toolCalls = choice.message.tool_calls;

      // Execute all requested tools in parallel.
      const toolResults = await Promise.all(toolCalls.map(handleToolCall));

      await generation.end({
        output: {
          toolCalls,
          results: toolResults,
        },
      });

      // Append the assistant turn, then one 'tool' message per call.
      // FIX: tool_call_id must come from the model's tool call, not the
      // tool result — handleToolCall returns the raw result, which has
      // no .id (and no .result field to stringify).
      messages.push(choice.message);
      messages.push(...toolCalls.map((tc, idx) => ({
        role: 'tool',
        tool_call_id: tc.id,
        content: JSON.stringify(toolResults[idx]),
      })));
    } else {
      // Final response
      await generation.end({
        output: choice.message,
        promptTokens: response.usage?.prompt_tokens,
        completionTokens: response.usage?.completion_tokens,
      });

      return choice.message.content;
    }
  }

  throw new Error('Max tool iterations exceeded');
}

User Context

Tracking User State

Include user context in traces:

// Attach user attributes as metadata so traces can be segmented by
// tier, tenure, and preference in the observability UI.
const trace = obs.trace({
  name: 'chat-turn',
  sessionId,
  userId,
  metadata: {
    userTier: user.subscriptionTier,
    userSince: user.createdAt,
    previousSessions: user.sessionCount,
    preferences: user.chatPreferences,
  },
});

User Journey Tracking

Track user journey across sessions:

// First message of session
if (isFirstMessage) {
  // Capture acquisition context once, on the session's opening trace.
  const trace = obs.trace({
    name: 'session-start',
    sessionId,
    userId,
    metadata: {
      entryPoint: 'chat-widget',
      referrer: request.headers.referer,       // where the user came from
      device: request.headers['user-agent'],   // coarse device/browser info
    },
  });
}

Conversation Quality

Quality Metrics

Track quality signals:

// Attach quality signals as scores so they can be aggregated and
// alerted on per session.
await trace.end({
  output: response,
  scores: {
    responseLength: response.length,
    turnCount: this.turnCount,
    userSatisfaction: await this.getUserRating(),  // explicit user rating, when given
    intentResolved: this.isIntentResolved(),       // presumably whether the user's goal was met — confirm semantics
  },
});

Feedback Collection

Integrate feedback:

async function collectFeedback(traceId: string, rating: number) {
  const obs = getObservability();
 
  await obs.feedback({
    traceId,
    type: 'rating',
    value: rating,
    scale: { min: 1, max: 5 },
  });
}

Best Practices

1. Always Use Session IDs

// Good - all turns linked: a shared sessionId groups every turn of a
// conversation under one session in the tracing UI.
trace({ sessionId: 'chat-abc123' });
 
// Bad - orphaned traces: each turn appears unrelated to the others
trace({ name: 'chat' });  // No sessionId

2. Track Conversation State

// Example per-turn conversation-state metadata.
metadata: {
  turnNumber: 5,          // index of this user turn in the session
  historyTokens: 2500,    // estimated tokens currently in context
  activeIntent: 'booking',
  pendingAction: 'confirm_date',
}

3. Handle Errors Gracefully

try {
  const response = await chat();
  await trace.end({ output: response });
} catch (error) {
  // Record the failure on the trace before answering the user.
  await trace.error(error);
  // Return friendly error message to user
  return "I'm sorry, I encountered an issue. Please try again.";
}

4. Monitor Session Health

Set up alerts for:

  • Long sessions (> 20 turns)
  • High error rates
  • Low user satisfaction
  • Excessive token usage

Next Steps