Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions internal/api/chat/create_conversation_message_stream_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,33 @@ func (s *ChatServerV2) CreateConversationMessageStream(
}
}

openaiChatHistory, inappChatHistory, err := s.aiClientV2.ChatCompletionStreamV2(ctx, stream, conversation.ID.Hex(), modelSlug, conversation.OpenaiChatHistoryCompletion, llmProvider)
// Usage is the same as ChatCompletion, just passing the stream parameter

if customModel == nil {
// User did not specify API key for this model
llmProvider = &models.LLMProviderConfig{
APIKey: "",
IsCustomModel: false,
}
} else {
customModel.BaseUrl = strings.ToLower(customModel.BaseUrl)

if strings.Contains(customModel.BaseUrl, "paperdebugger.com") {
customModel.BaseUrl = ""
}
if !strings.HasPrefix(customModel.BaseUrl, "https://") {
customModel.BaseUrl = strings.Replace(customModel.BaseUrl, "http://", "", 1)
customModel.BaseUrl = "https://" + customModel.BaseUrl
}

llmProvider = &models.LLMProviderConfig{
APIKey: customModel.APIKey,
Endpoint: customModel.BaseUrl,
IsCustomModel: true,
}
}

openaiChatHistory, inappChatHistory, _, err := s.aiClientV2.ChatCompletionStreamV2(ctx, stream, conversation.UserID, conversation.ProjectID, conversation.ID.Hex(), modelSlug, conversation.OpenaiChatHistoryCompletion, llmProvider)
if err != nil {
return s.sendStreamError(stream, err)
}
Expand All @@ -339,7 +365,7 @@ func (s *ChatServerV2) CreateConversationMessageStream(
for i, bsonMsg := range conversation.InappChatHistory {
protoMessages[i] = mapper.BSONToChatMessageV2(bsonMsg)
}
title, err := s.aiClientV2.GetConversationTitleV2(ctx, protoMessages, llmProvider, modelSlug)
title, err := s.aiClientV2.GetConversationTitleV2(ctx, conversation.UserID, conversation.ProjectID, protoMessages, llmProvider, modelSlug)
if err != nil {
s.logger.Error("Failed to get conversation title", "error", err, "conversationID", conversation.ID.Hex())
return
Expand Down
71 changes: 71 additions & 0 deletions internal/models/usage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package models

import (
"time"

"go.mongodb.org/mongo-driver/v2/bson"
)

// HourlyUsage tracks cost per user, per project, per hour.
// Each document represents one hour bucket of usage.
type HourlyUsage struct {
	ID          bson.ObjectID `bson:"_id"`          // Document identifier
	UserID      bson.ObjectID `bson:"user_id"`      // Owning user
	ProjectID   string        `bson:"project_id"`   // Project identifier (opaque string, not an ObjectID)
	HourBucket  bson.DateTime `bson:"hour_bucket"`  // Timestamp truncated to the hour
	SuccessCost float64       `bson:"success_cost"` // Cost in USD for successful requests
	FailedCost  float64       `bson:"failed_cost"`  // Cost in USD for failed requests
	UpdatedAt   bson.DateTime `bson:"updated_at"`   // Last time this bucket was written
}

// CollectionName returns the MongoDB collection that stores hourly usage buckets.
func (u HourlyUsage) CollectionName() string {
	return "hourly_usages"
}

// WeeklyUsage tracks cost per user, per project, per week.
// Each document represents one week bucket of usage.
type WeeklyUsage struct {
	ID          bson.ObjectID `bson:"_id"`          // Document identifier
	UserID      bson.ObjectID `bson:"user_id"`      // Owning user
	ProjectID   string        `bson:"project_id"`   // Project identifier (opaque string, not an ObjectID)
	WeekBucket  bson.DateTime `bson:"week_bucket"`  // Timestamp truncated to the week (Monday)
	SuccessCost float64       `bson:"success_cost"` // Cost in USD for successful requests
	FailedCost  float64       `bson:"failed_cost"`  // Cost in USD for failed requests
	UpdatedAt   bson.DateTime `bson:"updated_at"`   // Last time this bucket was written
}

// CollectionName returns the MongoDB collection that stores weekly usage buckets.
func (u WeeklyUsage) CollectionName() string {
	return "weekly_usages"
}

// LifetimeUsage tracks total cost per user, per project, across all time.
// Each document represents the cumulative usage for a user-project pair.
type LifetimeUsage struct {
	ID          bson.ObjectID `bson:"_id"`          // Document identifier
	UserID      bson.ObjectID `bson:"user_id"`      // Owning user
	ProjectID   string        `bson:"project_id"`   // Project identifier (opaque string, not an ObjectID)
	SuccessCost float64       `bson:"success_cost"` // Total cost in USD for successful requests
	FailedCost  float64       `bson:"failed_cost"`  // Total cost in USD for failed requests
	UpdatedAt   bson.DateTime `bson:"updated_at"`   // Last time this document was written
}

// CollectionName returns the MongoDB collection that stores lifetime usage totals.
func (u LifetimeUsage) CollectionName() string {
	return "lifetime_usages"
}

// TruncateToHour truncates a time to the start of its hour.
func TruncateToHour(t time.Time) time.Time {
return t.Truncate(time.Hour)
}

// TruncateToWeek truncates a time to the start of its week (Monday 00:00:00 UTC).
func TruncateToWeek(t time.Time) time.Time {
t = t.UTC()
weekday := int(t.Weekday())
if weekday == 0 {
weekday = 7 // Sunday becomes 7
}
// Subtract days to get to Monday
monday := t.AddDate(0, 0, -(weekday - 1))
return time.Date(monday.Year(), monday.Month(), monday.Day(), 0, 0, 0, 0, time.UTC)
}
3 changes: 3 additions & 0 deletions internal/services/toolkit/client/client_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type AIClientV2 struct {

reverseCommentService *services.ReverseCommentService
projectService *services.ProjectService
usageService *services.UsageService
cfg *cfg.Cfg
logger *logger.Logger
}
Expand Down Expand Up @@ -62,6 +63,7 @@ func NewAIClientV2(

reverseCommentService *services.ReverseCommentService,
projectService *services.ProjectService,
usageService *services.UsageService,
cfg *cfg.Cfg,
logger *logger.Logger,
) *AIClientV2 {
Expand Down Expand Up @@ -109,6 +111,7 @@ func NewAIClientV2(

reverseCommentService: reverseCommentService,
projectService: projectService,
usageService: usageService,
cfg: cfg,
logger: logger,
}
Expand Down
69 changes: 50 additions & 19 deletions internal/services/toolkit/client/completion_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,19 @@ import (
"paperdebugger/internal/models"
"paperdebugger/internal/services/toolkit/handler"
chatv2 "paperdebugger/pkg/gen/api/chat/v2"
"strconv"
"strings"
"time"

"github.com/openai/openai-go/v3"
"go.mongodb.org/mongo-driver/v2/bson"
)

// UsageCost holds cost information from a completion.
type UsageCost struct {
Cost float64
}

// define []openai.ChatCompletionMessageParamUnion as OpenAIChatHistory

// ChatCompletion orchestrates a chat completion process with a language model (e.g., GPT), handling tool calls and message history management.
Expand All @@ -24,13 +32,14 @@ import (
// Returns:
// 1. The full chat history sent to the language model (including any tool call results).
// 2. The incremental chat history visible to the user (including tool call results and assistant responses).
// 3. An error, if any occurred during the process.
func (a *AIClientV2) ChatCompletionV2(ctx context.Context, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, error) {
openaiChatHistory, inappChatHistory, err := a.ChatCompletionStreamV2(ctx, nil, "", modelSlug, messages, llmProvider)
// 3. Cost information (in USD).
// 4. An error, if any occurred during the process.
func (a *AIClientV2) ChatCompletionV2(ctx context.Context, userID bson.ObjectID, projectID string, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, UsageCost, error) {
openaiChatHistory, inappChatHistory, usage, err := a.ChatCompletionStreamV2(ctx, nil, userID, projectID, "", modelSlug, messages, llmProvider)
if err != nil {
return nil, nil, err
return nil, nil, usage, err
}
return openaiChatHistory, inappChatHistory, nil
return openaiChatHistory, inappChatHistory, usage, nil
}

// ChatCompletionStream orchestrates a streaming chat completion process with a language model (e.g., GPT), handling tool calls, message history management, and real-time streaming of responses to the client.
Expand All @@ -46,17 +55,20 @@ func (a *AIClientV2) ChatCompletionV2(ctx context.Context, modelSlug string, mes
// Returns: (same as ChatCompletion)
// 1. The full chat history sent to the language model (including any tool call results).
// 2. The incremental chat history visible to the user (including tool call results and assistant responses).
// 3. An error, if any occurred during the process. (However, in the streaming mode, the error is not returned, but sending by callbackStream)
// 3. Cost information (in USD, accumulated across all calls).
// 4. An error, if any occurred during the process. (However, in the streaming mode, the error is not returned, but sending by callbackStream)
//
// This function works as follows: (same as ChatCompletion)
// - It initializes the chat history for the language model and the user, and sets up a stream handler for real-time updates.
// - It repeatedly sends the current chat history to the language model, receives streaming responses, and forwards them to the client as they arrive.
// - If tool calls are required, it handles them and appends the results to the chat history, then continues the loop.
// - If no tool calls are needed, it appends the assistant's response and exits the loop.
// - Finally, it returns the updated chat histories and any error encountered.
func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream chatv2.ChatService_CreateConversationMessageStreamServer, conversationId string, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, error) {
// - Finally, it returns the updated chat histories, accumulated cost, and any error encountered.
func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream chatv2.ChatService_CreateConversationMessageStreamServer, userID bson.ObjectID, projectID string, conversationId string, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, UsageCost, error) {
openaiChatHistory := messages
inappChatHistory := AppChatHistory{}
usage := UsageCost{}
success := false // Track whether the request completed successfully

streamHandler := handler.NewStreamHandlerV2(callbackStream, conversationId, modelSlug)

Expand All @@ -65,6 +77,19 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
streamHandler.SendFinalization()
}()

// Track usage on all exit paths (success or error) to prevent abuse
// Only track if userID is provided and user is not using their own API key (BYOK)
defer func() {
if !userID.IsZero() && !llmProvider.IsCustomModel && usage.Cost > 0 {
// Use a detached context since the request context may be canceled
trackCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := a.usageService.TrackUsage(trackCtx, userID, projectID, usage.Cost, success); err != nil {
a.logger.Error("Error while tracking usage", "error", err)
}
}
}()

oaiClient := a.GetOpenAIClient(llmProvider)
params := getDefaultParamsV2(modelSlug, a.toolCallHandler.Registry, llmProvider.IsCustomModel)

Expand All @@ -77,6 +102,7 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
answer_content := ""
answer_content_id := ""
has_sent_part_begin := false
has_finished := false
tool_info := map[int]map[string]string{}
toolCalls := []openai.FinishedChatCompletionToolCall{}
handleReasoning := func(raw string) (string, bool) {
Expand All @@ -92,12 +118,18 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}

for stream.Next() {
// time.Sleep(5000 * time.Millisecond) // DEBUG POINT: change this to test in a slow mode
chunk := stream.Current()

// Capture cost from any chunk that has usage data (OpenRouter sends usage in a separate chunk after FinishReason)
if chunk.Usage.PromptTokens > 0 || chunk.Usage.CompletionTokens > 0 {
if costField, ok := chunk.Usage.JSON.ExtraFields["cost"]; ok {
if cost, err := strconv.ParseFloat(costField.Raw(), 64); err == nil {
usage.Cost += cost
}
}
}

if len(chunk.Choices) == 0 {
// Handle usage information
// fmt.Printf("Usage: %+v\n", chunk.Usage)
continue
}

Expand Down Expand Up @@ -180,17 +212,15 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}
}

if chunk.Choices[0].FinishReason != "" {
// fmt.Printf("FinishReason: %s\n", chunk.Choices[0].FinishReason)
// answer_content += chunk.Choices[0].Delta.Content
// fmt.Printf("answer_content: %s\n", answer_content)
if chunk.Choices[0].FinishReason != "" && !has_finished {
streamHandler.HandleTextDoneItem(chunk, answer_content, reasoning_content)
break
has_finished = true
// Don't break - continue reading to capture the usage chunk that comes after
}
}

if err := stream.Err(); err != nil {
return nil, nil, err
return nil, nil, usage, err
}

if answer_content != "" {
Expand All @@ -200,7 +230,7 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
// Execute the calls (if any), return incremental data
openaiToolHistory, inappToolHistory, err := a.toolCallHandler.HandleToolCallsV2(ctx, toolCalls, streamHandler)
if err != nil {
return nil, nil, err
return nil, nil, usage, err
}

// // Record the tool call results
Expand All @@ -213,5 +243,6 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}
}

return openaiChatHistory, inappChatHistory, nil
success = true
return openaiChatHistory, inappChatHistory, usage, nil
}
2 changes: 1 addition & 1 deletion internal/services/toolkit/client/get_citation_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func (a *AIClientV2) GetCitationKeys(ctx context.Context, sentence string, userI
// Bibliography is placed at the start of the prompt to leverage prompt caching
message := fmt.Sprintf("Bibliography: %s\nSentence: %s\nBased on the sentence and bibliography, suggest only the most relevant citation keys separated by commas with no spaces (e.g. key1,key2). Be selective and only include citations that are directly relevant. Avoid suggesting more than 3 citations. If no relevant citations are found, return '%s'.", bibliography, sentence, emptyCitation)

_, resp, err := a.ChatCompletionV2(ctx, "gpt-5.2", OpenAIChatHistory{
_, resp, _, err := a.ChatCompletionV2(ctx, userId, projectId, "gpt-5.2", OpenAIChatHistory{
openai.SystemMessage("You are a helpful assistant that suggests relevant citation keys."),
openai.UserMessage(message),
}, llmProvider)
Expand Down
2 changes: 2 additions & 0 deletions internal/services/toolkit/client/get_citation_keys_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ func setupTestClient(t *testing.T) (*client.AIClientV2, *services.ProjectService
}

projectService := services.NewProjectService(dbInstance, cfg.GetCfg(), logger.GetLogger())
usageService := services.NewUsageService(dbInstance, cfg.GetCfg(), logger.GetLogger())
aiClient := client.NewAIClientV2(
dbInstance,
&services.ReverseCommentService{},
projectService,
usageService,
cfg.GetCfg(),
logger.GetLogger(),
)
Expand Down
5 changes: 3 additions & 2 deletions internal/services/toolkit/client/get_conversation_title_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import (

"github.com/openai/openai-go/v3"
"github.com/samber/lo"
"go.mongodb.org/mongo-driver/v2/bson"
)

func (a *AIClientV2) GetConversationTitleV2(ctx context.Context, inappChatHistory []*chatv2.Message, llmProvider *models.LLMProviderConfig, modelSlug string) (string, error) {
func (a *AIClientV2) GetConversationTitleV2(ctx context.Context, userID bson.ObjectID, projectID string, inappChatHistory []*chatv2.Message, llmProvider *models.LLMProviderConfig, modelSlug string) (string, error) {
messages := lo.Map(inappChatHistory, func(message *chatv2.Message, _ int) string {
if _, ok := message.Payload.MessageType.(*chatv2.MessagePayload_Assistant); ok {
return fmt.Sprintf("Assistant: %s", message.Payload.GetAssistant().GetContent())
Expand All @@ -35,7 +36,7 @@ func (a *AIClientV2) GetConversationTitleV2(ctx context.Context, inappChatHistor
modelToUse = modelSlug
}

_, resp, err := a.ChatCompletionV2(ctx, modelToUse, OpenAIChatHistory{
_, resp, _, err := a.ChatCompletionV2(ctx, userID, projectID, modelToUse, OpenAIChatHistory{
openai.SystemMessage("You are a helpful assistant that generates a title for a conversation."),
openai.UserMessage(message),
}, llmProvider)
Expand Down
6 changes: 6 additions & 0 deletions internal/services/toolkit/client/utils_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ func getDefaultParamsV2(modelSlug string, toolRegistry *registry.ToolRegistryV2,
Tools: toolRegistry.GetTools(),
ParallelToolCalls: openaiv3.Bool(true),
Store: openaiv3.Bool(false),
StreamOptions: openaiv3.ChatCompletionStreamOptionsParam{
IncludeUsage: openaiv3.Bool(true),
},
}
}
}
Expand All @@ -97,6 +100,9 @@ func getDefaultParamsV2(modelSlug string, toolRegistry *registry.ToolRegistryV2,
Tools: toolRegistry.GetTools(), // Tool registration is managed centrally by the registry
ParallelToolCalls: openaiv3.Bool(true),
Store: openaiv3.Bool(false), // Must set to false, because we are construct our own chat history.
StreamOptions: openaiv3.ChatCompletionStreamOptionsParam{
IncludeUsage: openaiv3.Bool(true),
},
}
}

Expand Down
Loading
Loading