diff --git a/.github/workflows/skywalking.yaml b/.github/workflows/skywalking.yaml index 58cac891d8c1..6b31fa840800 100644 --- a/.github/workflows/skywalking.yaml +++ b/.github/workflows/skywalking.yaml @@ -635,6 +635,8 @@ jobs: config: test/e2e-v2/cases/zipkin/banyandb/e2e.yaml - name: Virtual GenAI config: test/e2e-v2/cases/virtual-genai/e2e.yaml + - name: OTLP Virtual GenAI + config: test/e2e-v2/cases/otlp-virtual-genai/e2e.yaml - name: Nginx config: test/e2e-v2/cases/nginx/e2e.yaml diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 3526512d1c0f..d0927e3615df 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -176,6 +176,7 @@ precedence over the `service.name` fallback. * OTel log handler: prefer `service.instance.id` (OTel spec) over `service.instance` with fallback. * Add `SampleFamily.debugDump()` for MAL debugging. +* Support virtual GenAI analysis for otlp and zipkin traces. #### UI * Fix the missing icon in new native trace view. diff --git a/docs/en/setup/service-agent/virtual-genai.md b/docs/en/setup/service-agent/virtual-genai.md index 86b9d5376429..5952e1f95bfa 100644 --- a/docs/en/setup/service-agent/virtual-genai.md +++ b/docs/en/setup/service-agent/virtual-genai.md @@ -6,6 +6,13 @@ metrics of the GenAI operations are from the GenAI client-side perspective. For example, a Spring AI plugin in the Java agent could detect the latency of a chat completion request. As a result, SkyWalking would show traffic, latency, success rate, token usage (input/output), and estimated cost in the GenAI dashboard. +# Data Sources +Virtual GenAI metrics are derived from distributed tracing data. SkyWalking OAP can ingest and analyze trace data adhering to GenAI semantic conventions from the following sources: + +1. Native SkyWalking Traces via SkyWalking Java Agent +2. OpenTelemetry format trace +3. Zipkin format Traces + ## Span Contract The GenAI operation span should have the following properties: @@ -60,4 +67,19 @@ The following metrics are available at the **model** (service instance) level: - `gen_ai_model_total_estimated_cost / avg_estimated_cost` - Estimated cost ## Requirement -`SkyWalking Java Agent` version >= 9.7 \ No newline at end of file + +### Version +`SkyWalking Java Agent` version >= 9.7 + +### Semantic Conventions and Compatibility +The tag keys used in Virtual GenAI follow the **OpenTelemetry GenAI Semantic Conventions**. SkyWalking OAP identifies GenAI-related spans based on the following criteria depending on the data source: + +* **SkyWalking Native Agent**: Requires an **Exit** span with `SpanLayer == GENAI` and relevant `gen_ai.*` tags. +* **OTLP / Zipkin Traces**: Any span containing the `gen_ai.response.model` tag will be identified as a GenAI operation. + +**Note on OTLP / Zipkin Provider Identification**: +To ensure broad compatibility with different OpenTelemetry instrumentation versions, SkyWalking OAP identifies the GenAI provider using the following prioritized logic: + +1. **`gen_ai.provider.name`**: SkyWalking first looks for this tag (the latest OTel semantic convention). +2. **`gen_ai.system`**: If the above is missing, it falls back to this legacy tag for backward compatibility with older instrumentation (e.g., current OTel Python auto-instrumentation). +3. **Prefix Matching**: If neither tag is present, SkyWalking attempts to identify the provider by matching the model name against the `prefix-match` rules defined in the `gen-ai-config.yml`. \ No newline at end of file diff --git a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/VirtualServiceAnalysisListener.java b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/VirtualServiceAnalysisListener.java index 861f699ce830..1d48dd2a386b 100644 --- a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/VirtualServiceAnalysisListener.java +++ b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/VirtualServiceAnalysisListener.java @@ -95,7 +95,7 @@ public AnalysisListener create(ModuleManager moduleManager, AnalyzerModuleConfig new VirtualCacheProcessor(namingControl, config), new VirtualDatabaseProcessor(namingControl, config), new VirtualMQProcessor(namingControl), - new VirtualGenAIProcessor(namingControl, genAIMeterAnalyzerService) + new VirtualGenAIProcessor(genAIMeterAnalyzerService) ) ); } diff --git a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/vservice/VirtualGenAIProcessor.java b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/vservice/VirtualGenAIProcessor.java index 246e1e41c0ec..33955ce71b92 100644 --- a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/vservice/VirtualGenAIProcessor.java +++ b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/vservice/VirtualGenAIProcessor.java @@ -22,13 +22,7 @@ import org.apache.skywalking.apm.network.language.agent.v3.SpanLayer; import org.apache.skywalking.apm.network.language.agent.v3.SpanObject; import org.apache.skywalking.oap.analyzer.genai.service.IGenAIMeterAnalyzerService; -import org.apache.skywalking.oap.server.core.analysis.Layer; -import org.apache.skywalking.oap.server.core.config.NamingControl; import org.apache.skywalking.oap.server.core.source.GenAIMetrics; -import org.apache.skywalking.oap.server.core.source.GenAIModelAccess; -import org.apache.skywalking.oap.server.core.source.GenAIProviderAccess; -import org.apache.skywalking.oap.server.core.source.ServiceInstance; -import org.apache.skywalking.oap.server.core.source.ServiceMeta; import org.apache.skywalking.oap.server.core.source.Source; import java.util.ArrayList; @@ -38,8 +32,6 @@ @RequiredArgsConstructor public class VirtualGenAIProcessor implements VirtualServiceProcessor { - private final NamingControl namingControl; - private final IGenAIMeterAnalyzerService meterAnalyzerService; private final List recordList = new ArrayList<>(); @@ -55,53 +47,7 @@ public void prepareVSIfNecessary(SpanObject span, SegmentObject segmentObject) { return; } - recordList.add(toServiceMeta(metrics)); - recordList.add(toInstance(metrics)); - recordList.add(toProviderAccess(metrics)); - recordList.add(toModelAccess(metrics)); - } - - private ServiceMeta toServiceMeta(GenAIMetrics metrics) { - ServiceMeta service = new ServiceMeta(); - service.setName(namingControl.formatServiceName(metrics.getProviderName())); - service.setLayer(Layer.VIRTUAL_GENAI); - service.setTimeBucket(metrics.getTimeBucket()); - return service; - } - - private Source toInstance(GenAIMetrics metrics) { - ServiceInstance instance = new ServiceInstance(); - instance.setTimeBucket(metrics.getTimeBucket()); - instance.setName(namingControl.formatInstanceName(metrics.getModelName())); - instance.setServiceLayer(Layer.VIRTUAL_GENAI); - instance.setServiceName(metrics.getProviderName()); - return instance; - } - - private GenAIProviderAccess toProviderAccess(GenAIMetrics metrics) { - GenAIProviderAccess source = new GenAIProviderAccess(); - source.setName(namingControl.formatServiceName(metrics.getProviderName())); - source.setInputTokens(metrics.getInputTokens()); - source.setOutputTokens(metrics.getOutputTokens()); - source.setTotalEstimatedCost(metrics.getTotalEstimatedCost()); - source.setLatency(metrics.getLatency()); - source.setStatus(metrics.isStatus()); - source.setTimeBucket(metrics.getTimeBucket()); - return source; - } - - private GenAIModelAccess toModelAccess(GenAIMetrics metrics) { - GenAIModelAccess source = new GenAIModelAccess(); - source.setServiceName(namingControl.formatServiceName(metrics.getProviderName())); - source.setModelName(namingControl.formatInstanceName(metrics.getModelName())); - source.setInputTokens(metrics.getInputTokens()); - source.setOutputTokens(metrics.getOutputTokens()); - source.setTotalEstimatedCost(metrics.getTotalEstimatedCost()); - source.setTimeToFirstToken(metrics.getTimeToFirstToken()); - source.setLatency(metrics.getLatency()); - source.setStatus(metrics.isStatus()); - source.setTimeBucket(metrics.getTimeBucket()); - return source; + recordList.addAll(meterAnalyzerService.transferToSources(metrics)); } @Override diff --git a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/GenAIAnalyzerModuleProvider.java b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/GenAIAnalyzerModuleProvider.java index 4e9eeaf36934..e4ca69acfd19 100644 --- a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/GenAIAnalyzerModuleProvider.java +++ b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/GenAIAnalyzerModuleProvider.java @@ -26,6 +26,7 @@ import org.apache.skywalking.oap.analyzer.genai.service.GenAIMeterAnalyzer; import org.apache.skywalking.oap.analyzer.genai.service.IGenAIMeterAnalyzerService; import org.apache.skywalking.oap.server.core.CoreModule; +import org.apache.skywalking.oap.server.core.config.NamingControl; import org.apache.skywalking.oap.server.core.oal.rt.OALEngineLoaderService; import org.apache.skywalking.oap.server.library.module.ModuleConfig; import org.apache.skywalking.oap.server.library.module.ModuleDefine; @@ -37,6 +38,8 @@ public class GenAIAnalyzerModuleProvider extends ModuleProvider { private GenAIConfig config; + private GenAIMeterAnalyzer analyzer; + @Override public String name() { return "default"; @@ -67,10 +70,11 @@ public void prepare() throws ServiceNotProvidedException, ModuleStartException { GenAIConfigLoader loader = new GenAIConfigLoader(config); config = loader.loadConfig(); GenAIProviderPrefixMatcher matcher = GenAIProviderPrefixMatcher.build(); + + this.analyzer = new GenAIMeterAnalyzer(matcher); this.registerServiceImplementation( IGenAIMeterAnalyzerService.class, - new GenAIMeterAnalyzer(matcher) - ); + analyzer); } @Override @@ -79,6 +83,12 @@ public void start() throws ServiceNotProvidedException, ModuleStartException { .provider() .getService(OALEngineLoaderService.class) .load(GenAIOALDefine.INSTANCE); + + NamingControl namingControl = getManager().find(CoreModule.NAME) + .provider() + .getService(NamingControl.class); + + this.analyzer.setNamingControl(namingControl); } @Override @@ -88,7 +98,7 @@ public void notifyAfterCompleted() throws ServiceNotProvidedException, ModuleSta @Override public String[] requiredModules() { - return new String[] { + return new String[]{ CoreModule.NAME }; } diff --git a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/config/GenAITagKeys.java b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/config/GenAITagKeys.java index e98b49d55a15..3f0038c11623 100644 --- a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/config/GenAITagKeys.java +++ b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/config/GenAITagKeys.java @@ -25,4 +25,8 @@ public class GenAITagKeys { public static final String INPUT_TOKENS = "gen_ai.usage.input_tokens"; public static final String OUTPUT_TOKENS = "gen_ai.usage.output_tokens"; public static final String SERVER_TIME_TO_FIRST_TOKEN = "gen_ai.server.time_to_first_token"; + + public static final String ESTIMATED_COST = "gen_ai.estimated.cost"; + + public static final String SYSTEM_NAME = "gen_ai.system"; } diff --git a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/GenAIMeterAnalyzer.java b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/GenAIMeterAnalyzer.java index 41b13dd0652f..bd91be550707 100644 --- a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/GenAIMeterAnalyzer.java +++ b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/GenAIMeterAnalyzer.java @@ -17,6 +17,8 @@ package org.apache.skywalking.oap.analyzer.genai.service; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; import lombok.extern.slf4j.Slf4j; import org.apache.skywalking.apm.network.common.v3.KeyStringValuePair; import org.apache.skywalking.apm.network.language.agent.v3.SegmentObject; @@ -27,9 +29,19 @@ import org.apache.skywalking.oap.server.core.analysis.IDManager; import org.apache.skywalking.oap.server.core.analysis.Layer; import org.apache.skywalking.oap.server.core.analysis.TimeBucket; +import org.apache.skywalking.oap.server.core.config.NamingControl; import org.apache.skywalking.oap.server.core.source.GenAIMetrics; +import org.apache.skywalking.oap.server.core.source.GenAIModelAccess; +import org.apache.skywalking.oap.server.core.source.GenAIProviderAccess; +import org.apache.skywalking.oap.server.core.source.ServiceInstance; +import org.apache.skywalking.oap.server.core.source.ServiceMeta; +import org.apache.skywalking.oap.server.core.source.Source; +import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinSpan; import org.apache.skywalking.oap.server.library.util.StringUtil; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import static java.util.stream.Collectors.toMap; @@ -39,10 +51,16 @@ public class GenAIMeterAnalyzer implements IGenAIMeterAnalyzerService { private final GenAIProviderPrefixMatcher matcher; + private NamingControl namingControl; + public GenAIMeterAnalyzer(GenAIProviderPrefixMatcher matcher) { this.matcher = matcher; } + public void setNamingControl(NamingControl namingControl) { + this.namingControl = namingControl; + } + @Override public GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segment) { Map tags = span.getTagsList().stream() @@ -61,6 +79,7 @@ public GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segm return null; } String provider = tags.get(GenAITagKeys.PROVIDER_NAME); + GenAIProviderPrefixMatcher.MatchResult matchResult = matcher.match(modelName); if (StringUtil.isBlank(provider)) { @@ -72,16 +91,7 @@ public GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segm long inputTokens = parseSafeLong(tags.get(GenAITagKeys.INPUT_TOKENS)); long outputTokens = parseSafeLong(tags.get(GenAITagKeys.OUTPUT_TOKENS)); - // calculate the total cost by the cost configs - double totalCost = 0.0D; - if (modelConfig != null) { - if (modelConfig.getInputEstimatedCostPerM() > 0) { - totalCost += inputTokens * modelConfig.getInputEstimatedCostPerM(); - } - if (modelConfig.getOutputEstimatedCostPerM() > 0) { - totalCost += outputTokens * modelConfig.getOutputEstimatedCostPerM(); - } - } + double totalCost = calculateTotalCost(modelConfig, inputTokens, outputTokens); GenAIMetrics metrics = new GenAIMetrics(); @@ -92,7 +102,7 @@ public GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segm metrics.setOutputTokens(outputTokens); metrics.setTimeToFirstToken(parseSafeInt(tags.get(GenAITagKeys.SERVER_TIME_TO_FIRST_TOKEN))); - metrics.setTotalEstimatedCost(Math.round(totalCost)); + metrics.setTotalEstimatedCost(totalCost); long latency = span.getEndTime() - span.getStartTime(); metrics.setLatency(latency); @@ -102,6 +112,62 @@ public GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segm return metrics; } + @Override + public GenAIMetrics extractMetricsFromZipkinSpan(ZipkinSpan zipkinSpan) { + JsonObject tags = zipkinSpan.getTags(); + JsonElement element = tags.get(GenAITagKeys.RESPONSE_MODEL); + if (element == null || StringUtil.isBlank(element.getAsString())) { + return null; + } + + String modelName = element.getAsString(); + String provider = getZipkinSpanTagValue(tags, GenAITagKeys.PROVIDER_NAME); + + if (StringUtil.isBlank(provider)) { + // Support legacy tags for OTLP or Zipkin traces. + provider = getZipkinSpanTagValue(tags, GenAITagKeys.SYSTEM_NAME); + } + + GenAIProviderPrefixMatcher.MatchResult matchResult = matcher.match(modelName); + if (StringUtil.isBlank(provider)) { + provider = matchResult.getProvider(); + } + + GenAIConfig.Model modelConfig = matchResult.getModelConfig(); + + long inputTokens = parseSafeLong(getZipkinSpanTagValue(tags, GenAITagKeys.INPUT_TOKENS)); + long outputTokens = parseSafeLong(getZipkinSpanTagValue(tags, GenAITagKeys.OUTPUT_TOKENS)); + + double totalCost = calculateTotalCost(modelConfig, inputTokens, outputTokens); + + GenAIMetrics metrics = new GenAIMetrics(); + metrics.setServiceId(IDManager.ServiceID.buildId(provider, Layer.VIRTUAL_GENAI.isNormal())); + metrics.setProviderName(provider); + metrics.setModelName(modelName); + metrics.setInputTokens(inputTokens); + metrics.setOutputTokens(outputTokens); + metrics.setTimeToFirstToken(parseSafeInt(getZipkinSpanTagValue(tags, GenAITagKeys.SERVER_TIME_TO_FIRST_TOKEN))); + metrics.setTotalEstimatedCost(totalCost); + metrics.setLatency(zipkinSpan.getDuration() / 1000); + metrics.setStatus(StringUtil.isBlank(getZipkinSpanTagValue(tags, "error"))); + metrics.setTimeBucket(TimeBucket.getMinuteTimeBucket(zipkinSpan.getTimestamp() / 1000)); + return metrics; + } + + @Override + public List transferToSources(GenAIMetrics metrics) { + if (metrics == null) { + return Collections.emptyList(); + } + + List sources = new ArrayList<>(); + sources.add(toVirtualGenAIServiceMeta(metrics)); + sources.add(toVirtualGenAIInstance(metrics)); + sources.add(toProviderAccess(metrics)); + sources.add(toModelAccess(metrics)); + return sources; + } + private long parseSafeLong(String value) { if (StringUtil.isEmpty(value)) { return 0; @@ -125,4 +191,66 @@ private int parseSafeInt(String value) { return 0; } } + + private String getZipkinSpanTagValue(JsonObject tags, String key) { + JsonElement element = tags.get(key); + return element != null ? element.getAsString() : null; + } + + private double calculateTotalCost(GenAIConfig.Model modelConfig, long inputTokens, long outputTokens) { + if (modelConfig == null) { + return 0.0D; + } + double cost = 0.0D; + if (modelConfig.getInputEstimatedCostPerM() > 0) { + cost += inputTokens * modelConfig.getInputEstimatedCostPerM(); + } + if (modelConfig.getOutputEstimatedCostPerM() > 0) { + cost += outputTokens * modelConfig.getOutputEstimatedCostPerM(); + } + return cost; + } + + private ServiceMeta toVirtualGenAIServiceMeta(GenAIMetrics metrics) { + ServiceMeta service = new ServiceMeta(); + service.setName(namingControl.formatServiceName(metrics.getProviderName())); + service.setLayer(Layer.VIRTUAL_GENAI); + service.setTimeBucket(metrics.getTimeBucket()); + return service; + } + + private Source toVirtualGenAIInstance(GenAIMetrics metrics) { + ServiceInstance instance = new ServiceInstance(); + instance.setTimeBucket(metrics.getTimeBucket()); + instance.setName(namingControl.formatInstanceName(metrics.getModelName())); + instance.setServiceLayer(Layer.VIRTUAL_GENAI); + instance.setServiceName(namingControl.formatServiceName(metrics.getProviderName())); + return instance; + } + + private GenAIProviderAccess toProviderAccess(GenAIMetrics metrics) { + GenAIProviderAccess source = new GenAIProviderAccess(); + source.setName(namingControl.formatServiceName(metrics.getProviderName())); + source.setInputTokens(metrics.getInputTokens()); + source.setOutputTokens(metrics.getOutputTokens()); + source.setTotalEstimatedCost(Math.round(metrics.getTotalEstimatedCost())); + source.setLatency(metrics.getLatency()); + source.setStatus(metrics.isStatus()); + source.setTimeBucket(metrics.getTimeBucket()); + return source; + } + + private GenAIModelAccess toModelAccess(GenAIMetrics metrics) { + GenAIModelAccess source = new GenAIModelAccess(); + source.setServiceName(namingControl.formatServiceName(metrics.getProviderName())); + source.setModelName(namingControl.formatInstanceName(metrics.getModelName())); + source.setInputTokens(metrics.getInputTokens()); + source.setOutputTokens(metrics.getOutputTokens()); + source.setTotalEstimatedCost(Math.round(metrics.getTotalEstimatedCost())); + source.setTimeToFirstToken(metrics.getTimeToFirstToken()); + source.setLatency(metrics.getLatency()); + source.setStatus(metrics.isStatus()); + source.setTimeBucket(metrics.getTimeBucket()); + return source; + } } diff --git a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/IGenAIMeterAnalyzerService.java b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/IGenAIMeterAnalyzerService.java index efbf2192b030..e29b3daa162f 100644 --- a/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/IGenAIMeterAnalyzerService.java +++ b/oap-server/analyzer/gen-ai-analyzer/src/main/java/org/apache/skywalking/oap/analyzer/genai/service/IGenAIMeterAnalyzerService.java @@ -21,10 +21,18 @@ import org.apache.skywalking.apm.network.language.agent.v3.SegmentObject; import org.apache.skywalking.apm.network.language.agent.v3.SpanObject; import org.apache.skywalking.oap.server.core.source.GenAIMetrics; +import org.apache.skywalking.oap.server.core.source.Source; +import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinSpan; import org.apache.skywalking.oap.server.library.module.Service; +import java.util.List; + public interface IGenAIMeterAnalyzerService extends Service { GenAIMetrics extractMetricsFromSWSpan(SpanObject span, SegmentObject segment); + GenAIMetrics extractMetricsFromZipkinSpan(ZipkinSpan zipkinSpan); + + List transferToSources(GenAIMetrics metrics); + } diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/source/GenAIMetrics.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/source/GenAIMetrics.java index 2ef4562143bb..831af21ad53c 100644 --- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/source/GenAIMetrics.java +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/source/GenAIMetrics.java @@ -35,10 +35,11 @@ public class GenAIMetrics { /** * The total estimated cost of GenAI model calls. - * This value is amplified by 10^6 (multiplied by 1,000,000) to be stored as a long - * and to avoid precision issues with double in SumMetrics. + * The unit is 1*10^-6 of the currency (e.g. micro-USD). + * * This is stored as a double to maintain precision during calculation and + * when passing the value to callers (e.g. for Zipkin tags). */ - private long totalEstimatedCost; + private double totalEstimatedCost; private int timeToFirstToken; diff --git a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/pom.xml b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/pom.xml index 9d0be4e30b87..a492a8e9dd06 100644 --- a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/pom.xml +++ b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/pom.xml @@ -37,5 +37,11 @@ org.apache.kafka kafka-clients + + + org.apache.skywalking + gen-ai-analyzer + ${project.version} + diff --git a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/ZipkinReceiverProvider.java b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/ZipkinReceiverProvider.java index 3878a6d7c0ce..cfe7bf7d9982 100644 --- a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/ZipkinReceiverProvider.java +++ b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/ZipkinReceiverProvider.java @@ -20,6 +20,8 @@ import com.linecorp.armeria.common.HttpMethod; import java.util.Arrays; + +import org.apache.skywalking.oap.analyzer.genai.module.GenAIAnalyzerModule; import org.apache.skywalking.oap.server.core.CoreModule; import org.apache.skywalking.oap.server.core.RunningMode; import org.apache.skywalking.oap.server.library.module.ModuleDefine; @@ -117,7 +119,8 @@ public void notifyAfterCompleted() throws ModuleStartException { public String[] requiredModules() { return new String[] { TelemetryModule.NAME, - CoreModule.NAME + CoreModule.NAME, + GenAIAnalyzerModule.NAME }; } } diff --git a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/trace/SpanForward.java b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/trace/SpanForward.java index 34145d85e508..0cdcdb784f7d 100644 --- a/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/trace/SpanForward.java +++ b/oap-server/server-receiver-plugin/zipkin-receiver-plugin/src/main/java/org/apache/skywalking/oap/server/receiver/zipkin/trace/SpanForward.java @@ -20,26 +20,24 @@ import com.google.common.util.concurrent.RateLimiter; import com.google.gson.JsonObject; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import java.util.Map; import lombok.extern.slf4j.Slf4j; +import org.apache.skywalking.oap.analyzer.genai.config.GenAITagKeys; +import org.apache.skywalking.oap.analyzer.genai.module.GenAIAnalyzerModule; +import org.apache.skywalking.oap.analyzer.genai.service.IGenAIMeterAnalyzerService; import org.apache.skywalking.oap.server.core.Const; import org.apache.skywalking.oap.server.core.CoreModule; +import org.apache.skywalking.oap.server.core.analysis.TimeBucket; import org.apache.skywalking.oap.server.core.analysis.manual.searchtag.Tag; import org.apache.skywalking.oap.server.core.analysis.manual.searchtag.TagType; +import org.apache.skywalking.oap.server.core.config.NamingControl; +import org.apache.skywalking.oap.server.core.source.GenAIMetrics; +import org.apache.skywalking.oap.server.core.source.SourceReceiver; import org.apache.skywalking.oap.server.core.source.TagAutocomplete; import org.apache.skywalking.oap.server.core.zipkin.ZipkinSpanRecord; import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinService; import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinServiceRelation; import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinServiceSpan; import org.apache.skywalking.oap.server.core.zipkin.source.ZipkinSpan; -import org.apache.skywalking.oap.server.core.analysis.TimeBucket; -import org.apache.skywalking.oap.server.core.config.NamingControl; -import org.apache.skywalking.oap.server.core.source.SourceReceiver; import org.apache.skywalking.oap.server.library.module.ModuleManager; import org.apache.skywalking.oap.server.library.util.CollectionUtils; import org.apache.skywalking.oap.server.library.util.StringUtil; @@ -49,8 +47,17 @@ import zipkin2.Span; import zipkin2.internal.HexCodec; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + @Slf4j public class SpanForward implements SpanForwardService { + private final ZipkinReceiverConfig config; private final ModuleManager moduleManager; private final List searchTagKeys; @@ -59,6 +66,8 @@ public class SpanForward implements SpanForwardService { private SourceReceiver receiver; private RateLimiter rateLimiter; + private IGenAIMeterAnalyzerService genAIMeterAnalyzerService; + public SpanForward(final ZipkinReceiverConfig config, final ModuleManager manager) { this.config = config; this.moduleManager = manager; @@ -137,7 +146,7 @@ public List send(List spanList) { for (Map.Entry tag : span.tags().entrySet()) { String tagString = tag.getKey() + "=" + tag.getValue(); tagsJson.addProperty(tag.getKey(), tag.getValue()); - if (tag.getValue().length() > Tag.TAG_LENGTH || tagString.length() > Tag.TAG_LENGTH) { + if (tag.getValue().length() > Tag.TAG_LENGTH || tagString.length() > Tag.TAG_LENGTH) { if (log.isDebugEnabled()) { log.debug("Span tag : {} length > : {}, dropped", tagString, Tag.TAG_LENGTH); } @@ -152,6 +161,9 @@ public List send(List spanList) { } zipkinSpan.setTags(tagsJson); } + + processGenAILogic(zipkinSpan); + getReceiver().receive(zipkinSpan); toService(zipkinSpan, minuteTimeBucket); @@ -195,6 +207,28 @@ private void toServiceRelation(ZipkinSpan zipkinSpan, final long minuteTimeBucke getReceiver().receive(relation); } + private void processGenAILogic(ZipkinSpan zipkinSpan) { + GenAIMetrics metrics = getGenAIMeterAnalyzerService().extractMetricsFromZipkinSpan(zipkinSpan); + if (metrics == null) { + return; + } + + setEstimatedCost(zipkinSpan, metrics.getTotalEstimatedCost()); + + getGenAIMeterAnalyzerService().transferToSources(metrics) + .forEach(source -> getReceiver().receive(source)); + } + + private void setEstimatedCost(ZipkinSpan zipkinSpan, double totalEstimatedCost) { + if (totalEstimatedCost > 0) { + BigDecimal calculatedCost = BigDecimal.valueOf(totalEstimatedCost) + .divide(new BigDecimal("1000000"), 10, RoundingMode.HALF_UP); + JsonObject tags = zipkinSpan.getTags(); + tags.addProperty(GenAITagKeys.ESTIMATED_COST, calculatedCost.stripTrailingZeros().toPlainString()); + zipkinSpan.setTags(tags); + } + } + private List getSampledTraces(List input) { // 100% sampleRate and no rateLimiter, return all spans if (config.getSampleRate() == 10000 && rateLimiter == null) { @@ -206,13 +240,13 @@ private List getSampledTraces(List input) { sampledTraces.add(span); continue; } - + // Apply maximum spans per minute sampling first if (rateLimiter != null && !rateLimiter.tryAcquire()) { log.debug("Span dropped due to maximum spans per minute limit: {}", span.id()); continue; } - + // Apply percentage-based sampling if (config.getSampleRate() == 10000) { // 100% sample rate - include all spans that passed the maximum spans check @@ -241,4 +275,11 @@ private SourceReceiver getReceiver() { } return receiver; } + + private IGenAIMeterAnalyzerService getGenAIMeterAnalyzerService() { + if (genAIMeterAnalyzerService == null) { + genAIMeterAnalyzerService = moduleManager.find(GenAIAnalyzerModule.NAME).provider().getService(IGenAIMeterAnalyzerService.class); + } + return genAIMeterAnalyzerService; + } } diff --git a/test/e2e-v2/cases/otlp-virtual-genai/docker-compose.yml b/test/e2e-v2/cases/otlp-virtual-genai/docker-compose.yml new file mode 100644 index 000000000000..fd198ef5b5a1 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/docker-compose.yml @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3" + +services: + oap: + extends: + file: ../../script/docker-compose/base-compose.yml + service: oap + environment: + SW_QUERY_ZIPKIN: default + SW_RECEIVER_ZIPKIN: default + ports: + - "12800:12800" + - "11800:11800" + networks: + - e2e + + banyandb: + extends: + file: ../../script/docker-compose/base-compose.yml + service: banyandb + ports: + - 17912 + + provider: + extends: + file: ../../script/docker-compose/base-compose.yml + service: provider + ports: + - 9090 + depends_on: + oap: + condition: service_healthy + + provider-py: + build: + context: ../../script/ + dockerfile: dockerfile/Dockerfile-openai.python + networks: + - e2e + ports: + - "9091:9091" + environment: + OPENAI_BASE_URL: http://provider:9090/llm/otlp/v1 + OPENAI_API_KEY: xxxxxxxxxxxxxxxxxxxxxxxx + OTEL_TRACES_EXPORTER: otlp + OTEL_EXPORTER_OTLP_ENDPOINT: http://oap:11800 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + depends_on: + oap: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://127.0.0.1:9091/"] + interval: 5s + timeout: 10s + retries: 5 + command: ["opentelemetry-instrument", "python3", "/openai-call.py"] + +networks: + e2e: diff --git a/test/e2e-v2/cases/otlp-virtual-genai/e2e.yaml b/test/e2e-v2/cases/otlp-virtual-genai/e2e.yaml new file mode 100644 index 000000000000..0a14830ffb68 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/e2e.yaml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is used to show how to write configuration files and can be used to test. + +setup: + env: compose + file: docker-compose.yml + timeout: 20m + init-system-environment: ../../script/env + steps: + - name: set PATH + command: export PATH=/tmp/skywalking-infra-e2e/bin:$PATH + - name: install yq + command: bash test/e2e-v2/script/prepare/setup-e2e-shell/install.sh yq + - name: install swctl + command: bash test/e2e-v2/script/prepare/setup-e2e-shell/install.sh swctl + +trigger: + action: http + interval: 3s + times: 5 + url: http://localhost:9091/ + method: GET + +verify: + retry: + count: 60 + interval: 3s + cases: + - includes: + - ./otlp-virtual-genai.yaml + diff --git a/test/e2e-v2/cases/otlp-virtual-genai/expected/instance.yml b/test/e2e-v2/cases/otlp-virtual-genai/expected/instance.yml new file mode 100644 index 000000000000..f74704b2b875 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/expected/instance.yml @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- contains . }} +- id: {{ notEmpty .id }} + name: gpt-4.1-mini-2025-04-14 + instanceuuid: {{ notEmpty .instanceuuid }} + attributes: [] + language: UNKNOWN +{{- end }} diff --git a/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value-label.yml b/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value-label.yml new file mode 100644 index 000000000000..c983c0e19ba3 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value-label.yml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +debuggingtrace: null +type: TIME_SERIES_VALUES +results: + {{- contains .results }} + - metric: + labels: + {{- contains .metric.labels }} + - key: "p" + value: {{ notEmpty .value }} + {{- end}} + values: + {{- contains .values }} + - id: {{ notEmpty .id }} + value: {{ .value }} + traceid: null + owner: null + - id: {{ notEmpty .id }} + value: null + traceid: null + owner: null + {{- end}} + {{- end}} +error: null + diff --git a/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value.yml b/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value.yml new file mode 100644 index 000000000000..979b9b25775c --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/expected/metrics-has-value.yml @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +debuggingtrace: null +type: TIME_SERIES_VALUES +results: + {{- contains .results }} + - metric: + labels: [] + values: + {{- contains .values }} + - id: {{ notEmpty .id }} + value: {{ notEmpty .value }} + traceid: null + owner: null + - id: {{ notEmpty .id }} + value: null + traceid: null + owner: null + {{- end}} + {{- end}} +error: null diff --git a/test/e2e-v2/cases/otlp-virtual-genai/expected/service.yml b/test/e2e-v2/cases/otlp-virtual-genai/expected/service.yml new file mode 100644 index 000000000000..5471c7d596d8 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/expected/service.yml @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- contains . }} +- id: {{ b64enc "openai" }}.0 + name: openai + group: "" + shortname: openai + layers: + - VIRTUAL_GENAI + normal: false +{{- end }} diff --git a/test/e2e-v2/cases/otlp-virtual-genai/otlp-virtual-genai.yaml b/test/e2e-v2/cases/otlp-virtual-genai/otlp-virtual-genai.yaml new file mode 100644 index 000000000000..a3c1a998dd30 --- /dev/null +++ b/test/e2e-v2/cases/otlp-virtual-genai/otlp-virtual-genai.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is used to show how to write configuration files and can be used to test. + +cases: + # service cases + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql service ls + expected: expected/service.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_resp_time --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_sla --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_cpm --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_latency_percentile --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value-label.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_input_tokens_sum --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_input_tokens_avg --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_output_tokens_sum --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_output_tokens_avg --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_total_estimated_cost --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_provider_avg_estimated_cost --service-id=b3BlbmFp.0 + expected: expected/metrics-has-value.yml + + # instance cases + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql instance ls --service-id=b3BlbmFp.0 + expected: expected/instance.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_call_cpm --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_sla --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_latency_avg --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_latency_percentile --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value-label.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_input_tokens_sum --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_input_tokens_avg --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_output_tokens_sum --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_output_tokens_avg --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_total_estimated_cost --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=gen_ai_model_avg_estimated_cost --service-id=b3BlbmFp.0 --instance-name=gpt-4.1-mini-2025-04-14 + expected: expected/metrics-has-value.yml diff --git a/test/e2e-v2/java-test-service/e2e-service-provider/src/main/java/org/apache/skywalking/e2e/controller/LLMMockController.java b/test/e2e-v2/java-test-service/e2e-service-provider/src/main/java/org/apache/skywalking/e2e/controller/LLMMockController.java index 1d29883ebdd7..1877ac691ad7 100644 --- a/test/e2e-v2/java-test-service/e2e-service-provider/src/main/java/org/apache/skywalking/e2e/controller/LLMMockController.java +++ b/test/e2e-v2/java-test-service/e2e-service-provider/src/main/java/org/apache/skywalking/e2e/controller/LLMMockController.java @@ -30,7 +30,7 @@ @RequestMapping("/llm") public class LLMMockController { @PostMapping("/v1/chat/completions") - public Object completions(HttpServletResponse response) throws Exception { + public Object swCompletions(HttpServletResponse response) throws Exception { response.setContentType("text/event-stream"); response.setCharacterEncoding("UTF-8"); @@ -102,4 +102,44 @@ private void writeStreamChunk(PrintWriter writer, String id, long created, Strin writer.write("data: " + cleanJson + "\n\n"); writer.flush(); } + + @PostMapping(value = "/otlp/v1/chat/completions", produces = "application/json") + public Object otlpCompletions() { + return "{" + + " \"id\": \"chatcmpl-DNhDGYUo62qPZjSFntcM7rlLXLCBr\"," + + " \"object\": \"chat.completion\"," + + " \"created\": 1774540026," + + " \"model\": \"gpt-4.1-mini-2025-04-14\"," + + " \"choices\": [" + + " {" + + " \"index\": 0," + + " \"message\": {" + + " \"role\": \"assistant\"," + + " \"content\": \"Sure! Here's a joke for you:\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything!\"," + + " \"refusal\": null," + + " \"annotations\": []" + + " }," + + " \"logprobs\": null," + + " \"finish_reason\": \"stop\"" + + " }" + + " ]," + + " \"usage\": {" + + " \"prompt_tokens\": 12," + + " \"completion_tokens\": 22," + + " \"total_tokens\": 34," + + " \"prompt_tokens_details\": {" + + " \"cached_tokens\": 0," + + " \"audio_tokens\": 0" + + " }," + + " \"completion_tokens_details\": {" + + " \"reasoning_tokens\": 0," + + " \"audio_tokens\": 0," + + " \"accepted_prediction_tokens\": 0," + + " \"rejected_prediction_tokens\": 0" + + " }" + + " }," + + " \"system_fingerprint\": \"fp_b6f445fc1c\"" + + "}"; + + } } diff --git a/test/e2e-v2/script/dockerfile/Dockerfile-openai.python b/test/e2e-v2/script/dockerfile/Dockerfile-openai.python new file mode 100644 index 000000000000..4416074005ff --- /dev/null +++ b/test/e2e-v2/script/dockerfile/Dockerfile-openai.python @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM python:3.10-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir \ + openai \ + opentelemetry-api \ + opentelemetry-sdk \ + opentelemetry-instrumentation-openai \ + opentelemetry-exporter-otlp-proto-grpc \ + opentelemetry-exporter-zipkin \ + opentelemetry-exporter-zipkin-json \ + opentelemetry-exporter-zipkin-proto-http \ + opentelemetry-distro + +RUN opentelemetry-bootstrap --action=install + +COPY python/openai-call.py /openai-call.py + diff --git a/test/e2e-v2/script/python/openai-call.py b/test/e2e-v2/script/python/openai-call.py new file mode 100644 index 000000000000..d7da374e9a11 --- /dev/null +++ b/test/e2e-v2/script/python/openai-call.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import socketserver +from http.server import BaseHTTPRequestHandler +from openai import OpenAI + +client = OpenAI( + timeout=120.0, +) + +class AIRequestHandler(BaseHTTPRequestHandler): + + def do_GET(self): + try: + response = client.chat.completions.create( + model="gpt-4.1-mini", + messages=[{"role": "user", "content": "Tell me a joke."}] + ) + content = response.choices[0].message.content + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(f'{{"response": "{content}"}}'.encode('utf-8')) + except Exception as e: + self.send_response(500) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(f'{{"error": "{str(e)}"}}'.encode('utf-8')) + + def do_POST(self): + self.do_GET() + +PORT = 9091 + +if __name__ == '__main__': + with socketserver.TCPServer(("", PORT), AIRequestHandler) as httpd: + print(f"serving at port {PORT}") + httpd.serve_forever() \ No newline at end of file