diff --git a/apisix/plugins/ai-aws-content-moderation.lua b/apisix/plugins/ai-aws-content-moderation.lua index a3a1295c6870..8d8e3739e820 100644 --- a/apisix/plugins/ai-aws-content-moderation.lua +++ b/apisix/plugins/ai-aws-content-moderation.lua @@ -18,6 +18,7 @@ require("resty.aws.config") -- to read env vars before initing aws module local core = require("apisix.core") local binding = require("apisix.plugins.ai-protocols.binding") +local protocols = require("apisix.plugins.ai-protocols") local aws = require("resty.aws") local aws_instance @@ -107,9 +108,41 @@ function _M.rewrite(conf, ctx) return end - local body, err = core.request.get_body() + local body, err = core.request.get_json_request_body_table() if not body then - return HTTP_BAD_REQUEST, err + local msg = type(err) == "table" and err.message or err + local handled, code, resp = binding.on_unsupported( + conf.fail_mode, _M.name, ctx, + "failed to parse request body: " .. (msg or "invalid JSON"), + HTTP_BAD_REQUEST, err) + if handled then + return code, resp + end + return + end + + -- The plugin runs before ai-proxy, so detect the client protocol here rather + -- than relying on ctx.ai_client_protocol. "passthrough" is the catch-all for + -- non-AI bodies, which carry no LLM content to moderate. + local protocol_name = protocols.detect(body, ctx) + local proto = protocol_name and protocols.get(protocol_name) + if not proto or protocol_name == "passthrough" or not proto.extract_request_content then + local handled, code, resp = binding.on_unsupported( + conf.fail_mode, _M.name, ctx, + "no supported AI protocol for the request", + HTTP_BAD_REQUEST, "no supported AI protocol for the request") + if handled then + return code, resp + end + return + end + + -- moderate the decoded LLM-visible content, not the raw JSON envelope + local contents = proto.extract_request_content(body) + local text = core.table.concat(contents, " ") + if text == "" then + -- no LLM-visible content to moderate + return end local comprehend = conf.comprehend @@ -139,7 +172,7 @@ function _M.rewrite(conf, ctx) local res, err = comprehend:detectToxicContent({ LanguageCode = "en", TextSegments = {{ - Text = body + Text = text }}, }) diff --git a/docs/en/latest/plugins/ai-aws-content-moderation.md b/docs/en/latest/plugins/ai-aws-content-moderation.md index fce2755a3239..239475ff986c 100644 --- a/docs/en/latest/plugins/ai-aws-content-moderation.md +++ b/docs/en/latest/plugins/ai-aws-content-moderation.md @@ -38,7 +38,7 @@ import TabItem from '@theme/TabItem'; The `ai-aws-content-moderation` Plugin integrates with [AWS Comprehend](https://aws.amazon.com/comprehend/) to check request bodies for toxicity when proxying to LLMs, such as profanity, hate speech, insult, harassment, violence, and more, rejecting requests if the evaluated outcome exceeds the configured threshold. -This Plugin must be used in Routes that proxy requests to LLMs only. +This Plugin must be used in Routes that proxy requests to LLMs only. The Plugin parses the `application/json` request body and sends only the decoded LLM-visible content (for example `messages[].content`) to AWS Comprehend, rather than the raw request body. Requests that are not recognized AI requests (non-JSON bodies, or JSON that carries no LLM content) are handled according to `fail_mode`. ## Plugin Attributes diff --git a/t/plugin/ai-aws-content-moderation.t b/t/plugin/ai-aws-content-moderation.t index 765bba1ab567..0bdd8ebbbfe9 100644 --- a/t/plugin/ai-aws-content-moderation.t +++ b/t/plugin/ai-aws-content-moderation.t @@ -139,7 +139,9 @@ passed === TEST 2: toxic request should fail --- request POST /echo -toxic +{"messages":[{"role":"user","content":"toxic"}]} +--- more_headers +Content-Type: application/json --- error_code: 400 --- response_body chomp request body exceeds toxicity threshold @@ -149,7 +151,9 @@ request body exceeds toxicity threshold === TEST 3: good request should pass --- request POST /echo -good_request +{"messages":[{"role":"user","content":"good_request"}]} +--- more_headers +Content-Type: application/json --- error_code: 200 @@ -199,7 +203,9 @@ passed === TEST 5: profane request should fail --- request POST /echo -profane +{"messages":[{"role":"user","content":"profane"}]} +--- more_headers +Content-Type: application/json --- error_code: 400 --- response_body chomp request body exceeds PROFANITY threshold @@ -209,7 +215,9 @@ request body exceeds PROFANITY threshold === TEST 6: very profane request should also fail --- request POST /echo -very_profane +{"messages":[{"role":"user","content":"very_profane"}]} +--- more_headers +Content-Type: application/json --- error_code: 400 --- response_body chomp request body exceeds PROFANITY threshold @@ -219,7 +227,9 @@ request body exceeds PROFANITY threshold === TEST 7: good_request should pass --- request POST /echo -good_request +{"messages":[{"role":"user","content":"good_request"}]} +--- more_headers +Content-Type: application/json --- error_code: 200 @@ -269,7 +279,9 @@ passed === TEST 9: profane request should pass profanity check but fail toxicity check --- request POST /echo -profane +{"messages":[{"role":"user","content":"profane"}]} +--- more_headers +Content-Type: application/json --- error_code: 400 --- response_body chomp request body exceeds toxicity threshold @@ -279,7 +291,9 @@ request body exceeds toxicity threshold === TEST 10: profane_but_not_toxic request should pass --- request POST /echo -profane_but_not_toxic +{"messages":[{"role":"user","content":"profane_but_not_toxic"}]} +--- more_headers +Content-Type: application/json --- error_code: 200 @@ -287,7 +301,9 @@ profane_but_not_toxic === TEST 11: but very profane request will fail --- request POST /echo -very_profane +{"messages":[{"role":"user","content":"very_profane"}]} +--- more_headers +Content-Type: application/json --- error_code: 400 --- response_body chomp request body exceeds PROFANITY threshold @@ -297,7 +313,9 @@ request body exceeds PROFANITY threshold === TEST 12: good_request should pass --- request POST /echo -good_request +{"messages":[{"role":"user","content":"good_request"}]} +--- more_headers +Content-Type: application/json --- error_code: 200 @@ -402,3 +420,88 @@ Content-Type: multipart/form-data --- error_code: 400 --- response_body eval qr/only application\/json is supported/ + + + +=== TEST 17: only the decoded LLM content is moderated, not the raw JSON envelope +--- request +POST /echo +{"model":"gpt-4","messages":[{"role":"user","content":"toxic"}]} +--- more_headers +Content-Type: application/json +--- error_code: 400 +--- response_body chomp +request body exceeds toxicity threshold + + + +=== TEST 18: non-AI JSON body is rejected when fail_mode=error +--- request +POST /echo +{"foo":"bar"} +--- more_headers +Content-Type: application/json +--- error_code: 400 +--- response_body eval +qr/no supported AI protocol for the request/ + + + +=== TEST 19: malformed JSON body is rejected when fail_mode=error +--- request +POST /echo +not-json +--- more_headers +Content-Type: application/json +--- error_code: 400 + + + +=== TEST 20: setup route with default fail_mode (skip) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/echo", + "plugins": { + "ai-aws-content-moderation": { + "comprehend": { + "access_key_id": "access", + "secret_access_key": "ea+secret", + "region": "us-east-1", + "endpoint": "http://localhost:2668" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 21: non-AI JSON body passes through unchecked when fail_mode=skip +--- request +POST /echo +{"foo":"bar"} +--- more_headers +Content-Type: application/json +--- error_code: 200 +--- response_body chomp +{"foo":"bar"} diff --git a/t/plugin/ai-aws-content-moderation2.t b/t/plugin/ai-aws-content-moderation2.t index 869fcf09d124..fbc4db583db1 100644 --- a/t/plugin/ai-aws-content-moderation2.t +++ b/t/plugin/ai-aws-content-moderation2.t @@ -84,7 +84,9 @@ passed === TEST 2: request should fail --- request POST /echo -toxic +{"messages":[{"role":"user","content":"toxic"}]} +--- more_headers +Content-Type: application/json --- error_code: 500 --- response_body chomp Comprehend:detectToxicContent() failed to connect to 'http://localhost:2668': connection refused