Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions apisix/plugins/ai-aws-content-moderation.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require("resty.aws.config") -- to read env vars before initing aws module

local core = require("apisix.core")
local binding = require("apisix.plugins.ai-protocols.binding")
local protocols = require("apisix.plugins.ai-protocols")
local aws = require("resty.aws")
local aws_instance

Expand Down Expand Up @@ -107,9 +108,41 @@ function _M.rewrite(conf, ctx)
return
end

local body, err = core.request.get_body()
local body, err = core.request.get_json_request_body_table()
if not body then
return HTTP_BAD_REQUEST, err
local msg = type(err) == "table" and err.message or err
local handled, code, resp = binding.on_unsupported(
conf.fail_mode, _M.name, ctx,
"failed to parse request body: " .. (msg or "invalid JSON"),
HTTP_BAD_REQUEST, err)
if handled then
return code, resp
end
return
end

-- The plugin runs before ai-proxy, so detect the client protocol here rather
-- than relying on ctx.ai_client_protocol. "passthrough" is the catch-all for
-- non-AI bodies, which carry no LLM content to moderate.
local protocol_name = protocols.detect(body, ctx)
local proto = protocol_name and protocols.get(protocol_name)
if not proto or protocol_name == "passthrough" or not proto.extract_request_content then
local handled, code, resp = binding.on_unsupported(
conf.fail_mode, _M.name, ctx,
"no supported AI protocol for the request",
HTTP_BAD_REQUEST, "no supported AI protocol for the request")
if handled then
return code, resp
end
return
end

-- moderate the decoded LLM-visible content, not the raw JSON envelope
local contents = proto.extract_request_content(body)
local text = core.table.concat(contents, " ")
if text == "" then
-- no LLM-visible content to moderate
return
end

local comprehend = conf.comprehend
Expand Down Expand Up @@ -139,7 +172,7 @@ function _M.rewrite(conf, ctx)
local res, err = comprehend:detectToxicContent({
LanguageCode = "en",
TextSegments = {{
Text = body
Text = text
}},
})

Expand Down
2 changes: 1 addition & 1 deletion docs/en/latest/plugins/ai-aws-content-moderation.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import TabItem from '@theme/TabItem';

The `ai-aws-content-moderation` Plugin integrates with [AWS Comprehend](https://aws.amazon.com/comprehend/) to check request bodies for toxicity (such as profanity, hate speech, insult, harassment, violence, and more) when proxying to LLMs, and rejects requests whose evaluated score exceeds the configured threshold.

This Plugin must be used in Routes that proxy requests to LLMs only.
This Plugin must be used in Routes that proxy requests to LLMs only. The Plugin parses the `application/json` request body and sends only the decoded LLM-visible content (for example `messages[].content`) to AWS Comprehend, rather than the raw request body. Requests that are not recognized as AI requests (bodies that cannot be parsed as JSON, or JSON that does not match a supported AI protocol) are handled according to `fail_mode`. Recognized AI requests whose extracted content is empty are passed through without being sent to AWS Comprehend.

## Plugin Attributes

Expand Down
121 changes: 112 additions & 9 deletions t/plugin/ai-aws-content-moderation.t
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ passed
=== TEST 2: toxic request should fail
--- request
POST /echo
toxic
{"messages":[{"role":"user","content":"toxic"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds toxicity threshold
Expand All @@ -149,7 +151,9 @@ request body exceeds toxicity threshold
=== TEST 3: good request should pass
--- request
POST /echo
good_request
{"messages":[{"role":"user","content":"good_request"}]}
--- more_headers
Content-Type: application/json
--- error_code: 200


Expand Down Expand Up @@ -199,7 +203,9 @@ passed
=== TEST 5: profane request should fail
--- request
POST /echo
profane
{"messages":[{"role":"user","content":"profane"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds PROFANITY threshold
Expand All @@ -209,7 +215,9 @@ request body exceeds PROFANITY threshold
=== TEST 6: very profane request should also fail
--- request
POST /echo
very_profane
{"messages":[{"role":"user","content":"very_profane"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds PROFANITY threshold
Expand All @@ -219,7 +227,9 @@ request body exceeds PROFANITY threshold
=== TEST 7: good_request should pass
--- request
POST /echo
good_request
{"messages":[{"role":"user","content":"good_request"}]}
--- more_headers
Content-Type: application/json
--- error_code: 200


Expand Down Expand Up @@ -269,7 +279,9 @@ passed
=== TEST 9: profane request should pass profanity check but fail toxicity check
--- request
POST /echo
profane
{"messages":[{"role":"user","content":"profane"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds toxicity threshold
Expand All @@ -279,15 +291,19 @@ request body exceeds toxicity threshold
=== TEST 10: profane_but_not_toxic request should pass
--- request
POST /echo
profane_but_not_toxic
{"messages":[{"role":"user","content":"profane_but_not_toxic"}]}
--- more_headers
Content-Type: application/json
--- error_code: 200



=== TEST 11: but very profane request will fail
--- request
POST /echo
very_profane
{"messages":[{"role":"user","content":"very_profane"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds PROFANITY threshold
Expand All @@ -297,7 +313,9 @@ request body exceeds PROFANITY threshold
=== TEST 12: good_request should pass
--- request
POST /echo
good_request
{"messages":[{"role":"user","content":"good_request"}]}
--- more_headers
Content-Type: application/json
--- error_code: 200


Expand Down Expand Up @@ -402,3 +420,88 @@ Content-Type: multipart/form-data
--- error_code: 400
--- response_body eval
qr/only application\/json is supported/



=== TEST 17: only the decoded LLM content is moderated, not the raw JSON envelope
--- request
POST /echo
{"model":"gpt-4","messages":[{"role":"user","content":"toxic"}]}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body chomp
request body exceeds toxicity threshold



=== TEST 18: non-AI JSON body is rejected when fail_mode=error
--- request
POST /echo
{"foo":"bar"}
--- more_headers
Content-Type: application/json
--- error_code: 400
--- response_body eval
qr/no supported AI protocol for the request/



=== TEST 19: malformed JSON body is rejected when fail_mode=error
--- request
POST /echo
not-json
--- more_headers
Content-Type: application/json
--- error_code: 400



=== TEST 20: setup route with default fail_mode (skip)
--- config
location /t {
content_by_lua_block {
-- Test fixture: re-create route 1 for /echo with the
-- ai-aws-content-moderation plugin configured WITHOUT a fail_mode key,
-- so the plugin default applies (skip, per this test's title).
-- NOTE(review): t presumably issues an Admin API request and returns the
-- status code and response body; confirm against lib.test_admin.
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/echo",
"plugins": {
"ai-aws-content-moderation": {
"comprehend": {
"access_key_id": "access",
"secret_access_key": "ea+secret",
"region": "us-east-1",
"endpoint": "http://localhost:2668"
}
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"127.0.0.1:1980": 1
}
}
}]]
)

-- Propagate a non-success admin status as the status of this location,
-- so a failed route setup is visible to the test harness.
if code >= 300 then
ngx.status = code
end
-- Emit the admin result body; the response_body section that follows
-- expects the text passed.
ngx.say(body)
}
}
--- response_body
passed



=== TEST 21: non-AI JSON body passes through unchecked when fail_mode=skip
--- request
POST /echo
{"foo":"bar"}
--- more_headers
Content-Type: application/json
--- error_code: 200
--- response_body chomp
{"foo":"bar"}
4 changes: 3 additions & 1 deletion t/plugin/ai-aws-content-moderation2.t
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ passed
=== TEST 2: request should fail
--- request
POST /echo
toxic
{"messages":[{"role":"user","content":"toxic"}]}
--- more_headers
Content-Type: application/json
--- error_code: 500
--- response_body chomp
Comprehend:detectToxicContent() failed to connect to 'http://localhost:2668': connection refused
Expand Down
Loading