dnsdist: implement own rate-limiting function - Part 1
dnsdist is a highly DNS-, DoS- and abuse-aware loadbalancer. Its goal in life is to route traffic to the best server, delivering top performance to legitimate users while shunting or blocking abusive traffic.
dnsdist supports basic rate limiting out of the box, but it didn't fit my use case.
Requirements:
- Limit by domain instead of client IP
- More customization: thresholds, window size, action,…
dnsdist is configured and controlled via a Lua based environment.
Very cool!! Let's write some custom Lua code to implement our own rate-limiting function.
After searching and reading several articles, I found a blog post from GitHub that describes an approach which is simple to implement.
Following the GitHub blog post mentioned above, I will write a rate-limiting function using the fixed-window algorithm and Redis (EVAL).
Because I need to limit requests by domain, I have to parse each qname to extract its registrable domain, e.g. testing.domain.com
=> domain.com
At first I thought I would write a parser function myself, but I gave up on that idea and decided to search instead =))). After several hours…, I found this library: https://github.com/daurnimator/lua-psl
Dependencies
Install dependencies
$ apt-get install redis-server lua5.1 libpsl-dev
$ luarocks install redis-lua
$ luarocks install psl
Code
-- Accept DNS queries on the loopback address, port 53.
setLocal("127.0.0.1:53")

-- Backend resolvers that queries are forwarded to.
newServer({
  address = "1.1.1.1",
  name = "cloudflare",
})
newServer({
  address = "8.8.8.8",
  name = "google",
})

-- Select backends with dnsdist's `firstAvailable` policy.
setServerPolicy(firstAvailable)
-- Redis client library: https://github.com/nrk/redis-lua
local redis = require("redis")

-- Public Suffix List bindings: https://github.com/daurnimator/lua-psl
local psl = require("psl")

-- PSL handle used later to look up the registrable domain of a qname.
local psl_obj = psl.builtin()
-- Connection settings for the Redis instance that holds the counters.
-- Uncomment `auth` and set a password if the server requires one.
local params = {
  host = "127.0.0.1",
  port = 6379,
  -- auth = ""
}

-- Set to false to silence the rate limiter's log messages.
local enable_logging = true

-- Query count allowed per window for any domain that has no entry in
-- `overrides` (with the default 1-second window this is a QPS figure).
local default_threshold = 5000

-- What to do once a domain goes over its threshold when no override says
-- otherwise. One of: "drop", "delay", "refused", "nxdomain", "servfail".
local default_action = "delay"

-- Hard ceiling: above this count a domain's queries are dropped, whatever
-- action is configured for it.
local threshold_to_drop = 20000

-- Per-domain settings, keyed by registrable domain.
--   threshold    number, required
--   window_size  length of the counting window in seconds (default: 1)
--   action       "drop", "delay", "refused", "nxdomain" or "servfail"
--                (default: "delay")
local overrides = {
  ["domain1.com"] = { threshold = 1, window_size = 10, action = "refused" },
  ["domain2.com"] = { threshold = 2, window_size = 1, action = "delay" },
}
-- Fixed-window counter that is sent to Redis and executed there via EVAL.
-- Adapted from:
-- https://github.blog/2021-04-05-how-we-scaled-github-api-sharded-replicated-rate-limiter-redis/
--
-- Inputs:
--   KEYS[1]  counter key; the window's expiry is stored under KEYS[1] .. ":exp"
--   ARGV[1]  amount to add to the counter
--   ARGV[2]  expiry timestamp to store if a new window has to be started
--   ARGV[3]  the caller's current timestamp
--
-- A new window is started (counter set to 0, expiry set to ARGV[2], and both
-- keys given an EXPIREAT of ARGV[2] + 1) when no expiry is stored yet or the
-- stored expiry is strictly less than ARGV[3]. The counter is then incremented.
--
-- Returns { counter value after the increment, expiry of the active window }.
--
-- NOTE(review): because the comparison is a strict `<`, a call whose ARGV[3]
-- equals the stored expiry is still counted in the old window. If callers pass
-- whole-second timestamps, a window can therefore span one extra second -
-- confirm this is intended.
local script = [[
local rate_limit_key = KEYS[1]
local increment_amount = tonumber(ARGV[1])
local next_expires_at = tonumber(ARGV[2])
local current_time = tonumber(ARGV[3])
local expires_at_key = rate_limit_key .. ":exp"
local expires_at = tonumber(redis.call("get", expires_at_key))
if not expires_at or expires_at < current_time then
redis.call("set", rate_limit_key, 0)
redis.call("set", expires_at_key, next_expires_at)
redis.call("expireat", rate_limit_key, next_expires_at + 1)
redis.call("expireat", expires_at_key, next_expires_at + 1)
expires_at = next_expires_at
end
local current = redis.call("incrby", rate_limit_key, increment_amount)
return { current, expires_at }
]]
-- Open the Redis connection used by the rate limiter, and authenticate only
-- when a non-empty password has been configured in `params`.
local client = redis.connect(params)
local password = params.auth
if password and password ~= "" then
  client:auth(password)
end
-- Delay, in milliseconds, applied by the "delay" action when the matching
-- override does not provide its own `delay_msec`.
local default_delay_msec = 100

--- Rate-limit a query by the registrable domain of its qname.
--
-- The per-domain counter lives in Redis and is updated by `script`. Limits
-- come from `overrides[domain]` when present (fields: `threshold`,
-- `window_size`, `action`, and optionally `delay_msec`), otherwise from the
-- module-level defaults with a 1-second window.
--
-- @param dq the dnsdist DNSQuestion being processed
-- @return a DNSAction value; for DNSAction.Delay a second value is returned
--   holding the delay in milliseconds as a string
--
-- Outcomes:
--   * qname has no registrable domain         -> DNSAction.Refused
--   * Redis call fails / returns garbage      -> DNSAction.None (fail open)
--   * counter above `threshold_to_drop`       -> DNSAction.Drop
--   * counter above the domain's threshold    -> the configured action
--   * otherwise                               -> DNSAction.None
function rate_limiting(dq)
  local qname = string.lower(dq.qname:toString())
  -- `registrable_domain` fails on a trailing dot, so strip it when present.
  if qname:sub(-1) == "." then
    qname = qname:sub(1, -2)
  end

  -- e.g. test.domain.com -> domain.com, test.google.co.uk -> google.co.uk;
  -- nil when the name cannot be mapped to a registrable domain.
  local domain = psl_obj:registrable_domain(qname)
  if not domain then
    if enable_logging then
      errlog("can't get domain from qname: " .. qname .. ". client: " .. dq.remoteaddr:toString())
    end
    return DNSAction.Refused
  end

  -- Start from the defaults and let a per-domain override replace any of them.
  local threshold = default_threshold
  local action = default_action
  local window_size = 1
  local delay_msec = default_delay_msec
  local override = overrides[domain]
  if override then
    threshold = override.threshold or threshold
    action = override.action or action
    window_size = override.window_size or window_size
    delay_msec = override.delay_msec or delay_msec
  end

  local increment_amount = 1
  local current_time = os.time()
  local next_expires_at = current_time + window_size

  -- An error raised here would escape the Lua action and affect every query,
  -- so a Redis outage must not be allowed to propagate: fail open instead.
  local ok, rate = pcall(client.eval, client, script, 1, domain,
                         increment_amount, next_expires_at, current_time)
  if not ok or type(rate) ~= "table" or type(rate[1]) ~= "number" then
    if enable_logging then
      errlog("rate limiter unavailable for domain " .. domain .. ": " .. tostring(rate))
    end
    return DNSAction.None
  end
  local count = rate[1]

  -- Hard ceiling: drop regardless of the configured action.
  if count > threshold_to_drop then
    if enable_logging then
      warnlog("domain " .. domain .. " has been reached threshold_to_drop: " .. threshold_to_drop .. ". client: " .. dq.remoteaddr:toString())
    end
    return DNSAction.Drop
  end

  if count <= threshold then
    return DNSAction.None
  end

  if enable_logging then
    warnlog("domain " .. domain .. " has been reached threshold: " .. threshold .. ". action: " .. action .. ". client: " .. dq.remoteaddr:toString())
  end

  if action == "drop" then
    return DNSAction.Drop
  elseif action == "refused" then
    return DNSAction.Refused
  elseif action == "nxdomain" then
    return DNSAction.Nxdomain
  elseif action == "servfail" then
    return DNSAction.ServFail
  end

  -- "delay" (and, as before, any unrecognised action name). dnsdist takes the
  -- delay length from the second return value, so it has to be supplied.
  return DNSAction.Delay, tostring(delay_msec)
end
-- Run the rate limiter on every incoming query.
local match_everything = AllRule()
local limiter_action = LuaAction(rate_limiting)
addAction(match_everything, limiter_action)

-- Disable security status polling via DNS by clearing the poll suffix.
-- (Example dnsdist configuration files can be found in
-- /usr/share/doc/dnsdist/examples/.)
setSecurityPollSuffix("")