Skip to main content

Clickhouse module

The Clickhouse module pushes message-related metadata to ClickHouse, an open-source column-oriented database management system optimized for real-time analytics. Collected data includes sender/recipient information, scores, and metadata such as DKIM/DMARC/SPF/Bayes/Fuzzy status, URLs, and attachments.

This module enables you to build analytical dashboards using tools like Redash or Grafana.

Configuration

Settings go in /etc/rspamd/local.d/clickhouse.conf.

Basic configuration

# local.d/clickhouse.conf

# ClickHouse server address (required)
server = "localhost:8123";

# Connection timeout in seconds
timeout = 5;

# Periodic check timeout
check_timeout = 10.0;

# Database name
database = "default";

# Authentication (optional)
# user = "default";
# password = "secret";

# Use HTTPS
use_https = false;

# Disable SSL certificate verification
no_ssl_verify = false;

# Enable gzip compression for data transfer (enabled by default)
use_gzip = true;

Data collection limits

These settings control when accumulated data is sent to ClickHouse:

# local.d/clickhouse.conf

limits {
# Send when this many rows have accumulated (0 to disable)
max_rows = 1000;

# Send when memory usage exceeds this limit
max_memory = 50mb;

# Maximum time between sends
max_interval = 60s;
}

# Perform garbage collection after sending data
collect_garbage = false;

Note: The legacy limit option is deprecated; use limits.max_rows instead.

IP masking

# local.d/clickhouse.conf

# Bits to mask for IPv4 addresses (default: 19)
ipmask = 19;

# Bits to mask for IPv6 addresses (default: 48)
ipmask6 = 48;

Content options

# local.d/clickhouse.conf

# Store full URL paths (default: false, stores only hosts)
full_urls = false;

# Store message digest/hash
enable_digest = false;

# Store symbols data (Names, Scores, Options, Groups)
enable_symbols = false;

# Store data for local/controller scans
allow_local = false;

Subject storage

# local.d/clickhouse.conf

# Store email subject (default: false)
insert_subject = false;

# Obfuscate subject with hash
subject_privacy = false;

# Hash algorithm for obfuscation
subject_privacy_alg = "blake2";

# Prefix for obfuscated subjects
subject_privacy_prefix = "obf";

# Length of hash to use
subject_privacy_length = 16;

Symbol mapping

Configure which symbols map to the Is* columns:

# local.d/clickhouse.conf

# Bayes classification
bayes_spam_symbols = ["BAYES_SPAM"];
bayes_ham_symbols = ["BAYES_HAM"];

# Neural network classification
ann_symbols_spam = ["NEURAL_SPAM"];
ann_symbols_ham = ["NEURAL_HAM"];

# Fuzzy check
fuzzy_symbols = ["FUZZY_DENIED"];

# Whitelist (affects IsWhitelist column)
whitelist_symbols = ["WHITELIST_DKIM", "WHITELIST_SPF_DKIM", "WHITELIST_DMARC"];

# DKIM status
dkim_allow_symbols = ["R_DKIM_ALLOW"];
dkim_reject_symbols = ["R_DKIM_REJECT"];
dkim_dnsfail_symbols = ["R_DKIM_TEMPFAIL", "R_DKIM_PERMFAIL"];
dkim_na_symbols = ["R_DKIM_NA"];

# DMARC status
dmarc_allow_symbols = ["DMARC_POLICY_ALLOW"];
dmarc_reject_symbols = ["DMARC_POLICY_REJECT"];
dmarc_quarantine_symbols = ["DMARC_POLICY_QUARANTINE"];
dmarc_softfail_symbols = ["DMARC_POLICY_SOFTFAIL"];
dmarc_na_symbols = ["DMARC_NA"];

# SPF status
spf_allow_symbols = ["R_SPF_ALLOW"];
spf_reject_symbols = ["R_SPF_FAIL"];
spf_dnsfail_symbols = ["R_SPF_DNSFAIL", "R_SPF_PERMFAIL"];
spf_neutral_symbols = ["R_SPF_NEUTRAL", "R_SPF_SOFTFAIL"];
spf_na_symbols = ["R_SPF_NA"];

Filtering

# local.d/clickhouse.conf

# Symbols that prevent ClickHouse logging when present
stop_symbols = [];

# Map expressions to skip certain messages
# exceptions = {
# symbol_options = {
# selector = "check_symbol_options";
# map = "/etc/rspamd/clickhouse_exceptions.map";
# };
# };

Retention policy

Automatic cleanup of old data for GDPR compliance:

# local.d/clickhouse.conf

retention {
# Enable retention (default: false)
enable = true;

# Method: "drop" or "detach"
# See: https://clickhouse.com/docs/en/sql-reference/statements/alter/partition
method = "drop";

# Keep data for this many months
period_months = 3;

# How often to run cleanup
run_every = 7d;
}

Extra columns

Add custom columns using selectors (requires Rspamd 2.4+ and ClickHouse 19.3+):

# local.d/clickhouse.conf

extra_columns = {
Mime_From = {
selector = "from('mime'):addr";
type = "String";
default_value = "";
comment = "MIME From address";
};
Mime_Rcpt = {
selector = "rcpts('mime'):addr";
type = "Array(String)";
};
};

Alternatively, define the columns as an array to enforce strict ordering:

extra_columns = [
{
name = "Mime_From";
selector = "from('mime'):addr";
type = "String";
},
{
name = "Mime_Rcpt";
selector = "rcpts('mime'):addr";
type = "Array(String)";
}
];

Schema additions

Add custom SQL statements executed during schema setup:

# local.d/clickhouse.conf

schema_additions = [
"CREATE TABLE IF NOT EXISTS rspamd_custom (...) ENGINE = MergeTree() ...",
];

Custom rules

Define custom tables with their own schemas:

# local.d/clickhouse.conf

custom_rules {
my_rule {
schema = "CREATE TABLE IF NOT EXISTS my_table ...";
first_row = "return function() return 'INSERT INTO my_table ...' end";
get_row = "return function(task) return {...} end";
};
};

Database schema

The module creates an rspamd table with the following columns:

| Column | Type | Description |
|---|---|---|
| Date | Date | Date (used for partitioning) |
| TS | DateTime | Timestamp (UTC) |
| From | String | Envelope sender domain |
| MimeFrom | String | MIME From domain |
| IP | String | Sender IP (masked) |
| Helo | String | SMTP HELO hostname |
| Score | Float32 | Message score |
| NRcpt | UInt8 | Number of recipients |
| Size | UInt32 | Message size (bytes) |
| IsWhitelist | Enum8 | Whitelist status |
| IsBayes | Enum8 | Bayes classification |
| IsFuzzy | Enum8 | Fuzzy check result |
| IsFann | Enum8 | Neural network result |
| IsDkim | Enum8 | DKIM status |
| IsDmarc | Enum8 | DMARC status |
| IsSpf | Enum8 | SPF status |
| NUrls | Int32 | Number of URLs |
| Action | Enum8 | Rspamd action |
| CustomAction | String | Custom action name |
| SMTPRecipients | Array(String) | Envelope recipients |
| MimeRecipients | Array(String) | MIME recipients |
| MessageId | String | Message-ID |
| ListId | String | List-Id header |
| Subject | String | Subject (if enabled) |
| Attachments.* | Arrays | Attachment info |
| Urls.* | Arrays | URL info |
| Emails | Array(String) | Extracted emails |
| ASN | UInt32 | AS number |
| Country | FixedString(2) | Country code |
| Symbols.* | Arrays | Symbol info (if enabled) |
| Groups.* | Arrays | Group scores (if enabled) |
| ScanTimeReal | UInt32 | Scan time (ms) |
| AuthUser | String | Authenticated user |
| SettingsId | String | Settings profile |

Query examples

Top spam sender domains

SELECT From, count() AS c
FROM rspamd
WHERE Date = today()
AND Action IN ('reject', 'add header')
GROUP BY From
ORDER BY c DESC
LIMIT 10

Failed DKIM by domain and IP

SELECT From, IP, count() AS c
FROM rspamd
WHERE Date = today() AND IsDkim = 'reject'
GROUP BY From, IP
ORDER BY c DESC
LIMIT 10

Top attachment types in spam

SELECT count() AS c, d
FROM rspamd
ARRAY JOIN Attachments.ContentType AS d
WHERE Date = today()
AND Action IN ('reject', 'add header')
GROUP BY d
ORDER BY c DESC
LIMIT 5

Mailing list statistics

SELECT ListId, IP, count() AS c
FROM rspamd
WHERE Date = today() AND ListId != ''
GROUP BY ListId, IP
ORDER BY c DESC
LIMIT 10

URL flags

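Since version 2.8, URLs are stored with flags describing their source and properties. Each flag occupies one bit of Urls.Flags; the Bit column below is the bit position to pass to bit functions such as bitTest or bitTestAny when filtering: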

| Bit | Flag | Description |
|---|---|---|
| 0 | PHISHED | URL is phished |
| 1 | NUMERIC | URL has numeric IP |
| 4 | HTML_DISPLAYED | From HTML display text |
| 5 | TEXT | From text part |
| 6 | SUBJECT | From subject |
| 13 | HAS_PORT | URL has explicit port |
| 15 | NO_SCHEMA | URL had no schema |
| 18 | DISPLAY_URL | Display URL |
| 19 | IMAGE | From img src |
| 20 | QUERY | Extracted from query string |
| 21 | CONTENT | From content (PDF) |
| 22 | NO_TLD | URL has no TLD |

Filter out image and PDF URLs

SELECT
MessageId,
arrayMap(x -> x.1,
arrayFilter(x -> NOT bitTestAny(x.2, 19, 21),
arrayZip(Urls.Url, Urls.Flags))) AS regular_urls
FROM rspamd
WHERE Date = today()

Select only image URLs

SELECT Urls.Url
FROM rspamd
ARRAY JOIN Urls
WHERE Date = today() AND bitTest(Urls.Flags, 19)