1
0
Fork 0
mirror of https://github.com/processone/ejabberd synced 2025-10-03 09:49:18 +02:00

Merge pull request #4373 from sstrigler/mod_antispam

mod_antispam: port from ejabberd-contrib/mod_spam_filter
This commit is contained in:
badlop 2025-07-08 10:19:34 +02:00 committed by GitHub
commit 9e4a6d09df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 2152 additions and 8 deletions

View file

@ -661,6 +661,17 @@ test:
@cd priv && ln -sf ../sql @cd priv && ln -sf ../sql
$(REBAR) $(SKIPDEPS) ct $(REBAR) $(SKIPDEPS) ct
# Per-group Common Test targets: `make test-<group>` runs the ejabberd_SUITE
# restricted to the named group.
.PHONY: test-%
# Rule template; $1 is the group name taken from the requested goal.
define test-group-target
test-$1:
$(REBAR) $(SKIPDEPS) ct --suite=test/ejabberd_SUITE --group=$1
endef
# Instantiate the rule only when a test-<group> goal is on the command line.
# NOTE(review): the test-% filter also matches the explicit `test-eunit` goal,
# and several test-* goals would be expanded into a single rule -- confirm
# both cases behave as intended.
ifneq ($(filter test-%,$(MAKECMDGOALS)),)
group_to_test := $(patsubst test-%,%,$(filter test-%,$(MAKECMDGOALS)))
$(eval $(call test-group-target,$(group_to_test)))
endif
test-eunit: test-eunit:
$(REBAR) $(SKIPDEPS) eunit --verbose $(REBAR) $(SKIPDEPS) eunit --verbose
@ -711,6 +722,7 @@ help:
@echo " hooks Run hooks validator" @echo " hooks Run hooks validator"
@echo " test Run Common Tests suite [rebar3]" @echo " test Run Common Tests suite [rebar3]"
@echo " test-eunit Run EUnit suite [rebar3]" @echo " test-eunit Run EUnit suite [rebar3]"
@echo " test-<group> Run Common Test suite for specific group only [rebar3]"
@echo " xref Run cross reference analysis [rebar3]" @echo " xref Run cross reference analysis [rebar3]"
#. #.

36
include/mod_antispam.hrl Normal file
View file

@ -0,0 +1,36 @@
%%%----------------------------------------------------------------------
%%%
%%% ejabberd, Copyright (C) 2002-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%% Name of the antispam module, for code that includes this header.
-define(MODULE_ANTISPAM, mod_antispam).
%% A URL, kept as a binary.
-type url() :: binary().
%% A configured file path, or one of the atoms none / false.
%% NOTE(review): presumably none/false mean "no file configured" -- confirm
%% against the option validators.
-type filename() :: binary() | none | false.
%% Set of JIDs in ljid() form.
-type jid_set() :: sets:set(ljid()).
%% Set of URLs.
-type url_set() :: sets:set(url()).
%% Pubsub node name used when an RTBL service does not specify one.
-define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>).
%% Description of one RTBL service: its host, the pubsub node to query,
%% a subscription flag and an optional retry timer reference.
-record(rtbl_service,
{host = none :: binary() | none,
node = ?DEFAULT_RTBL_DOMAINS_NODE :: binary(),
subscribed = false :: boolean(),
retry_timer = undefined :: reference() | undefined}).
-type rtbl_service() :: #rtbl_service{}.

View file

@ -109,6 +109,7 @@ prep_stop(State) ->
ejabberd_service:stop(), ejabberd_service:stop(),
ejabberd_s2s:stop(), ejabberd_s2s:stop(),
ejabberd_system_monitor:stop(), ejabberd_system_monitor:stop(),
gen_mod:prep_stop(),
gen_mod:stop(), gen_mod:stop(),
State. State.

View file

@ -341,7 +341,12 @@ may_hide_data(Data) ->
-spec env_binary_to_list(atom(), atom()) -> {ok, any()} | undefined. -spec env_binary_to_list(atom(), atom()) -> {ok, any()} | undefined.
env_binary_to_list(Application, Parameter) -> env_binary_to_list(Application, Parameter) ->
%% Application need to be loaded to allow setting parameters %% Application need to be loaded to allow setting parameters
application:load(Application), case proplists:is_defined(Application, application:loaded_applications()) of
true ->
ok;
false ->
application:load(Application)
end,
case application:get_env(Application, Parameter) of case application:get_env(Application, Parameter) of
{ok, Val} when is_binary(Val) -> {ok, Val} when is_binary(Val) ->
BVal = binary_to_list(Val), BVal = binary_to_list(Val),
@ -505,8 +510,16 @@ get_predefined_keywords(Host) ->
[{<<"HOST">>, Host}] [{<<"HOST">>, Host}]
end, end,
Home = misc:get_home(), Home = misc:get_home(),
ConfigDirPath =
iolist_to_binary(filename:dirname(
ejabberd_config:path())),
LogDirPath =
iolist_to_binary(filename:dirname(
ejabberd_logger:get_log_path())),
HostList HostList
++ [{<<"HOME">>, list_to_binary(Home)}, ++ [{<<"HOME">>, list_to_binary(Home)},
{<<"CONFIG_PATH">>, ConfigDirPath},
{<<"LOG_PATH">>, LogDirPath},
{<<"SEMVER">>, ejabberd_option:version()}, {<<"SEMVER">>, ejabberd_option:version()},
{<<"VERSION">>, {<<"VERSION">>,
misc:semver_to_xxyy( misc:semver_to_xxyy(

View file

@ -27,7 +27,7 @@
-author('alexey@process-one.net'). -author('alexey@process-one.net').
-export([init/1, start_link/0, start_child/3, start_child/4, -export([init/1, start_link/0, start_child/3, start_child/4,
stop_child/1, stop_child/2, stop/0, config_reloaded/0]). stop_child/1, stop_child/2, prep_stop/0, stop/0, config_reloaded/0]).
-export([start_module/2, stop_module/2, stop_module_keep_config/2, -export([start_module/2, stop_module/2, stop_module_keep_config/2,
get_opt/2, set_opt/3, get_opt_hosts/1, is_equal_opt/3, get_opt/2, set_opt/3, get_opt_hosts/1, is_equal_opt/3,
get_module_opt/3, get_module_opts/2, get_module_opt_hosts/2, get_module_opt/3, get_module_opts/2, get_module_opt_hosts/2,
@ -76,6 +76,7 @@
-callback start(binary(), opts()) -> -callback start(binary(), opts()) ->
ok | {ok, pid()} | ok | {ok, pid()} |
{ok, [registration()]} | {error, term()}. {ok, [registration()]} | {error, term()}.
-callback prep_stop(binary()) -> any().
-callback stop(binary()) -> any(). -callback stop(binary()) -> any().
-callback reload(binary(), opts(), opts()) -> ok | {ok, pid()} | {error, term()}. -callback reload(binary(), opts(), opts()) -> ok | {ok, pid()} | {error, term()}.
-callback mod_opt_type(atom()) -> econf:validator(). -callback mod_opt_type(atom()) -> econf:validator().
@ -86,7 +87,7 @@
example => [string()] | [{binary(), [string()]}]}. example => [string()] | [{binary(), [string()]}]}.
-callback depends(binary(), opts()) -> [{module(), hard | soft}]. -callback depends(binary(), opts()) -> [{module(), hard | soft}].
-optional_callbacks([mod_opt_type/1, reload/3]). -optional_callbacks([mod_opt_type/1, reload/3, prep_stop/1]).
-export_type([opts/0]). -export_type([opts/0]).
-export_type([db_type/0]). -export_type([db_type/0]).
@ -114,6 +115,10 @@ init([]) ->
{read_concurrency, true}]), {read_concurrency, true}]),
{ok, {{one_for_one, 10, 1}, []}}. {ok, {{one_for_one, 10, 1}, []}}.
%% Run the prep_stop phase of the loaded modules; delegates to
%% prep_stop_modules/0.
-spec prep_stop() -> ok.
prep_stop() ->
prep_stop_modules().
-spec stop() -> ok. -spec stop() -> ok.
stop() -> stop() ->
ejabberd_hooks:delete(config_reloaded, ?MODULE, config_reloaded, 60), ejabberd_hooks:delete(config_reloaded, ?MODULE, config_reloaded, 60),
@ -301,6 +306,21 @@ is_app_running(AppName) ->
lists:keymember(AppName, 1, lists:keymember(AppName, 1,
application:which_applications(Timeout)). application:which_applications(Timeout)).
%% Run the prep_stop phase on every configured virtual host, one after another.
-spec prep_stop_modules() -> ok.
prep_stop_modules() ->
    lists:foreach(fun prep_stop_modules/1, ejabberd_option:hosts()).
%% Run the prep_stop phase for each module loaded on Host, walking the list
%% returned by loaded_modules_with_opts/1 in reverse order.
-spec prep_stop_modules(binary()) -> ok.
prep_stop_modules(Host) ->
    PrepStop =
        fun({Mod, _Opts}) ->
                prep_stop_module_keep_config(Host, Mod)
        end,
    lists:foreach(PrepStop, lists:reverse(loaded_modules_with_opts(Host))).
-spec stop_modules() -> ok. -spec stop_modules() -> ok.
stop_modules() -> stop_modules() ->
lists:foreach( lists:foreach(
@ -320,6 +340,22 @@ stop_modules(Host) ->
stop_module(Host, Module) -> stop_module(Host, Module) ->
stop_module_keep_config(Host, Module). stop_module_keep_config(Host, Module).
%% Run the optional prep_stop/1 callback of Module for Host.
%%
%% Returns ok when the module has no prep_stop/1 or when the callback returns
%% (its return value is ignored). Returns error, after logging, when the
%% callback raises.
%%
%% The callback's presence is checked explicitly instead of catching
%% error:undef around the call: that catch also hid genuine undef errors
%% raised from inside a module's prep_stop/1 (e.g. a call to a missing
%% function), which are now logged like any other failure.
-spec prep_stop_module_keep_config(binary(), atom()) -> error | ok.
prep_stop_module_keep_config(Host, Module) ->
    ?DEBUG("Preparing to stop ~ts at ~ts", [Module, Host]),
    %% function_exported/3 only inspects loaded code, so make sure the module
    %% is loaded first; a load failure simply yields 'false' below.
    _ = code:ensure_loaded(Module),
    case erlang:function_exported(Module, prep_stop, 1) of
        false ->
            ok;
        true ->
            try Module:prep_stop(Host) of
                _ ->
                    ok
            catch ?EX_RULE(Class, Reason, St) ->
                    StackTrace = ?EX_STACK(St),
                    ?ERROR_MSG("Failed to prepare stop module ~ts at ~ts:~n** ~ts",
                               [Module, Host,
                                misc:format_exception(2, Class, Reason, StackTrace)]),
                    error
            end
    end.
-spec stop_module_keep_config(binary(), atom()) -> error | ok. -spec stop_module_keep_config(binary(), atom()) -> error | ok.
stop_module_keep_config(Host, Module) -> stop_module_keep_config(Host, Module) ->
?DEBUG("Stopping ~ts at ~ts", [Module, Host]), ?DEBUG("Stopping ~ts at ~ts", [Module, Host]),

893
src/mod_antispam.erl Normal file
View file

@ -0,0 +1,893 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam.erl
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Filter spam messages based on sender JID and content
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%%| definitions
-module(mod_antispam).
-author('holger@zedat.fu-berlin.de').
-author('stefan@strigler.de').
-behaviour(gen_server).
-behaviour(gen_mod).
%% gen_mod callbacks.
-export([start/2,
prep_stop/1,
stop/1,
reload/3,
depends/2,
mod_doc/0,
mod_opt_type/1,
mod_options/1]).
%% gen_server callbacks.
-export([init/1,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3]).
-export([get_rtbl_services_option/1]).
%% ejabberd_commands callbacks.
-export([add_blocked_domain/2,
add_to_spam_filter_cache/2,
drop_from_spam_filter_cache/2,
expire_spam_filter_cache/2,
get_blocked_domains/1,
get_commands_spec/0,
get_spam_filter_cache/1,
reload_spam_filter_files/1,
remove_blocked_domain/2]).
-include("ejabberd_commands.hrl").
-include("logger.hrl").
-include("mod_antispam.hrl").
-include("translate.hrl").
-include_lib("xmpp/include/xmpp.hrl").
%% State of the per-host spam filter gen_server.
-record(state,
%% Virtual host this process serves.
{host = <<>> :: binary(),
%% Device returned by mod_antispam_dump for the stanza dump file, if any.
dump_fd = undefined :: file:io_device() | undefined,
%% URL and JID sets loaded through read_files/1.
url_set = sets:new() :: url_set(),
jid_set = sets:new() :: jid_set(),
%% Cached spam JIDs: LJID => erlang:monotonic_time(second) of last insert/hit.
jid_cache = #{} :: map(),
%% Value of the cache_size option.
max_cache_size = 0 :: non_neg_integer() | unlimited,
%% RTBL service host and pubsub node taken from the rtbl_services option.
rtbl_host = none :: binary() | none,
%% Updated from the subscribe/unsubscribe IQ replies.
rtbl_subscribed = false :: boolean(),
%% Pending timer that re-requests the blocked domains after a failed fetch.
rtbl_retry_timer = undefined :: reference() | undefined,
rtbl_domains_node :: binary(),
%% Blocked domains (file entries merged with RTBL items).
blocked_domains = #{} :: #{binary() => any()},
%% Whitelisted domains; stored with value false so they override blocked
%% entries when both maps are combined.
whitelist_domains = #{} :: #{binary() => false}
}).
-type state() :: #state{}.
-define(COMMAND_TIMEOUT, timer:seconds(30)).
-define(DEFAULT_CACHE_SIZE, 10000).
%% @format-begin
%%--------------------------------------------------------------------
%%| gen_mod callbacks
%% gen_mod start callback: register the ejabberd commands unless the module is
%% already loaded on another host, then start the per-host server process.
-spec start(binary(), gen_mod:opts()) -> ok | {error, any()}.
start(Host, Opts) ->
    %% The commands are registered once, by the first host loading the module.
    _ = gen_mod:is_loaded_elsewhere(Host, ?MODULE)
        orelse ejabberd_commands:register_commands(?MODULE, get_commands_spec()),
    gen_mod:start_child(?MODULE, Host, Opts).
%% gen_mod prep_stop callback: sends the prepare_stop request for Host through
%% try_call_by_host/2 and returns ok once the reply is ready_to_stop.
%% NOTE(review): any other reply (try_call_by_host/2 is not visible here)
%% raises case_clause, although the spec advertises {error, any()} -- confirm
%% that crashing is the intended way to report a failure.
-spec prep_stop(binary()) -> ok | {error, any()}.
prep_stop(Host) ->
case try_call_by_host(Host, prepare_stop) of
ready_to_stop ->
ok
end.
%% gen_mod stop callback: unregister the ejabberd commands when no other host
%% still has the module loaded, then stop the per-host server process.
-spec stop(binary()) -> ok | {error, any()}.
stop(Host) ->
    _ = gen_mod:is_loaded_elsewhere(Host, ?MODULE)
        orelse ejabberd_commands:unregister_commands(get_commands_spec()),
    gen_mod:stop_child(?MODULE, Host).
%% gen_mod reload callback: hand the new and old options to the per-host
%% server asynchronously; the actual work happens in handle_cast/2.
-spec reload(binary(), gen_mod:opts(), gen_mod:opts()) -> ok.
reload(Host, NewOpts, OldOpts) ->
    ?DEBUG("reloading", []),
    gen_server:cast(get_proc_name(Host), {reload, NewOpts, OldOpts}).
%% gen_mod depends callback: declares a soft dependency on mod_pubsub,
%% regardless of host and options.
-spec depends(binary(), gen_mod:opts()) -> [{module(), hard | soft}].
depends(_Host, _Opts) ->
[{mod_pubsub, soft}].
%% Validators for the module options, one clause per option name.
-spec mod_opt_type(atom()) -> econf:validator().
mod_opt_type(access_spam) ->
    econf:acl();
mod_opt_type(cache_size) ->
    %% 0 must be accepted: mod_doc/0 documents it as "disables the caching
    %% feature" and cache_insert/2 has a dedicated clause for it. A positive-
    %% only validator rejected that documented value.
    econf:non_neg_int(unlimited);
mod_opt_type(rtbl_services) ->
    %% Each entry is either a bare host name or a one-key map
    %% Host => [{spam_source_domains_node, Node}].
    econf:list(
      econf:either(
        econf:binary(),
        econf:map(
          econf:binary(),
          econf:map(
            econf:enum([spam_source_domains_node]), econf:binary()))));
mod_opt_type(spam_domains_file) ->
    econf:either(
      econf:enum([none]), econf:file());
mod_opt_type(spam_dump_file) ->
    econf:either(
      econf:bool(), econf:file(write));
mod_opt_type(spam_jids_file) ->
    econf:either(
      econf:enum([none]), econf:file());
mod_opt_type(spam_urls_file) ->
    econf:either(
      econf:enum([none]), econf:file());
mod_opt_type(whitelist_domains_file) ->
    econf:either(
      econf:enum([none]), econf:file()).
%% Default value of every module option; the same for all hosts.
-spec mod_options(binary()) -> [{rtbl_services, [tuple()]} | {atom(), any()}].
mod_options(_Host) ->
[{access_spam, none},
{cache_size, ?DEFAULT_CACHE_SIZE},
{rtbl_services, []},
{spam_domains_file, none},
{spam_dump_file, false},
{spam_jids_file, none},
{spam_urls_file, none},
{whitelist_domains_file, none}].
%% Module documentation: description, per-option help texts and a
%% configuration example.
%%
%% Adjacent string literals inside ?T(...) are concatenated by the compiler,
%% so every fragment must carry its own separating space; the spam_jids_file
%% text previously rendered as "JIDsare".
mod_doc() ->
    #{desc =>
          ?T("Filter spam messages and subscription requests received from "
             "remote servers based on "
             "https://xmppbl.org/[Real-Time Block Lists (RTBL)], "
             "lists of known spammer JIDs and/or URLs mentioned in spam messages. "
             "Traffic classified as spam is rejected with an error "
             "(and an '[info]' message is logged) unless the sender "
             "is subscribed to the recipient's presence."),
      note => "added in 25.xx",
      opts =>
          [{access_spam,
            #{value => ?T("Access"),
              desc =>
                  ?T("Access rule that controls what accounts may receive spam messages. "
                     "If the rule returns 'allow' for a given recipient, "
                     "spam messages aren't rejected for that recipient. "
                     "The default value is 'none', which means that all recipients "
                     "are subject to spam filtering verification.")}},
           {cache_size,
            #{value => "pos_integer()",
              desc =>
                  ?T("Maximum number of JIDs that will be cached due to sending spam URLs. "
                     "If that limit is exceeded, the least recently used "
                     "entries are removed from the cache. "
                     "Setting this option to '0' disables the caching feature. "
                     "Note that separate caches are used for each virtual host, "
                     " and that the caches aren't distributed across cluster nodes. "
                     "The default value is '10000'.")}},
           {rtbl_services,
            #{value => ?T("[Service]"),
              example =>
                  ["rtbl_services:",
                   " - pubsub.server1.localhost:",
                   " spam_source_domains_node: actual_custom_pubsub_node"],
              desc =>
                  ?T("Query a RTBL service to get domains to block, as provided by "
                     "https://xmppbl.org/[xmppbl.org]. "
                     "Please note right now this option only supports one service in that list. "
                     "For blocking spam and abuse on MUC channels, please use _`mod_muc_rtbl`_ for now. "
                     "If only the host is provided, the default node names will be assumed. "
                     "If the node name is different than 'spam_source_domains', "
                     "you can setup the custom node name with the option 'spam_source_domains_node'. "
                     "The default value is an empty list of services.")}},
           {spam_domains_file,
            #{value => ?T("none | Path"),
              desc =>
                  ?T("Path to a plain text file containing a list of "
                     "known spam domains, one domain per line. "
                     "Messages and subscription requests sent from one of the listed domains "
                     "are classified as spam if sender is not in recipient's roster. "
                     "This list of domains gets merged with the one retrieved "
                     "by an RTBL host if any given. "
                     "The default value is 'none'.")}},
           {spam_dump_file,
            #{value => ?T("false | true | Path"),
              desc =>
                  ?T("Path to the file to store blocked messages. "
                     "Use an absolute path, or the '@LOG_PATH@' "
                     "https://docs.ejabberd.im/admin/configuration/file-format/#predefined-keywords[predefined keyword] "
                     "to store logs "
                     "in the same place that the other ejabberd log files. "
                     "If set to 'false', it doesn't dump stanzas, which is the default. "
                     "If set to 'true', it stores in '\"@LOG_PATH@/spam_dump_@HOST@.log\"'.")}},
           {spam_jids_file,
            #{value => ?T("none | Path"),
              desc =>
                  ?T("Path to a plain text file containing a list of "
                     "known spammer JIDs, one JID per line. "
                     "Messages and subscription requests sent from one of "
                     "the listed JIDs are classified as spam. "
                     %% Trailing space added: the next literal starts with "are".
                     "Messages containing at least one of the listed JIDs "
                     "are classified as spam as well. "
                     "Furthermore, the sender's JID will be cached, "
                     "so that future traffic originating from that JID will also be classified as spam. "
                     "The default value is 'none'.")}},
           {spam_urls_file,
            #{value => ?T("none | Path"),
              desc =>
                  ?T("Path to a plain text file containing a list of "
                     "URLs known to be mentioned in spam message bodies. "
                     "Messages containing at least one of the listed URLs are classified as spam. "
                     "Furthermore, the sender's JID will be cached, "
                     "so that future traffic originating from that JID will be classified as spam as well. "
                     "The default value is 'none'.")}},
           {whitelist_domains_file,
            #{value => ?T("none | Path"),
              desc =>
                  ?T("Path to a file containing a list of "
                     "domains to whitelist from being blocked, one per line. "
                     "If either it is in 'spam_domains_file' or more realistically "
                     "in a domain sent by a RTBL host (see option 'rtbl_services') "
                     "then this domain will be ignored and stanzas from there won't be blocked. "
                     "The default value is 'none'.")}}],
      example =>
          ["modules:",
           " mod_antispam:",
           " rtbl_services:",
           " - xmppbl.org",
           " spam_jids_file: \"@CONFIG_PATH@/spam_jids.txt\"",
           " spam_dump_file: \"@LOG_PATH@/spam/host-@HOST@.log\""]}.
%%--------------------------------------------------------------------
%%| gen_server callbacks
%% gen_server init callback for the per-host spam filter process.
%% Loads the configured list files, installs the pubsub event hook, sets up
%% filtering and dumping, and requests the blocked domains from the RTBL
%% service taken from the options.
-spec init(list()) -> {ok, state()} | {stop, term()}.
init([Host, Opts]) ->
%% Presumably trapped so terminate/2 can run its cleanup on shutdown.
process_flag(trap_exit, true),
mod_antispam_files:init_files(Host),
FilesResults = read_files(Host),
%% NOTE(review): reload_files/1 also handles a {config_error, _} result from
%% read_files/1; here such a result would fail this match -- confirm intended.
#{jid := JIDsSet,
url := URLsSet,
domains := SpamDomainsSet,
whitelist_domains := WhitelistDomains} =
FilesResults,
ejabberd_hooks:add(local_send_to_resource_hook,
Host,
mod_antispam_rtbl,
pubsub_event_handler,
50),
%% get_rtbl_services_option/1 always returns exactly one service record.
[#rtbl_service{host = RTBLHost, node = RTBLDomainsNode}] = get_rtbl_services_option(Opts),
mod_antispam_filter:init_filtering(Host),
InitState =
#state{host = Host,
jid_set = JIDsSet,
url_set = URLsSet,
dump_fd = mod_antispam_dump:init_dumping(Host),
max_cache_size = gen_mod:get_opt(cache_size, Opts),
blocked_domains = set_to_map(SpamDomainsSet),
%% Whitelisted domains are stored with value false.
whitelist_domains = set_to_map(WhitelistDomains, false),
rtbl_host = RTBLHost,
rtbl_domains_node = RTBLDomainsNode},
%% The reply is handled in handle_info/2 ({iq_reply, _, blocked_domains}).
mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
{ok, InitState}.
%% gen_server call handler.
%%
%% Filter queries (check_jid, check_body) and the cache/file management
%% requests reply with {spam_filter, Result}. get_blocked_domains replies with
%% {blocked_domains, Map}, is_blocked_domain with a boolean, and prepare_stop
%% with ready_to_stop. Unknown requests are logged and left unanswered.
-spec handle_call(term(), {pid(), term()}, state()) ->
                     {reply, {spam_filter, term()}, state()} | {noreply, state()}.
handle_call({check_jid, From}, _From, #state{jid_set = JIDsSet} = State) ->
    {Result, State1} = filter_jid(From, JIDsSet, State),
    {reply, {spam_filter, Result}, State1};
handle_call({check_body, URLs, JIDs, From},
            _From,
            #state{url_set = URLsSet, jid_set = JIDsSet} = State) ->
    %% Both checks always run, so a sender matching either list gets cached.
    {UrlVerdict, State1} = filter_body(URLs, URLsSet, From, State),
    {JidVerdict, State2} = filter_body(JIDs, JIDsSet, From, State1),
    Result =
        case UrlVerdict of
            spam ->
                spam;
            _ ->
                JidVerdict
        end,
    {reply, {spam_filter, Result}, State2};
handle_call(reload_spam_files, _From, State) ->
    {Result, State1} = reload_files(State),
    {reply, {spam_filter, Result}, State1};
handle_call({expire_cache, Age}, _From, State) ->
    {Result, State1} = expire_cache(Age, State),
    {reply, {spam_filter, Result}, State1};
handle_call({add_to_cache, JID}, _From, State) ->
    {Result, State1} = add_to_cache(JID, State),
    {reply, {spam_filter, Result}, State1};
handle_call({drop_from_cache, JID}, _From, State) ->
    {Result, State1} = drop_from_cache(JID, State),
    {reply, {spam_filter, Result}, State1};
handle_call(get_cache, _From, #state{jid_cache = Cache} = State) ->
    {reply, {spam_filter, maps:to_list(Cache)}, State};
handle_call({add_blocked_domain, Domain},
            _From,
            #state{blocked_domains = BlockedDomains} = State) ->
    BlockedDomains1 = BlockedDomains#{Domain => true},
    Txt = format("~s added to blocked domains", [Domain]),
    {reply, {spam_filter, {ok, Txt}}, State#state{blocked_domains = BlockedDomains1}};
handle_call({remove_blocked_domain, Domain},
            _From,
            #state{blocked_domains = BlockedDomains} = State) ->
    BlockedDomains1 = maps:remove(Domain, BlockedDomains),
    Txt = format("~s removed from blocked domains", [Domain]),
    {reply, {spam_filter, {ok, Txt}}, State#state{blocked_domains = BlockedDomains1}};
handle_call(get_blocked_domains,
            _From,
            #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} =
                State) ->
    %% Whitelist entries override blocked ones in the merged result.
    {reply, {blocked_domains, maps:merge(BlockedDomains, WhitelistDomains)}, State};
handle_call({is_blocked_domain, Domain},
            _From,
            #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} =
                State) ->
    %% Same precedence as merging both maps (whitelist value first, then the
    %% blocked value, then false), but with two lookups instead of building
    %% a merged map on every check.
    Verdict =
        maps:get(Domain, WhitelistDomains, maps:get(Domain, BlockedDomains, false)),
    {reply, Verdict =/= false, State};
handle_call(prepare_stop,
            _From,
            #state{host = Host,
                   rtbl_host = RTBLHost,
                   rtbl_domains_node = RTBLDomainsNode} =
                State) ->
    mod_antispam_rtbl:unsubscribe(RTBLHost, RTBLDomainsNode, Host),
    {reply, ready_to_stop, State};
handle_call(Request, From, State) ->
    ?ERROR_MSG("Got unexpected request from ~p: ~p", [From, Request]),
    {noreply, State}.
%% gen_server cast handler: stanza dumping, dump file reopening, option
%% reload and blocked-domain updates. Unknown casts are logged and ignored.
-spec handle_cast(term(), state()) -> {noreply, state()}.
handle_cast({dump_stanza, XML}, #state{dump_fd = Fd} = State) ->
mod_antispam_dump:write_stanza_dump(Fd, XML),
{noreply, State};
handle_cast(reopen_log, #state{host = Host, dump_fd = Fd} = State) ->
{noreply, State#state{dump_fd = mod_antispam_dump:reopen_dump_file(Host, Fd)}};
%% Option reload, sent by reload/3.
handle_cast({reload, NewOpts, OldOpts},
#state{host = Host,
dump_fd = Fd,
rtbl_host = OldRTBLHost,
rtbl_domains_node = OldRTBLDomainsNode,
rtbl_retry_timer = RTBLRetryTimer} =
State) ->
%% Drop any pending retry before a fresh request is issued below.
misc:cancel_timer(RTBLRetryTimer),
State1 =
State#state{dump_fd = mod_antispam_dump:reload_dumping(Host, Fd, OldOpts, NewOpts)},
State2 =
%% The atom 'unlimited' compares greater than any integer in Erlang term
%% order, so these guards also cover switching to or from 'unlimited'.
case {gen_mod:get_opt(cache_size, OldOpts), gen_mod:get_opt(cache_size, NewOpts)} of
{OldMax, NewMax} when NewMax < OldMax ->
shrink_cache(State1#state{max_cache_size = NewMax});
{OldMax, NewMax} when NewMax > OldMax ->
State1#state{max_cache_size = NewMax};
{_OldMax, _NewMax} ->
State1
end,
ok = mod_antispam_rtbl:unsubscribe(OldRTBLHost, OldRTBLDomainsNode, Host),
%% Start from an empty blocked-domain map; reload_files/1 re-adds the file
%% entries and the RTBL reply re-adds the remote ones.
{_Result, State3} = reload_files(State2#state{blocked_domains = #{}}),
[#rtbl_service{host = RTBLHost, node = RTBLDomainsNode}] =
get_rtbl_services_option(NewOpts),
ok = mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
{noreply, State3#state{rtbl_host = RTBLHost, rtbl_domains_node = RTBLDomainsNode}};
%% New items override existing entries with the same key.
handle_cast({update_blocked_domains, NewItems},
#state{blocked_domains = BlockedDomains} = State) ->
{noreply, State#state{blocked_domains = maps:merge(BlockedDomains, NewItems)}};
handle_cast(Request, State) ->
?ERROR_MSG("Got unexpected request from: ~p", [Request]),
{noreply, State}.
%% gen_server info handler: IQ replies from the RTBL service (blocked-domain
%% fetch, subscribe, unsubscribe) and the retry timer message. Anything else
%% is logged and ignored.
-spec handle_info(term(), state()) -> {noreply, state()}.
handle_info({iq_reply, timeout, blocked_domains}, State) ->
    ?WARNING_MSG("Fetching blocked domains failed: fetch timeout. Retrying in 60 seconds",
                 []),
    {noreply, arm_blocked_domains_retry(State)};
handle_info({iq_reply, #iq{type = error} = IQ, blocked_domains}, State) ->
    ?WARNING_MSG("Fetching blocked domains failed: ~p. Retrying in 60 seconds",
                 [xmpp:format_stanza_error(
                      xmpp:get_error(IQ))]),
    {noreply, arm_blocked_domains_retry(State)};
handle_info({iq_reply, IQReply, blocked_domains}, State) ->
    case mod_antispam_rtbl:parse_blocked_domains(IQReply) of
        undefined ->
            ?WARNING_MSG("Fetching initial list failed: invalid result payload", []),
            {noreply, State#state{rtbl_retry_timer = undefined}};
        Fetched ->
            #state{host = Host,
                   rtbl_host = RTBLHost,
                   rtbl_domains_node = Node,
                   blocked_domains = Known} =
                State,
            %% The initial list arrived: subscribe for subsequent updates.
            ok = mod_antispam_rtbl:subscribe(RTBLHost, Node, Host),
            {noreply,
             State#state{rtbl_retry_timer = undefined,
                         rtbl_subscribed = true,
                         blocked_domains = maps:merge(Known, Fetched)}}
    end;
handle_info({iq_reply, timeout, subscribe_result}, State) ->
    ?WARNING_MSG("Subscription error: request timeout", []),
    {noreply, State#state{rtbl_subscribed = false}};
handle_info({iq_reply, #iq{type = error} = IQ, subscribe_result}, State) ->
    ?WARNING_MSG("Subscription error: ~p",
                 [xmpp:format_stanza_error(
                      xmpp:get_error(IQ))]),
    {noreply, State#state{rtbl_subscribed = false}};
handle_info({iq_reply, IQReply, subscribe_result}, State) ->
    ?DEBUG("Got subscribe result: ~p", [IQReply]),
    {noreply, State#state{rtbl_subscribed = true}};
handle_info({iq_reply, _IQReply, unsubscribe_result}, State) ->
    %% FIXME: we should check it's true (of type `result`, not `error`), but at that point, what
    %% would we do?
    {noreply, State#state{rtbl_subscribed = false}};
handle_info(request_blocked_domains, State) ->
    #state{host = Host, rtbl_host = RTBLHost, rtbl_domains_node = Node} = State,
    mod_antispam_rtbl:request_blocked_domains(RTBLHost, Node, Host),
    {noreply, State};
handle_info(Info, State) ->
    ?ERROR_MSG("Got unexpected info: ~p", [Info]),
    {noreply, State}.

%% Schedule another blocked-domains request in 60 seconds and remember the
%% timer reference in the state.
-spec arm_blocked_domains_retry(state()) -> state().
arm_blocked_domains_retry(State) ->
    Timer = erlang:send_after(60000, self(), request_blocked_domains),
    State#state{rtbl_retry_timer = Timer}.
%% gen_server terminate callback: cancel the retry timer, shut down dumping,
%% file handling and filtering, remove the pubsub event hook and unsubscribe
%% from the RTBL node.
-spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok.
terminate(Reason, State) ->
    #state{host = Host,
           dump_fd = Fd,
           rtbl_host = RTBLHost,
           rtbl_domains_node = Node,
           rtbl_retry_timer = Timer} =
        State,
    ?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
    misc:cancel_timer(Timer),
    mod_antispam_dump:terminate_dumping(Host, Fd),
    mod_antispam_files:terminate_files(Host),
    mod_antispam_filter:terminate_filtering(Host),
    ejabberd_hooks:delete(local_send_to_resource_hook,
                          Host,
                          mod_antispam_rtbl,
                          pubsub_event_handler,
                          50),
    mod_antispam_rtbl:unsubscribe(RTBLHost, Node, Host),
    ok.
%% gen_server code_change callback: the state needs no conversion, so it is
%% returned untouched after a debug trace.
-spec code_change({down, term()} | term(), state(), term()) -> {ok, state()}.
code_change(_OldVsn, State, _Extra) ->
    ?DEBUG("Updating spam filter process for ~s", [State#state.host]),
    {ok, State}.
%%--------------------------------------------------------------------
%%| Internal functions
%% Classify a sender JID: spam when it is in the configured JID set or in the
%% cache, ham otherwise. The cache is consulted (and a hit refreshed) only
%% when the set does not contain the JID. Runs the spam_found hook on spam.
-spec filter_jid(ljid(), jid_set(), state()) -> {ham | spam, state()}.
filter_jid(From, Set, #state{host = Host} = State) ->
    {Listed, State1} =
        case sets:is_element(From, Set) of
            true ->
                {true, State};
            false ->
                cache_lookup(From, State)
        end,
    case Listed of
        true ->
            ?DEBUG("Spam JID found: ~s", [jid:encode(From)]),
            ejabberd_hooks:run(spam_found, Host, [{jid, From}]),
            {spam, State1};
        false ->
            ?DEBUG("JID not listed: ~s", [jid:encode(From)]),
            {ham, State1}
    end.
%% Classify a message body by the addresses (URLs or JIDs) extracted from it.
%% When at least one address is in Set the message is spam, the spam_found
%% hook is run and the sender is inserted into the cache. 'none' (nothing
%% extracted) is always ham.
-spec filter_body({urls, [url()]} | {jids, [ljid()]} | none,
                  url_set() | jid_set(),
                  jid(),
                  state()) ->
                     {ham | spam, state()}.
filter_body(none, _Set, _From, State) ->
    {ham, State};
filter_body({_Kind, Addrs}, Set, From, #state{host = Host} = State) ->
    IsListed = fun(Addr) -> sets:is_element(Addr, Set) end,
    case lists:any(IsListed, Addrs) of
        false ->
            ?DEBUG("Addresses not listed: ~p", [Addrs]),
            {ham, State};
        true ->
            ?DEBUG("Spam addresses found: ~p", [Addrs]),
            ejabberd_hooks:run(spam_found, Host, [{body, Addrs}]),
            {spam, cache_insert(From, State)}
    end.
%% Re-read the list files for the state's host.
%% On success the JID/URL sets and the whitelist are replaced, the domains
%% read from file are merged into the current blocked domains, and an info
%% line per list says whether it changed. On {config_error, Text} the state
%% is returned untouched together with {error, Text}.
-spec reload_files(state()) -> {ok | {error, binary()}, state()}.
reload_files(#state{host = Host, blocked_domains = BlockedDomains} = State) ->
    case read_files(Host) of
        {config_error, ErrorText} ->
            {{error, ErrorText}, State};
        #{jid := NewJIDs,
          url := NewURLs,
          domains := NewDomains,
          whitelist_domains := NewWhitelist} ->
            case sets_equal(NewJIDs, State#state.jid_set) of
                false ->
                    ?INFO_MSG("Reloaded spam JIDs for ~s (changed)", [Host]);
                true ->
                    ?INFO_MSG("Reloaded spam JIDs for ~s (unchanged)", [Host])
            end,
            case sets_equal(NewURLs, State#state.url_set) of
                false ->
                    ?INFO_MSG("Reloaded spam URLs for ~s (changed)", [Host]);
                true ->
                    ?INFO_MSG("Reloaded spam URLs for ~s (unchanged)", [Host])
            end,
            Blocked = maps:merge(BlockedDomains, set_to_map(NewDomains)),
            Whitelist = set_to_map(NewWhitelist, false),
            {ok,
             State#state{jid_set = NewJIDs,
                         url_set = NewURLs,
                         blocked_domains = Blocked,
                         whitelist_domains = Whitelist}}
    end.
%% Turn a set into a map whose keys are the set elements, all mapped to 'true'.
set_to_map(Set) ->
    set_to_map(Set, true).

%% Same, with every key mapped to Value.
set_to_map(Set, Value) ->
    maps:from_list([{Key, Value} || Key <- sets:to_list(Set)]).
%% Collect the configured list files for Host and fold them through the
%% antispam_get_lists hook, starting from empty sets for every list kind.
read_files(Host) ->
    Opt = fun(Name) -> gen_mod:get_module_opt(Host, ?MODULE, Name) end,
    Files =
        #{jid => Opt(spam_jids_file),
          url => Opt(spam_urls_file),
          domains => Opt(spam_domains_file),
          whitelist_domains => Opt(whitelist_domains_file)},
    Empty = sets:new(),
    Acc0 =
        #{jid => Empty,
          url => Empty,
          domains => Empty,
          whitelist_domains => Empty},
    ejabberd_hooks:run_fold(antispam_get_lists, Host, Acc0, [Files]).
%% Return the RTBL service to use as a one-element list of #rtbl_service{}.
%% Accepts either a host (its module options are looked up) or an options
%% map. Only the first configured service is used; a warning is logged when
%% more than one is configured. With no service configured, a record with
%% default fields is returned.
get_rtbl_services_option(Host) when is_binary(Host) ->
    get_rtbl_services_option(gen_mod:get_module_opts(Host, ?MODULE));
get_rtbl_services_option(Opts) when is_map(Opts) ->
    Services = gen_mod:get_opt(rtbl_services, Opts),
    case Services of
        [_, _ | _] ->
            ?WARNING_MSG("Option rtbl_services only supports one service, but several "
                         "were configured. Will use only first one",
                         []);
        _ ->
            ok
    end,
    case Services of
        [] ->
            [#rtbl_service{}];
        [ServiceHost | _] when is_binary(ServiceHost) ->
            [#rtbl_service{host = ServiceHost, node = ?DEFAULT_RTBL_DOMAINS_NODE}];
        [[{ServiceHost, [{spam_source_domains_node, Node}]}] | _] ->
            [#rtbl_service{host = ServiceHost, node = Node}]
    end.
%% Name of this module's process for Host, as computed by
%% gen_mod:get_module_proc/2.
-spec get_proc_name(binary()) -> atom().
get_proc_name(Host) ->
gen_mod:get_module_proc(Host, ?MODULE).
%% Configured hosts on which this module is loaded, in configuration order.
-spec get_spam_filter_hosts() -> [binary()].
get_spam_filter_hosts() ->
    HasModule = fun(Host) -> gen_mod:is_loaded(Host, ?MODULE) end,
    lists:filter(HasModule, ejabberd_option:hosts()).
%% True when both sets hold exactly the same elements, i.e. each one is a
%% subset of the other.
-spec sets_equal(sets:set(), sets:set()) -> boolean().
sets_equal(A, B) ->
    case sets:is_subset(A, B) of
        true ->
            sets:is_subset(B, A);
        false ->
            false
    end.
%% io_lib:format/2 with the result flattened into a binary.
-spec format(io:format(), [term()]) -> binary().
format(Format, Data) ->
    Chars = io_lib:format(Format, Data),
    iolist_to_binary(Chars).
%%--------------------------------------------------------------------
%%| Caching
%% Record LJID in the spam cache with the current monotonic time.
%% A max_cache_size of 0 disables caching. When a bounded cache is full it is
%% shrunk first and the insert is retried.
-spec cache_insert(ljid(), state()) -> state().
cache_insert(LJID, #state{jid_cache = Cache, max_cache_size = MaxSize} = State) ->
    IsFull = MaxSize =/= unlimited andalso map_size(Cache) >= MaxSize,
    if MaxSize =:= 0 ->
            State;
       IsFull ->
            cache_insert(LJID, shrink_cache(State));
       true ->
            ?INFO_MSG("Caching spam JID: ~s", [jid:encode(LJID)]),
            State#state{jid_cache = Cache#{LJID => erlang:monotonic_time(second)}}
    end.
%% Tell whether LJID is cached. A hit refreshes the entry's timestamp, so
%% recently seen spammers are the last to be evicted.
-spec cache_lookup(ljid(), state()) -> {boolean(), state()}.
cache_lookup(LJID, #state{jid_cache = Cache} = State) ->
    case Cache of
        #{LJID := _Seen} ->
            Now = erlang:monotonic_time(second),
            {true, State#state{jid_cache = Cache#{LJID := Now}}};
        #{} ->
            {false, State}
    end.
%% Evict the least recently used cache entries so that at most half of
%% max_cache_size (rounded down) remain. max_cache_size must be an integer;
%% callers never pass 'unlimited'.
%%
%% Two defects of the previous version are fixed here:
%%  * when the cache already held fewer entries than the target size (e.g. a
%%    reload lowering cache_size while the cache is nearly empty), the number
%%    of entries to drop was negative and lists:nthtail/2 crashed the server;
%%    the count is now clamped at 0;
%%  * round(1 / 2) is 1, so with cache_size 1 nothing was evicted and
%%    cache_insert/2 retried forever; integer division keeps the target
%%    strictly below any positive limit, so a full cache always shrinks.
-spec shrink_cache(state()) -> state().
shrink_cache(#state{jid_cache = Cache, max_cache_size = MaxSize} = State) ->
    TargetSize = MaxSize div 2,
    Excess = max(0, map_size(Cache) - TargetSize),
    %% Oldest timestamps first, so dropping the head removes the stalest JIDs.
    OldestFirst = lists:keysort(2, maps:to_list(Cache)),
    Kept = lists:nthtail(Excess, OldestFirst),
    State#state{jid_cache = maps:from_list(Kept)}.
%% Drop every cache entry whose timestamp is more than Age seconds old and
%% report how many entries were removed.
-spec expire_cache(integer(), state()) -> {{ok, binary()}, state()}.
expire_cache(Age, #state{jid_cache = Cache} = State) ->
    Oldest = erlang:monotonic_time(second) - Age,
    Fresh =
        maps:from_list([Entry || {_JID, Seen} = Entry <- maps:to_list(Cache), Seen >= Oldest]),
    Expired = map_size(Cache) - map_size(Fresh),
    {{ok, format("Expired ~B cache entries", [Expired])}, State#state{jid_cache = Fresh}}.
%% Command helper: insert LJID into the cache and build the confirmation text.
-spec add_to_cache(ljid(), state()) -> {{ok, binary()}, state()}.
add_to_cache(LJID, State) ->
    Updated = cache_insert(LJID, State),
    {{ok, format("~s added to cache", [jid:encode(LJID)])}, Updated}.
%% Command helper: remove LJID from the cache. The reply text says whether
%% the JID had been cached; the state only changes when it was.
-spec drop_from_cache(ljid(), state()) -> {{ok, binary()}, state()}.
drop_from_cache(LJID, #state{jid_cache = Cache} = State) ->
    case Cache of
        #{LJID := _Seen} ->
            Remaining = maps:remove(LJID, Cache),
            {{ok, format("~s removed from cache", [jid:encode(LJID)])},
             State#state{jid_cache = Remaining}};
        #{} ->
            {{ok, format("~s wasn't cached", [jid:encode(LJID)])}, State}
    end.
%%--------------------------------------------------------------------
%%| ejabberd command callbacks
%% Specifications of the ejabberd commands provided by this module.
%% Every command is tagged 'filter', is implemented by the function of the
%% same name in this module, and takes the virtual host as first argument.
-spec get_commands_spec() -> [ejabberd_commands()].
get_commands_spec() ->
[#ejabberd_commands{name = reload_spam_filter_files,
tags = [filter],
desc = "Reload spam JID/URL files",
module = ?MODULE,
function = reload_spam_filter_files,
args = [{host, binary}],
result = {res, rescode}},
#ejabberd_commands{name = get_spam_filter_cache,
tags = [filter],
desc = "Show spam filter cache contents",
module = ?MODULE,
function = get_spam_filter_cache,
args = [{host, binary}],
result =
{spammers,
{list, {spammer, {tuple, [{jid, string}, {timestamp, integer}]}}}}},
#ejabberd_commands{name = expire_spam_filter_cache,
tags = [filter],
desc = "Remove old/unused spam JIDs from cache",
module = ?MODULE,
function = expire_spam_filter_cache,
args = [{host, binary}, {seconds, integer}],
result = {res, restuple}},
#ejabberd_commands{name = add_to_spam_filter_cache,
tags = [filter],
desc = "Add JID to spam filter cache",
module = ?MODULE,
function = add_to_spam_filter_cache,
args = [{host, binary}, {jid, binary}],
result = {res, restuple}},
#ejabberd_commands{name = drop_from_spam_filter_cache,
tags = [filter],
desc = "Drop JID from spam filter cache",
module = ?MODULE,
function = drop_from_spam_filter_cache,
args = [{host, binary}, {jid, binary}],
result = {res, restuple}},
%% NOTE(review): the list items of this result are labelled 'jid' although
%% the command returns domains -- confirm whether the label is intended.
#ejabberd_commands{name = get_blocked_domains,
tags = [filter],
desc = "Get list of domains being blocked",
module = ?MODULE,
function = get_blocked_domains,
args = [{host, binary}],
result = {blocked_domains, {list, {jid, string}}}},
#ejabberd_commands{name = add_blocked_domain,
tags = [filter],
desc = "Add domain to list of blocked domains",
module = ?MODULE,
function = add_blocked_domain,
args = [{host, binary}, {domain, binary}],
result = {res, restuple}},
#ejabberd_commands{name = remove_blocked_domain,
tags = [filter],
desc = "Remove domain from list of blocked domains",
module = ?MODULE,
function = remove_blocked_domain,
args = [{host, binary}, {domain, binary}],
result = {res, restuple}}].
%% Run command fun F once for every host returned by get_spam_filter_hosts/0,
%% prepending the host to the argument list A.
%%
%% Returns the first {error, _} result if any host produced one, otherwise the
%% result obtained for the first host. When no host is available an
%% {error, Text} tuple is returned instead of crashing on hd([]).
%% A badmatch on an {error, _} term raised while running F is converted into
%% that error term.
for_all_hosts(F, A) ->
    try [apply(F, [Host | A]) || Host <- get_spam_filter_hosts()] of
        [] ->
            %% Nothing to run the command on; report it rather than crash.
            {error, "Spam filter is not enabled for any host"};
        [FirstResult | _] = Results ->
            case [E || {error, _} = E <- Results] of
                [] ->
                    FirstResult;
                [FirstError | _] ->
                    FirstError
            end
    catch
        error:{badmatch, {error, _Reason} = Error} ->
            Error
    end.
%% Send Call to the spam filter process registered for Host and return its
%% reply. A missing process or a call timeout is turned into {error, Text}.
try_call_by_host(Host, Call) ->
    Proc = get_proc_name(jid:nameprep(Host)),
    try
        gen_server:call(Proc, Call, ?COMMAND_TIMEOUT)
    catch
        exit:{noproc, _} ->
            {error, "Not configured for " ++ binary_to_list(Host)};
        exit:{timeout, _} ->
            {error, "Timeout while querying ejabberd"}
    end.
%% Command callback: ask the filter process of Host to reload its spam files.
%% The special host name "global" runs the command for all filter hosts.
-spec reload_spam_filter_files(binary()) -> ok | {error, string()}.
reload_spam_filter_files(<<"global">>) ->
    for_all_hosts(fun reload_spam_filter_files/1, []);
reload_spam_filter_files(Host) ->
    case try_call_by_host(Host, reload_spam_files) of
        {spam_filter, ok} ->
            ok;
        {spam_filter, {error, _Txt} = Failure} ->
            Failure;
        {error, _Reason} = Failure ->
            Failure
    end.
%% Command callback: list the domains currently blocked on Host.
%% Entries whose stored value is `false' are left out of the result.
%% Returns {error, Text} when the filter process cannot be reached (the spec
%% now reflects that second return shape).
-spec get_blocked_domains(binary()) -> [binary()] | {error, string()}.
get_blocked_domains(Host) ->
    case try_call_by_host(Host, get_blocked_domains) of
        {blocked_domains, BlockedDomains} ->
            IsBlocked = fun(_Domain, Flag) -> Flag =/= false end,
            maps:keys(
                maps:filter(IsBlocked, BlockedDomains));
        {error, _R} = Error ->
            Error
    end.
%% Command callback: add Domain to the blocked domains of Host.
%% The special host name "global" applies the change to all filter hosts.
%% Returns the status reported by the filter process with its text converted
%% to a string, or {error, Text} when the process cannot be reached; the spec
%% matches the sibling cache commands.
-spec add_blocked_domain(binary(), binary()) -> {ok | error, string()}.
add_blocked_domain(<<"global">>, Domain) ->
    for_all_hosts(fun add_blocked_domain/2, [Domain]);
add_blocked_domain(Host, Domain) ->
    case try_call_by_host(Host, {add_blocked_domain, Domain}) of
        {spam_filter, {Status, Txt}} ->
            {Status, binary_to_list(Txt)};
        {error, _R} = Error ->
            Error
    end.
%% Command callback: remove Domain from the blocked domains of Host.
%% The special host name "global" applies the change to all filter hosts.
%% Returns the status reported by the filter process with its text converted
%% to a string, or {error, Text} when the process cannot be reached; the spec
%% matches the sibling cache commands.
-spec remove_blocked_domain(binary(), binary()) -> {ok | error, string()}.
remove_blocked_domain(<<"global">>, Domain) ->
    for_all_hosts(fun remove_blocked_domain/2, [Domain]);
remove_blocked_domain(Host, Domain) ->
    case try_call_by_host(Host, {remove_blocked_domain, Domain}) of
        {spam_filter, {Status, Txt}} ->
            {Status, binary_to_list(Txt)};
        {error, _R} = Error ->
            Error
    end.
%% Command callback: return the cached spam JIDs of Host as
%% {EncodedJID, Timestamp} pairs. The stored monotonic timestamps are shifted
%% by erlang:time_offset(second) before being returned.
-spec get_spam_filter_cache(binary()) -> [{binary(), integer()}] | {error, string()}.
get_spam_filter_cache(Host) ->
    Reply = try_call_by_host(Host, get_cache),
    case Reply of
        {error, _Reason} ->
            Reply;
        {spam_filter, Entries} ->
            [{jid:encode(LJID), Monotonic + erlang:time_offset(second)}
             || {LJID, Monotonic} <- Entries]
    end.
%% Command callback: expire cache entries of Host that are older than Age
%% seconds. The special host name "global" runs it for all filter hosts.
-spec expire_spam_filter_cache(binary(), integer()) -> {ok | error, string()}.
expire_spam_filter_cache(<<"global">>, Age) ->
    for_all_hosts(fun expire_spam_filter_cache/2, [Age]);
expire_spam_filter_cache(Host, Age) ->
    Reply = try_call_by_host(Host, {expire_cache, Age}),
    case Reply of
        {error, _Reason} ->
            Reply;
        {spam_filter, {Status, Text}} ->
            {Status, binary_to_list(Text)}
    end.
%% Command callback: add the bare, lower-cased form of EncJID to the spam JID
%% cache of Host. The special host name "global" runs it for all filter hosts.
%% Returns {Status, Text} as reported by the filter process, or {error, Text}
%% when the JID cannot be decoded or the process cannot be reached. The spec
%% previously described the return of get_spam_filter_cache/1.
-spec add_to_spam_filter_cache(binary(), binary()) -> {ok | error, string()}.
add_to_spam_filter_cache(<<"global">>, JID) ->
    for_all_hosts(fun add_to_spam_filter_cache/2, [JID]);
add_to_spam_filter_cache(Host, EncJID) ->
    try jid:decode(EncJID) of
        #jid{} = JID ->
            LJID =
                jid:remove_resource(
                    jid:tolower(JID)),
            case try_call_by_host(Host, {add_to_cache, LJID}) of
                {spam_filter, {Status, Txt}} ->
                    {Status, binary_to_list(Txt)};
                {error, _R} = Error ->
                    Error
            end
    catch
        _:{bad_jid, _} ->
            {error, "Not a valid JID: " ++ binary_to_list(EncJID)}
    end.
%% Command callback: remove the bare, lower-cased form of EncJID from the spam
%% JID cache of Host. The special host name "global" runs it for all filter
%% hosts. An undecodable JID yields {error, Text}.
-spec drop_from_spam_filter_cache(binary(), binary()) -> {ok | error, string()}.
drop_from_spam_filter_cache(<<"global">>, JID) ->
    for_all_hosts(fun drop_from_spam_filter_cache/2, [JID]);
drop_from_spam_filter_cache(Host, EncJID) ->
    try jid:decode(EncJID) of
        #jid{} = Decoded ->
            BareLJID = jid:remove_resource(jid:tolower(Decoded)),
            Reply = try_call_by_host(Host, {drop_from_cache, BareLJID}),
            case Reply of
                {error, _Reason} ->
                    Reply;
                {spam_filter, {Status, Text}} ->
                    {Status, binary_to_list(Text)}
            end
    catch
        _:{bad_jid, _} ->
            {error, "Not a valid JID: " ++ binary_to_list(EncJID)}
    end.
%%--------------------------------------------------------------------
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:

186
src/mod_antispam_dump.erl Normal file
View file

@ -0,0 +1,186 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam_dump.erl
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Manage dump file for filtered spam messages
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%%| Definitions
%% @format-begin
-module(mod_antispam_dump).
-author('holger@zedat.fu-berlin.de').
-author('stefan@strigler.de').
-export([init_dumping/1, terminate_dumping/2, reload_dumping/4, reopen_dump_file/2,
write_stanza_dump/2]).
%% ejabberd_hooks callbacks
-export([dump_spam_stanza/1, reopen_log/0]).
-include("logger.hrl").
-include("mod_antispam.hrl").
-include("translate.hrl").
-include_lib("xmpp/include/xmpp.hrl").
%%--------------------------------------------------------------------
%%| Exported
%% Set up spam dumping for Host.
%% Returns `undefined' when no dump file is configured. Otherwise the parent
%% directory is ensured, the dump and log-reopen hooks are registered and the
%% result of open_dump_file/1 is returned. Throws {open, Dir, Reason} when the
%% directory cannot be ensured.
init_dumping(Host) ->
    case get_path_option(Host) of
        false ->
            undefined;
        Path when is_binary(Path) ->
            case filelib:ensure_dir(Path) of
                {error, Reason} ->
                    throw({open, filename:dirname(Path), Reason});
                ok ->
                    ejabberd_hooks:add(spam_stanza_rejected, Host, ?MODULE, dump_spam_stanza, 50),
                    ejabberd_hooks:add(reopen_log_hook, ?MODULE, reopen_log, 50),
                    open_dump_file(Path)
            end
    end.
%% Tear down spam dumping for Host: close the dump file and unregister the
%% per-host dump hook. The global reopen_log hook is only removed when the
%% antispam module is not loaded on any other host.
%%
%% The "loaded elsewhere" check must ask about ?MODULE_ANTISPAM, the module
%% this file also uses for get_module_opts/is_loaded; asking about ?MODULE
%% (this helper module) would drop the shared hook as soon as the first host
%% stops.
terminate_dumping(_Host, false) ->
    ok;
terminate_dumping(Host, Fd) ->
    DumpFile1 = get_path_option(Host),
    close_dump_file(Fd, DumpFile1),
    ejabberd_hooks:delete(spam_stanza_rejected, Host, ?MODULE, dump_spam_stanza, 50),
    case gen_mod:is_loaded_elsewhere(Host, ?MODULE_ANTISPAM) of
        false ->
            ejabberd_hooks:delete(reopen_log_hook, ?MODULE, reopen_log, 50);
        true ->
            ok
    end.
%% Handle a configuration reload: keep the current file descriptor when the
%% dump path is the same under old and new options, otherwise close the old
%% file and open the new one.
reload_dumping(Host, Fd, OldOpts, NewOpts) ->
    OldPath = get_path_option(Host, OldOpts),
    NewPath = get_path_option(Host, NewOpts),
    if OldPath =:= NewPath ->
           Fd;
       true ->
           reopen_dump_file(Fd, OldPath, NewPath)
    end.
%% Close and reopen the dump file configured for Host (same path on both
%% sides of the reopen).
-spec reopen_dump_file(binary(), file:io_device()) -> file:io_device().
reopen_dump_file(Host, Fd) ->
    Path = get_path_option(Host),
    reopen_dump_file(Fd, Path, Path).
%%--------------------------------------------------------------------
%%| Hook callbacks
%% Hook callback: add delay information (stamped by the recipient's server)
%% to a rejected message, serialise it to XML and cast it to the filter
%% process of the recipient's server for dumping.
-spec dump_spam_stanza(message()) -> ok.
dump_spam_stanza(#message{to = #jid{lserver = LServer}} = Msg) ->
    ServerJID = jid:make(<<>>, LServer),
    Proc = get_proc_name(LServer),
    Stamped = misc:add_delay_info(Msg, ServerJID, erlang:timestamp()),
    XML = fxml:element_to_binary(xmpp:encode(Stamped)),
    gen_server:cast(Proc, {dump_stanza, XML}).
%% Hook callback: tell the filter process of every spam filter host to reopen
%% its log.
-spec reopen_log() -> ok.
reopen_log() ->
    Notify = fun(Host) -> gen_server:cast(get_proc_name(Host), reopen_log) end,
    lists:foreach(Notify, get_spam_filter_hosts()).
%%--------------------------------------------------------------------
%%| File management
%% Open the dump file Name for appending (raw, binary, delayed writes).
%% Returns the file descriptor, or `undefined' when dumping is disabled
%% (Name is `false') or the file cannot be opened (an error is logged).
-spec open_dump_file(filename()) -> undefined | file:io_device().
open_dump_file(false) ->
    undefined;
open_dump_file(Name) ->
    case file:open(Name, [append, raw, binary, delayed_write]) of
        {error, Reason} ->
            ?ERROR_MSG("Cannot open dump file ~s: ~s", [Name, file:format_error(Reason)]),
            undefined;
        {ok, Fd} ->
            ?DEBUG("Opened ~s", [Name]),
            Fd
    end.
%% Close the dump file Fd; Name is only used for log messages.
%% An `undefined' descriptor means there is nothing to close, whatever the
%% configured name is: open_dump_file/1 also returns `undefined' when opening
%% a configured file failed, and that case must not reach file:close/1.
-spec close_dump_file(undefined | file:io_device(), false | filename()) -> ok.
close_dump_file(undefined, _Name) ->
    ok;
close_dump_file(Fd, Name) ->
    case file:close(Fd) of
        ok ->
            ?DEBUG("Closed ~s", [Name]);
        {error, Reason} ->
            ?ERROR_MSG("Cannot close ~s: ~s", [Name, file:format_error(Reason)])
    end.
%% Close the dump file currently open under OldDumpFile and open NewDumpFile,
%% returning the result of open_dump_file/1.
-spec reopen_dump_file(file:io_device(), binary(), binary()) -> file:io_device().
reopen_dump_file(CurrentFd, OldDumpFile, NewDumpFile) ->
    close_dump_file(CurrentFd, OldDumpFile),
    NewFd = open_dump_file(NewDumpFile),
    NewFd.
%% Append one serialised stanza, followed by a newline, to the dump file.
%% Write failures are logged.
write_stanza_dump(Fd, XML) ->
    Line = [XML, <<$\n>>],
    case file:write(Fd, Line) of
        {error, Reason} ->
            ?ERROR_MSG("Cannot write spam to dump file: ~s", [file:format_error(Reason)]);
        ok ->
            ok
    end.
%%--------------------------------------------------------------------
%%| Auxiliary
%% Dump file path for Host according to the currently loaded antispam module
%% options.
get_path_option(Host) ->
    get_path_option(Host, gen_mod:get_module_opts(Host, ?MODULE_ANTISPAM)).
%% Resolve the spam_dump_file option in Opts:
%%   a binary - used as the path as-is;
%%   false    - dumping disabled, returns false;
%%   true     - "spam_dump_<Host>.log" in the directory of the ejabberd log.
get_path_option(Host, Opts) ->
    case gen_mod:get_opt(spam_dump_file, Opts) of
        Path when is_binary(Path) ->
            Path;
        false ->
            false;
        true ->
            LogPath = ejabberd_logger:get_log_path(),
            LogDir = iolist_to_binary(filename:dirname(LogPath)),
            filename:join([LogDir, <<"spam_dump_", Host/binary, ".log">>])
    end.
%%--------------------------------------------------------------------
%%| Copied from mod_antispam.erl
%% Registered name of the antispam process serving Host.
-spec get_proc_name(binary()) -> atom().
get_proc_name(Host) ->
    ProcName = gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM),
    ProcName.
%% All configured hosts on which the antispam module is loaded.
-spec get_spam_filter_hosts() -> [binary()].
get_spam_filter_hosts() ->
    HasFilter = fun(Host) -> gen_mod:is_loaded(Host, ?MODULE_ANTISPAM) end,
    lists:filter(HasFilter, ejabberd_option:hosts()).
%%--------------------------------------------------------------------
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:

181
src/mod_antispam_files.erl Normal file
View file

@ -0,0 +1,181 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam_files.erl
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Filter spam messages based on sender JID and content
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%%| definitions
%% @format-begin
-module(mod_antispam_files).
-author('holger@zedat.fu-berlin.de').
-author('stefan@strigler.de').
%% Exported
-export([init_files/1, terminate_files/1]).
% Hooks
-export([get_files_lists/2]).
-include("ejabberd_commands.hrl").
-include("logger.hrl").
-include("mod_antispam.hrl").
-include("translate.hrl").
-include_lib("xmpp/include/xmpp.hrl").
-type files_map() :: #{atom() => filename()}.
-type lists_map() ::
#{jid => jid_set(),
url => url_set(),
atom() => sets:set(binary())}.
-define(COMMAND_TIMEOUT, timer:seconds(30)).
-define(DEFAULT_CACHE_SIZE, 10000).
-define(HTTPC_TIMEOUT, timer:seconds(3)).
%%--------------------------------------------------------------------
%%| Exported
%% Register get_files_lists/2 on the antispam_get_lists hook of Host
%% (sequence 50).
init_files(Host) ->
ejabberd_hooks:add(antispam_get_lists, Host, ?MODULE, get_files_lists, 50).
%% Remove the antispam_get_lists hook registration made by init_files/1.
terminate_files(Host) ->
ejabberd_hooks:delete(antispam_get_lists, Host, ?MODULE, get_files_lists, 50).
%%--------------------------------------------------------------------
%%| Hooks
%% Hook callback: read the list files named in Files and merge their contents
%% into the accumulator sets (jid, url, domains, whitelist_domains).
%%
%% When a file cannot be opened or read, the problem is logged as critical and
%% {stop, {config_error, Text}} is returned instead of the accumulator.
%% The error text is passed to the logger as data, not as the format string,
%% because it embeds file names and offending lines that may contain `~'.
-spec get_files_lists(lists_map(), files_map()) ->
          lists_map() | {stop, {config_error, binary()}}.
get_files_lists(#{jid := AccJids,
                  url := AccUrls,
                  domains := AccDomains,
                  whitelist_domains := AccWhitelist} =
                    Acc,
                Files) ->
    try read_files(Files) of
        #{jid := JIDsSet,
          url := URLsSet,
          domains := SpamDomainsSet,
          whitelist_domains := WhitelistDomains} ->
            Acc#{jid => sets:union(AccJids, JIDsSet),
                 url => sets:union(AccUrls, URLsSet),
                 domains => sets:union(AccDomains, SpamDomainsSet),
                 whitelist_domains => sets:union(AccWhitelist, WhitelistDomains)}
    catch
        {Op, File, Reason} when Op == open; Op == read ->
            ErrorText = format("Error trying to ~s file ~s: ~s", [Op, File, format_error(Reason)]),
            ?CRITICAL_MSG("~ts", [ErrorText]),
            {stop, {config_error, ErrorText}}
    end.
%%--------------------------------------------------------------------
%%| read_files
%% Replace every file name in Files by the set of entries parsed from that
%% file; the parser is chosen per list type by line_parser/1.
-spec read_files(files_map()) -> lists_map().
read_files(Files) ->
    Load = fun(Type, Filename) ->
              Parser = line_parser(Type),
              read_file(Filename, Parser)
           end,
    maps:map(Load, Files).
%% Select the per-line parser for a list type: JID lists are parsed with
%% parse_jid/1, URL lists with parse_url/1, every other list just has its
%% trailing whitespace trimmed.
%% The spec names all three result types; parse_jid/1 returns ljid(), not a
%% binary.
-spec line_parser(Type :: atom()) -> fun((binary()) -> ljid() | url() | binary()).
line_parser(jid) ->
    fun parse_jid/1;
line_parser(url) ->
    fun parse_url/1;
line_parser(_) ->
    fun trim/1.
%% Read File line by line and return the set of parsed entries.
%% The atom `none' stands for "no file" and yields an empty set.
%% Throws {open, File, Reason} when the file cannot be opened and
%% {read, File, Thrown} when reading or parsing a line throws; the file is
%% closed in either case.
-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set().
read_file(none, _ParseLine) ->
    sets:new();
read_file(File, ParseLine) ->
    Fd = case file:open(File, [read, binary, raw, {read_ahead, 65536}]) of
             {ok, Device} ->
                 Device;
             {error, Reason} ->
                 throw({open, File, Reason})
         end,
    try
        read_line(Fd, ParseLine, sets:new())
    catch
        Thrown ->
            throw({read, File, Thrown})
    after
        ok = file:close(Fd)
    end.
%% Consume the remaining lines of Fd, adding ParseLine(Line) to Set for each.
%% Returns the final set at end of file and throws the reason on read errors.
-spec read_line(file:io_device(),
                fun((binary()) -> ljid() | url()),
                jid_set() | url_set()) ->
                   jid_set() | url_set().
read_line(Fd, ParseLine, Set) ->
    case file:read_line(Fd) of
        eof ->
            Set;
        {ok, Line} ->
            Entry = ParseLine(Line),
            read_line(Fd, ParseLine, sets:add_element(Entry, Set));
        {error, Reason} ->
            throw(Reason)
    end.
%% Parse one line of a JID list into a bare, lower-cased JID tuple.
%% Throws {bad_jid, Line} (with the untrimmed line) if it is not a valid JID.
-spec parse_jid(binary()) -> ljid().
parse_jid(S) ->
    Trimmed = trim(S),
    try jid:decode(Trimmed) of
        #jid{} = Decoded ->
            jid:remove_resource(jid:tolower(Decoded))
    catch
        _:{bad_jid, _} ->
            throw({bad_jid, S})
    end.
%% Parse one line of a URL list: trailing whitespace is trimmed and the rest
%% must be a whitespace-free http:// or https:// URL (case-insensitive).
%% Throws {bad_url, Line} (with the untrimmed line) otherwise.
-spec parse_url(binary()) -> url().
parse_url(S) ->
    Candidate = trim(S),
    case re:run(Candidate, <<"https?://\\S+$">>, [anchored, caseless, {capture, none}]) of
        nomatch ->
            throw({bad_url, S});
        match ->
            Candidate
    end.
%% Strip trailing whitespace (including the line terminator) from S.
-spec trim(binary()) -> binary().
trim(S) ->
    TrailingWhitespace = <<"\\s+$">>,
    re:replace(S, TrailingWhitespace, <<>>, [{return, binary}]).
%% Function copied from mod_antispam.erl
%% Format Data according to Format and return the result as one binary.
-spec format(io:format(), [term()]) -> binary().
format(Format, Data) ->
    Chars = io_lib:format(Format, Data),
    iolist_to_binary(Chars).
%% Turn a read/parse failure reason into a human-readable binary.
%% Bad JIDs and bad URLs get their own messages; any other reason is handed
%% to file:format_error/1.
-spec format_error(atom() | tuple()) -> binary().
format_error(Reason) ->
    case Reason of
        {bad_jid, JID} ->
            <<"Not a valid JID: ", JID/binary>>;
        {bad_url, URL} ->
            <<"Not an HTTP(S) URL: ", URL/binary>>;
        _ ->
            list_to_binary(file:format_error(Reason))
    end.
%%--------------------------------------------------------------------
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:

298
src/mod_antispam_filter.erl Normal file
View file

@ -0,0 +1,298 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam_filter.erl
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Filter C2S and S2S stanzas
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%%| Definitions
%% @format-begin
-module(mod_antispam_filter).
-author('holger@zedat.fu-berlin.de').
-author('stefan@strigler.de').
-export([init_filtering/1, terminate_filtering/1]).
%% ejabberd_hooks callbacks
-export([s2s_in_handle_info/2, s2s_receive_packet/1, sm_receive_packet/1]).
-include("logger.hrl").
-include("translate.hrl").
-include("mod_antispam.hrl").
-include_lib("xmpp/include/xmpp.hrl").
-type s2s_in_state() :: ejabberd_s2s_in:state().
-define(HTTPC_TIMEOUT, timer:seconds(3)).
%%--------------------------------------------------------------------
%%| Exported
%% Register this module's stanza filtering callbacks on Host:
%% s2s_in_handle_info at sequence 90, s2s_receive_packet and
%% sm_receive_packet at sequence 50.
init_filtering(Host) ->
ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90),
ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50).
%% Remove the three hook registrations made by init_filtering/1 for Host.
terminate_filtering(Host) ->
ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50),
ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90).
%%--------------------------------------------------------------------
%%| Hook callbacks
%% Hook callback for incoming s2s packets: applies sm_receive_packet/1 to the
%% packet and re-wraps the outcome together with the s2s connection state.
-spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) ->
                            {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}.
s2s_receive_packet({Packet, S2SState}) ->
    Verdict = sm_receive_packet(Packet),
    if Verdict =:= {stop, drop} ->
           {stop, {drop, S2SState}};
       true ->
           {Verdict, S2SState}
    end.
%% Hook callback for packets delivered to local users.
%% Only messages (other than groupchat and error ones) and presence
%% subscription requests are checked for spam; everything else, including an
%% accumulator that is already `drop', is passed through unchanged.
-spec sm_receive_packet(stanza() | drop) -> stanza() | drop | {stop, drop}.
sm_receive_packet(drop) ->
    drop;
sm_receive_packet(#message{type = Type,
                           from = Sender,
                           to = #jid{lserver = LServer} = Recipient} =
                      Message)
    when not (Type == groupchat orelse Type == error) ->
    do_check(Sender, Recipient, LServer, Message);
sm_receive_packet(#presence{type = subscribe,
                            from = Sender,
                            to = #jid{lserver = LServer} = Recipient} =
                      Request) ->
    do_check(Sender, Recipient, LServer, Request);
sm_receive_packet(Other) ->
    Other.
%%--------------------------------------------------------------------
%%| Filtering decision
%% Decide what happens to Stanza: it is returned unchanged when no check is
%% needed or when both the sender and the stanza are classified as ham;
%% otherwise it is rejected and {stop, drop} is returned.
do_check(From, To, LServer, Stanza) ->
    case needs_checking(From, To) of
        false ->
            Stanza;
        true ->
            deliver_or_reject(classify(LServer, From, Stanza), Stanza)
    end.

%% Sender check first; the stanza itself is only inspected for ham senders.
classify(LServer, From, Stanza) ->
    case check_from(LServer, From) of
        ham ->
            check_stanza(LServer, From, Stanza);
        spam ->
            spam
    end.

%% Map a ham/spam verdict to the hook result.
deliver_or_reject(ham, Stanza) ->
    Stanza;
deliver_or_reject(spam, Stanza) ->
    reject(Stanza),
    {stop, drop}.
%% Content check: message bodies are examined by check_body/3, every other
%% stanza counts as ham.
check_stanza(LServer, From, Stanza) ->
    case Stanza of
        #message{body = Body} ->
            check_body(LServer, From, xmpp:get_text(Body));
        _ ->
            ham
    end.
%% Hook callback: swallow {Ref, {spam_filter, _}} messages that reach the s2s
%% connection process (logged as expired filter results); any other info
%% message is left for other handlers.
-spec s2s_in_handle_info(s2s_in_state(), any()) ->
                            s2s_in_state() | {stop, s2s_in_state()}.
s2s_in_handle_info(State, Info) ->
    case Info of
        {_Ref, {spam_filter, _}} ->
            ?DEBUG("Dropping expired spam filter result", []),
            {stop, State};
        _ ->
            State
    end.
%% Tell whether a stanza from From to To has to be spam-checked.
%% No check is done when the antispam module is not loaded for the recipient's
%% server or when the access_spam rule allows the recipient. Otherwise the
%% check is needed unless the recipient is subscribed to the sender or to the
%% sender's bare domain.
-spec needs_checking(jid(), jid()) -> boolean().
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
    case gen_mod:is_loaded(LServer, ?MODULE_ANTISPAM) of
        false ->
            ?DEBUG("~s not loaded for ~s", [?MODULE_ANTISPAM, LServer]),
            false;
        true ->
            Access = gen_mod:get_module_opt(LServer, ?MODULE_ANTISPAM, access_spam),
            case acl:match_rule(LServer, Access, To) of
                deny ->
                    ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]),
                    %% The bare sender domain is likely a gateway.
                    not (mod_roster:is_subscribed(From, To)
                         orelse mod_roster:is_subscribed(
                                    jid:make(<<>>, FromHost), To));
                allow ->
                    ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
                    false
            end
    end.
%% Classify the sender: spam when its domain is blocked (the spam_found hook
%% is run in that case), otherwise whatever the filter process answers for
%% the bare sender JID. A call timeout is logged and treated as ham.
-spec check_from(binary(), jid()) -> ham | spam.
check_from(Host, From) ->
    Proc = get_proc_name(Host),
    {_, FromDomain, _} = LFrom = jid:remove_resource(jid:tolower(From)),
    try
        case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of
            false ->
                {spam_filter, Verdict} = gen_server:call(Proc, {check_jid, LFrom}),
                Verdict;
            true ->
                ?DEBUG("Spam JID found in blocked domains: ~p", [From]),
                ejabberd_hooks:run(spam_found, Host, [{jid, From}]),
                spam
        end
    catch
        exit:{timeout, _} ->
            ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers",
                         [jid:encode(From)]),
            ham
    end.
%% Classify a message body. Bodies containing neither URLs nor JIDs are ham;
%% otherwise the extracted URLs and JIDs are handed to the filter process.
%% A call timeout is logged and treated as ham.
-spec check_body(binary(), jid(), binary()) -> ham | spam.
check_body(Host, From, Body) ->
    Found = {extract_urls(Host, Body), extract_jids(Body)},
    case Found of
        {none, none} ->
            ?DEBUG("No JIDs/URLs found in message", []),
            ham;
        {URLs, JIDs} ->
            Proc = get_proc_name(Host),
            Sender = jid:remove_resource(jid:tolower(From)),
            try gen_server:call(Proc, {check_body, URLs, JIDs, Sender}) of
                {spam_filter, Verdict} ->
                    Verdict
            catch
                exit:{timeout, _} ->
                    ?WARNING_MSG("Timeout while checking body", []),
                    ham
            end
    end.
%%--------------------------------------------------------------------
%%| Auxiliary
%% Find all http(s) URLs in Body and pass them through resolve_redirects/2.
%% Returns `none' when the body contains no URL.
-spec extract_urls(binary(), binary()) -> {urls, [url()]} | none.
extract_urls(Host, Body) ->
    case re:run(Body, <<"https?://\\S+">>, [global, {capture, all, binary}]) of
        nomatch ->
            none;
        {match, Matches} when is_list(Matches) ->
            {urls, resolve_redirects(Host, lists:flatten(Matches))}
    end.
%% Extend URLs with their redirect targets. If resolving exits with a
%% timeout, a warning is logged and the original list is returned.
-spec resolve_redirects(binary(), [url()]) -> [url()].
resolve_redirects(_Host, URLs) ->
    try
        do_resolve_redirects(URLs, [])
    catch
        exit:{timeout, _} ->
            ?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]),
            URLs
    end.
%% Upper bound on the number of redirect targets followed per call of
%% do_resolve_redirects/2. The URLs come from message bodies, so without a
%% bound a remote server answering every request with a fresh Location could
%% keep the caller issuing requests forever.
-define(MAX_REDIRECT_HOPS, 10).

%% Request every URL (without automatic redirects) and collect the URLs
%% visited, in reverse order of visiting, in front of Acc.
%% A 3xx answer with a Location header not already in the accumulator makes
%% the target the next URL to visit, as long as the hop budget lasts.
-spec do_resolve_redirects([url()], [url()]) -> [url()].
do_resolve_redirects(URLs, Acc) ->
    do_resolve_redirects(URLs, Acc, ?MAX_REDIRECT_HOPS).

-spec do_resolve_redirects([url()], [url()], non_neg_integer()) -> [url()].
do_resolve_redirects([], Result, _HopsLeft) ->
    Result;
do_resolve_redirects(URLs, Acc, 0) ->
    %% Budget exhausted: no redirect would be followed any more, so the
    %% remaining URLs are recorded without being requested.
    lists:reverse(URLs, Acc);
do_resolve_redirects([URL | Rest], Acc, HopsLeft) ->
    case httpc:request(get,
                       {URL, [{"user-agent", "curl/8.7.1"}]},
                       [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}],
                       [])
    of
        {ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 ->
            Location = proplists:get_value("location", Headers),
            case Location == undefined orelse lists:member(Location, Acc) of
                true ->
                    do_resolve_redirects(Rest, [URL | Acc], HopsLeft);
                false ->
                    do_resolve_redirects([Location | Rest], [URL | Acc], HopsLeft - 1)
            end;
        _Res ->
            do_resolve_redirects(Rest, [URL | Acc], HopsLeft)
    end.
%% Find everything in Body that looks like local@domain and keep the
%% candidates that decode as JIDs (bare and lower-cased).
%% Returns `none' when the body contains no candidate at all.
-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
extract_jids(Body) ->
    case re:run(Body, <<"\\S+@\\S+">>, [global, {capture, all, binary}]) of
        nomatch ->
            none;
        {match, Matches} when is_list(Matches) ->
            Candidates = lists:flatten(Matches),
            {jids, lists:filtermap(fun try_decode_jid/1, Candidates)}
    end.
%% filtermap helper: {true, BareLowerJID} when S decodes as a JID, false when
%% decoding fails with bad_jid.
-spec try_decode_jid(binary()) -> {true, ljid()} | false.
try_decode_jid(S) ->
    try jid:decode(S) of
        #jid{} = Decoded ->
            BareLJID = jid:remove_resource(jid:tolower(Decoded)),
            {true, BareLJID}
    catch
        _:{bad_jid, _} ->
            false
    end.
%% Bounce a stanza classified as spam with a policy-violation error.
%% For messages (other than groupchat and error ones) the spam_stanza_rejected
%% hook is run on the recipient's server before the error is routed.
%% Presences are bounced without running the hook; other stanzas are ignored.
-spec reject(stanza()) -> ok.
reject(#message{type = Type,
                from = Sender,
                to = Recipient,
                lang = Lang} =
           Message)
    when not (Type == groupchat orelse Type == error) ->
    ?INFO_MSG("Rejecting unsolicited message from ~s to ~s",
              [jid:encode(Sender), jid:encode(Recipient)]),
    Err = xmpp:err_policy_violation(<<"Your message is unsolicited">>, Lang),
    ejabberd_hooks:run(spam_stanza_rejected, Recipient#jid.lserver, [Message]),
    ejabberd_router:route_error(Message, Err);
reject(#presence{from = Sender,
                 to = Recipient,
                 lang = Lang} =
           Request) ->
    ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s",
              [jid:encode(Sender), jid:encode(Recipient)]),
    Err = xmpp:err_policy_violation(<<"Your traffic is unsolicited">>, Lang),
    ejabberd_router:route_error(Request, Err);
reject(_) ->
    ok.
%% Registered name of the antispam process serving Host.
-spec get_proc_name(binary()) -> atom().
get_proc_name(Host) ->
    ProcName = gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM),
    ProcName.
%%--------------------------------------------------------------------
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:

147
src/mod_antispam_rtbl.erl Normal file
View file

@ -0,0 +1,147 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam_rtbl.erl
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Collection of RTBL specific functionality
%%% Created : 20 Mar 2025 by Stefan Strigler <stefan@strigler.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
-module(mod_antispam_rtbl).
-author('stefan@strigler.de').
-include_lib("xmpp/include/xmpp.hrl").
-include("logger.hrl").
-include("mod_antispam.hrl").
-define(SERVICE_MODULE, mod_antispam).
-define(SERVICE_JID_PREFIX, "rtbl-").
-export([parse_blocked_domains/1,
parse_pubsub_event/1,
pubsub_event_handler/1,
request_blocked_domains/3,
subscribe/3,
unsubscribe/3]).
%% @format-begin
%% Send a pubsub subscription request for node RTBLDomainsNode to RTBLHost.
%% The subscribing JID is the service JID derived from From; the IQ response
%% is delivered to the calling process tagged `subscribe_result'.
subscribe(RTBLHost, RTBLDomainsNode, From) ->
    ServiceJID = service_jid(From),
    Subscription = #ps_subscribe{jid = ServiceJID, node = RTBLDomainsNode},
    Request =
        #iq{type = set,
            to = jid:make(RTBLHost),
            from = ServiceJID,
            sub_els = [#pubsub{subscribe = Subscription}]},
    ?DEBUG("Sending subscription request:~n~p", [xmpp:encode(Request)]),
    ejabberd_router:route_iq(Request, subscribe_result, self()).
%% Send a pubsub unsubscribe request for node RTBLDomainsNode to RTBLHost.
%% Nothing is sent when no RTBL host is configured (`none').
%% The request names the same service JID that subscribe/3 subscribed with;
%% an unsubscribe for the bare host JID would refer to a JID that was never
%% subscribed. The IQ response is delivered to the calling process tagged
%% `unsubscribe_result'.
-spec unsubscribe(binary() | none, binary(), binary()) -> ok.
unsubscribe(none, _PSNode, _From) ->
    ok;
unsubscribe(RTBLHost, RTBLDomainsNode, From) ->
    FromJID = service_jid(From),
    SubIQ =
        #iq{type = set,
            to = jid:make(RTBLHost),
            from = FromJID,
            sub_els =
                [#pubsub{unsubscribe = #ps_unsubscribe{jid = FromJID, node = RTBLDomainsNode}}]},
    ejabberd_router:route_iq(SubIQ, unsubscribe_result, self()).
%% Ask RTBLHost for all items of node RTBLDomainsNode. Nothing is sent when no
%% RTBL host is configured (`none'). The IQ response is delivered to the
%% calling process tagged `blocked_domains'.
-spec request_blocked_domains(binary() | none, binary(), binary()) -> ok.
request_blocked_domains(none, _PSNode, _From) ->
    ok;
request_blocked_domains(RTBLHost, RTBLDomainsNode, From) ->
    ItemsQuery = #pubsub{items = #ps_items{node = RTBLDomainsNode}},
    Request =
        #iq{type = get,
            from = jid:make(From),
            to = jid:make(RTBLHost),
            sub_els = [ItemsQuery]},
    ?DEBUG("Requesting RTBL blocked domains from ~s:~n~p", [RTBLHost, xmpp:encode(Request)]),
    ejabberd_router:route_iq(Request, blocked_domains, self()).
%% Extract the blocked domains from an IQ result carrying the items of the
%% configured RTBL node. Returns `undefined' when the result has no pubsub
%% items payload for that node.
-spec parse_blocked_domains(stanza()) -> #{binary() => any()} | undefined.
parse_blocked_domains(#iq{to = #jid{lserver = LServer}, type = result} = IQ) ->
    ?DEBUG("parsing iq-result items: ~p", [IQ]),
    [#rtbl_service{node = RTBLDomainsNode}] = mod_antispam:get_rtbl_services_option(LServer),
    case xmpp:get_subtag(IQ, #pubsub{}) of
        #pubsub{items = #ps_items{node = Node, items = Items}} when Node =:= RTBLDomainsNode ->
            ?DEBUG("Got items:~n~p", [Items]),
            parse_items(Items);
        _ ->
            undefined
    end.
%% Turn a pubsub event notification for the configured RTBL node into a map
%% of item id to true (published) or false (retracted). An id that is both
%% published and retracted in one event ends up as true. Any other payload is
%% logged and gives an empty map.
-spec parse_pubsub_event(stanza()) -> #{binary() => any()}.
parse_pubsub_event(#message{to = #jid{lserver = LServer}} = Msg) ->
    [#rtbl_service{node = RTBLDomainsNode}] = mod_antispam:get_rtbl_services_option(LServer),
    case xmpp:get_subtag(Msg, #ps_event{}) of
        #ps_event{items =
                      #ps_items{node = Node,
                                items = Items,
                                retract = RetractIds}}
            when Node =:= RTBLDomainsNode ->
            Retracted = retract_items(RetractIds),
            Published = parse_items(Items),
            maps:merge(Retracted, Published);
        Other ->
            ?WARNING_MSG("Couldn't extract items: ~p", [Other]),
            #{}
    end.
%% Map the id of every pubsub item to true.
-spec parse_items([ps_item()]) -> #{binary() => any()}.
parse_items(Items) ->
    %% TODO extract meta/extra instructions
    ToEntry = fun(#ps_item{id = ID}) -> {ID, true} end,
    maps:from_list(
        lists:map(ToEntry, Items)).
%% Map every retracted item id to false.
-spec retract_items([binary()]) -> #{binary() => false}.
retract_items(Ids) ->
    maps:from_list([{ID, false} || ID <- Ids]).
%% JID used towards the RTBL service: Host with a resource made of the
%% service prefix followed by this cluster node's id.
-spec service_jid(binary()) -> jid().
service_jid(Host) ->
    NodeId = ejabberd_cluster:node_id(),
    Resource = <<?SERVICE_JID_PREFIX, NodeId/binary>>,
    jid:make(<<>>, Host, Resource).
%%--------------------------------------------------------------------
%% Hook callbacks.
%%--------------------------------------------------------------------
%% Hook callback for messages addressed to the RTBL service resource.
%% A message whose sender equals the configured RTBL host is parsed, the
%% resulting domain updates are cast to the antispam process and the message
%% is dropped. Messages to that resource from any other sender are logged and
%% passed on, as is everything not addressed to the service resource.
-spec pubsub_event_handler(stanza()) -> drop | stanza().
pubsub_event_handler(#message{from = FromJid,
                              to =
                                  #jid{lserver = LServer,
                                       lresource = <<?SERVICE_JID_PREFIX, _/binary>>}} =
                         Msg) ->
    ?DEBUG("Got RTBL message:~n~p", [Msg]),
    From = jid:encode(FromJid),
    [#rtbl_service{host = RTBLHost}] = mod_antispam:get_rtbl_services_option(LServer),
    if RTBLHost =:= From ->
           Updates = parse_pubsub_event(Msg),
           Proc = gen_mod:get_module_proc(LServer, ?SERVICE_MODULE),
           gen_server:cast(Proc, {update_blocked_domains, Updates}),
           %% FIXME what's the difference between `{drop, ...}` and `{stop, {drop, ...}}`?
           drop;
       true ->
           ?INFO_MSG("Got unexpected message from ~s to rtbl resource:~n~p", [From, Msg]),
           Msg
    end;
pubsub_event_handler(Acc) ->
    ?DEBUG("unexpected something on pubsub_event_handler: ~p", [Acc]),
    Acc.

290
test/antispam_tests.erl Normal file
View file

@ -0,0 +1,290 @@
%%%-------------------------------------------------------------------
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Created : 8 May 2025 by Stefan Strigler
%%%
%%%
%%% ejabberd, Copyright (C) 2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
-module(antispam_tests).
-compile(export_all).
-import(suite, [recv_presence/1, send_recv/2, my_jid/1, muc_room_jid/1,
send/2, recv_message/1, recv_iq/1, muc_jid/1,
alt_room_jid/1, wait_for_slave/1, wait_for_master/1,
disconnect/1, put_event/2, get_event/1, peer_muc_jid/1,
my_muc_jid/1, get_features/2, set_opt/3]).
-include("suite.hrl").
-include("mod_antispam.hrl").
%% @format-begin
%%%===================================================================
%%% API
%%%===================================================================
%%%===================================================================
%%% Single tests
%%%===================================================================
%% Test group for the antispam module: a sequence of test case names, each
%% produced by single_test/1 from the short names listed below.
single_cases() ->
    Names =
        [block_by_jid,
         block_by_url,
         blocked_jid_is_cached,
         uncache_blocked_jid,
         check_blocked_domain,
         unblock_domain,
         empty_domain_list,
         block_domain_globally,
         check_domain_blocked_globally,
         unblock_domain_in_vhost,
         unblock_domain_globally,
         block_domain_in_vhost,
         unblock_domain_in_vhost2,
         jid_cache,
         rtbl_domains,
         rtbl_domains_whitelisted,
         spam_dump_file],
    {antispam_single, [sequence], [single_test(Name) || Name <- Names]}.
%%%===================================================================
%% A plain greeting from spammer_jid@localhost must be classified as spam.
block_by_jid(Config) ->
    Hello = message_hello(<<"spammer_jid">>, <<"localhost">>, Config),
    is_spam(Hello).
%% A sender that is not flagged yet passes with a harmless body, but is
%% rejected as soon as the body contains the URL https://spam.domain.url.
block_by_url(Config) ->
    Harmless = message_hello(<<"spammer">>, <<"localhost">>, Config),
    is_not_spam(Harmless),
    Sender = jid:make(<<"spammer">>, <<"localhost">>, <<"spam_client">>),
    WithUrl = message(Sender, my_jid(Config), <<"hello world\nhttps://spam.domain.url">>),
    is_spam(WithUrl).
%% The same harmless greeting from spammer@localhost is now expected to be
%% rejected (presumably because the sender was cached by the preceding test
%% in the sequence).
blocked_jid_is_cached(Config) ->
    Hello = message_hello(<<"spammer">>, <<"localhost">>, Config),
    is_spam(Hello).
%% Dropping the bare JID spammer@localhost from the spam filter cache makes a
%% harmless greeting from that sender pass again.
uncache_blocked_jid(Config) ->
    Host = ?config(server, Config),
    Spammer = jid:make(<<"spammer">>, <<"localhost">>, <<"">>),
    %% jid:encode/1 replaces the deprecated jid:to_string/1.
    mod_antispam:drop_from_spam_filter_cache(Host, jid:encode(Spammer)),
    is_not_spam(message_hello(<<"spammer">>, <<"localhost">>, Config)).
%% Any sender on the domain spam_domain.org must be rejected.
check_blocked_domain(Config) ->
    Hello = message_hello(<<"other_spammer">>, <<"spam_domain.org">>, Config),
    is_spam(Hello).
%% After removing spam_domain.org the vhost's block list must be empty and
%% messages from that domain must pass.
unblock_domain(Config) ->
    Host = ?config(server, Config),
    Removed = mod_antispam:remove_blocked_domain(Host, <<"spam_domain.org">>),
    ?match({ok, _}, Removed),
    Remaining = mod_antispam:get_blocked_domains(Host),
    ?match([], Remaining),
    Hello = message_hello(<<"spammer">>, <<"spam_domain.org">>, Config),
    is_not_spam(Hello).
%%%===================================================================
%% With an empty block list, a message from spam.domain is not spam.
empty_domain_list(Config) ->
    Host = ?config(server, Config),
    Blocked = mod_antispam:get_blocked_domains(Host),
    ?match([], Blocked),
    Sender = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>),
    is_not_spam(message(Sender, my_jid(Config), <<"hello world">>)).
%% Blocking spam.domain for the pseudo-host <<"global">> makes messages from
%% that domain spam for the vhost under test.
block_domain_globally(Config) ->
    Added = mod_antispam:add_blocked_domain(<<"global">>, <<"spam.domain">>),
    ?match({ok, _}, Added),
    Sender = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>),
    Spam = message(Sender, my_jid(Config), <<"hello world">>),
    is_spam(Spam).
%% Every vhost that has mod_antispam loaded must list spam.domain as blocked.
check_domain_blocked_globally(_Config) ->
    Vhosts =
        lists:filter(fun(H) -> gen_mod:is_loaded(H, mod_antispam) end,
                     ejabberd_option:hosts()),
    NumVhosts = length(Vhosts),
    IsBlocked = has_spam_domain(<<"spam.domain">>),
    Blocking = [V || V <- Vhosts, IsBlocked(V)],
    ?match(NumVhosts, length(Blocking)).
%% Removing spam.domain for a single vhost empties that vhost's block list
%% and lets messages from the domain through.
unblock_domain_in_vhost(Config) ->
    Host = ?config(server, Config),
    Removed = mod_antispam:remove_blocked_domain(Host, <<"spam.domain">>),
    ?match({ok, _}, Removed),
    Remaining = mod_antispam:get_blocked_domains(Host),
    ?match([], Remaining),
    Sender = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>),
    is_not_spam(message(Sender, my_jid(Config), <<"hello world">>)).
%% Before the global removal all but one of the antispam vhosts still block
%% spam.domain; after removing it for <<"global">> none of them does.
unblock_domain_globally(_Config) ->
    Vhosts =
        lists:filter(fun(H) -> gen_mod:is_loaded(H, mod_antispam) end,
                     ejabberd_option:hosts()),
    NumVhosts = length(Vhosts),
    IsBlocked = has_spam_domain(<<"spam.domain">>),
    StillBlocking = [V || V <- Vhosts, IsBlocked(V)],
    ?match(NumVhosts, length(StillBlocking) + 1),
    ?match({ok, _}, mod_antispam:remove_blocked_domain(<<"global">>, <<"spam.domain">>)),
    ?match([], [V || V <- Vhosts, IsBlocked(V)]).
%% Blocking spam.domain on one vhost affects exactly that vhost, and
%% messages from the domain are spam there.
block_domain_in_vhost(Config) ->
    Host = ?config(server, Config),
    Vhosts =
        lists:filter(fun(H) -> gen_mod:is_loaded(H, mod_antispam) end,
                     ejabberd_option:hosts()),
    ?match({ok, _}, mod_antispam:add_blocked_domain(Host, <<"spam.domain">>)),
    IsBlocked = has_spam_domain(<<"spam.domain">>),
    ?match([Host], [V || V <- Vhosts, IsBlocked(V)]),
    Sender = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>),
    is_spam(message(Sender, my_jid(Config), <<"hello world">>)).
%% Undo the per-vhost block of spam.domain, check that the domain is accepted
%% again, then close the client connection.
unblock_domain_in_vhost2(Config) ->
    Host = ?config(server, Config),
    Removed = mod_antispam:remove_blocked_domain(Host, <<"spam.domain">>),
    ?match({ok, _}, Removed),
    Sender = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>),
    is_not_spam(message(Sender, my_jid(Config), <<"hello world">>)),
    disconnect(Config).
%%%===================================================================
%% Round trip through the spam filter cache: a sender is accepted, rejected
%% once its full JID has been added to the cache, and accepted again after
%% the JID has been dropped from it.
jid_cache(Config) ->
    Host = ?config(server, Config),
    SpamFrom = jid:make(<<"spammer">>, Host, <<"spam_client">>),
    %% jid:encode/1 replaces the deprecated jid:to_string/1.
    SpamJid = jid:encode(SpamFrom),
    is_not_spam(message_hello(<<"spammer">>, Host, Config)),
    mod_antispam:add_to_spam_filter_cache(Host, SpamJid),
    is_spam(message_hello(<<"spammer">>, Host, Config)),
    mod_antispam:drop_from_spam_filter_cache(Host, SpamJid),
    is_not_spam(message_hello(<<"spammer">>, Host, Config)),
    disconnect(Config).
%%%===================================================================
%% Exercise the RTBL integration against a pubsub node created for this test.
%%
%% Steps: create the node and publish one item, reload mod_antispam with an
%% rtbl_services option pointing at that node, then check that
%%  - the item published before the reload ends up in the blocked domains,
%%  - an item published afterwards is added,
%%  - deleting that item removes the domain again.
%% Checks that depend on asynchronous delivery are wrapped in ?retry.
rtbl_domains(Config) ->
    Host = ?config(server, Config),
    %% jid:encode/1 replaces the deprecated jid:to_string/1.
    RTBLHost = jid:encode(suite:pubsub_jid(Config)),
    RTBLDomainsNode = <<"spam_source_domains">>,
    OldOpts = gen_mod:get_module_opts(Host, mod_antispam),
    NewOpts =
        maps:merge(OldOpts,
                   #{rtbl_services => [#rtbl_service{host = RTBLHost, node = RTBLDomainsNode}]}),
    Owner = jid:make(?config(user, Config), ?config(server, Config), <<>>),
    {result, _} =
        mod_pubsub:create_node(RTBLHost,
                               ?config(server, Config),
                               RTBLDomainsNode,
                               Owner,
                               <<"flat">>),
    {result, _} =
        mod_pubsub:publish_item(RTBLHost,
                                ?config(server, Config),
                                RTBLDomainsNode,
                                Owner,
                                <<"spam.source.domain">>,
                                [xmpp:encode(#ps_item{id = <<"spam.source.domain">>,
                                                      sub_els = []})]),
    mod_antispam:reload(Host, OldOpts, NewOpts),
    ?match({ok, _}, mod_antispam:remove_blocked_domain(Host, <<"spam_domain.org">>)),
    ?retry(100,
           10,
           ?match([<<"spam.source.domain">>], mod_antispam:get_blocked_domains(Host))),
    {result, _} =
        mod_pubsub:publish_item(RTBLHost,
                                ?config(server, Config),
                                RTBLDomainsNode,
                                Owner,
                                <<"spam.source.another">>,
                                [xmpp:encode(#ps_item{id = <<"spam.source.another">>,
                                                      sub_els = []})]),
    ?retry(100, 10, ?match(true, (has_spam_domain(<<"spam.source.another">>))(Host))),
    {result, _} =
        mod_pubsub:delete_item(RTBLHost, RTBLDomainsNode, Owner, <<"spam.source.another">>, true),
    ?retry(100, 10, ?match(false, (has_spam_domain(<<"spam.source.another">>))(Host))),
    {result, _} = mod_pubsub:delete_node(RTBLHost, RTBLDomainsNode, Owner),
    disconnect(Config).
%% Check that an RTBL item named whitelisted.domain does not end up in the
%% blocked domains, while a second item, yetanother.domain, does.
%%
%% The whitelisted item is published before mod_antispam is reloaded with the
%% RTBL service configured; the second item is published afterwards and is
%% used as a marker that the earlier item has been processed.
rtbl_domains_whitelisted(Config) ->
    Host = ?config(server, Config),
    %% jid:encode/1 replaces the deprecated jid:to_string/1.
    RTBLHost = jid:encode(suite:pubsub_jid(Config)),
    RTBLDomainsNode = <<"spam_source_domains">>,
    OldOpts = gen_mod:get_module_opts(Host, mod_antispam),
    NewOpts =
        maps:merge(OldOpts,
                   #{rtbl_services => [#rtbl_service{host = RTBLHost, node = RTBLDomainsNode}]}),
    Owner = jid:make(?config(user, Config), ?config(server, Config), <<>>),
    {result, _} =
        mod_pubsub:create_node(RTBLHost,
                               ?config(server, Config),
                               RTBLDomainsNode,
                               Owner,
                               <<"flat">>),
    {result, _} =
        mod_pubsub:publish_item(RTBLHost,
                                ?config(server, Config),
                                RTBLDomainsNode,
                                Owner,
                                <<"whitelisted.domain">>,
                                [xmpp:encode(#ps_item{id = <<"whitelisted.domain">>,
                                                      sub_els = []})]),
    mod_antispam:reload(Host, OldOpts, NewOpts),
    {result, _} =
        mod_pubsub:publish_item(RTBLHost,
                                ?config(server, Config),
                                RTBLDomainsNode,
                                Owner,
                                <<"yetanother.domain">>,
                                [xmpp:encode(#ps_item{id = <<"yetanother.domain">>,
                                                      sub_els = []})]),
    ?retry(100, 10, ?match(true, (has_spam_domain(<<"yetanother.domain">>))(Host))),
    %% we assume that the previous "whitelisted.domain" pubsub item has been consumed by now, so we
    %% can check that it doesn't exist
    ?match(false, (has_spam_domain(<<"whitelisted.domain">>))(Host)),
    {result, _} = mod_pubsub:delete_node(RTBLHost, RTBLDomainsNode, Owner),
    disconnect(Config).
%%%===================================================================
%% Check that rejected messages are written to spam.log in the current
%% working directory: wait until the file is non-empty, trigger one more
%% rejection with a distinctive body, then wait for that body to show up.
spam_dump_file(Config) ->
    {ok, CWD} = file:get_cwd(),
    Filename = filename:join(CWD, "spam.log"),
    ?retry(100, 100, ?match(true, byte_size(get_bytes(Filename)) > 0)),
    Sender = jid:make(<<"spammer_jid">>, <<"localhost">>, <<"spam_client">>),
    Spam = message(Sender, my_jid(Config), <<"A very specific spam message">>),
    is_spam(Spam),
    ?retry(100,
           100,
           ?match({match, _}, re:run(get_bytes(Filename), <<"A very specific spam message">>))).
%%%===================================================================
%%% Internal functions
%%%===================================================================
%% Prefix the short test name `T' with "antispam_" and return it as an atom.
single_test(T) ->
    Short = atom_to_list(T),
    list_to_atom("antispam_" ++ Short).
%% Return a predicate over vhosts that is true when `Domain' is among the
%% domains mod_antispam reports as blocked for that vhost.
has_spam_domain(Domain) ->
    fun(Host) ->
        Blocked = mod_antispam:get_blocked_domains(Host),
        lists:member(Domain, Blocked)
    end.
%% Assert that the s2s receive hook hands `Msg' back unchanged.
is_not_spam(Msg) ->
    Verdict = mod_antispam_filter:s2s_receive_packet({Msg, undefined}),
    ?match({Msg, undefined}, Verdict).
%% Assert that the s2s receive hook stops processing and drops `Spam'.
is_spam(Spam) ->
    Verdict = mod_antispam_filter:s2s_receive_packet({Spam, undefined}),
    ?match({stop, {drop, undefined}}, Verdict).
%% Chat message with body "hello world", sent from Username@Host/spam_client
%% to the JID of the test client.
message_hello(Username, Host, Config) ->
    message(jid:make(Username, Host, <<"spam_client">>),
            my_jid(Config),
            <<"hello world">>).
%% Build a chat-type #message{} from `From' to `To' with one body text element.
message(From, To, BodyText) ->
    Body = [#text{data = BodyText}],
    #message{type = chat,
             from = From,
             to = To,
             body = Body}.
%% Return the whole content of `Filename'; fails with badmatch when the
%% file cannot be read.
get_bytes(Filename) ->
    {ok, Contents} = file:read_file(Filename),
    Contents.

View file

@ -10,7 +10,7 @@ attached to it.
``` ```
mkdir test/docker/db/mysql/data mkdir test/docker/db/mysql/data
mkdir test/docker/db/postgres/data mkdir test/docker/db/postgres/data
(cd test/docker; docker-compose up) (cd test/docker; docker compose up)
``` ```
You can stop all the databases with CTRL-C. You can stop all the databases with CTRL-C.
@ -20,8 +20,8 @@ You can stop all the databases with CTRL-C.
The following commands will create the necessary login, user and database, will grant rights on the database in MSSQL and create the ejabberd schema: The following commands will create the necessary login, user and database, will grant rights on the database in MSSQL and create the ejabberd schema:
``` ```
docker exec ejabberd-mssql /opt/mssql-tools/bin/sqlcmd -U SA -P ejabberd_Test1 -S localhost -i /initdb_mssql.sql docker exec ejabberd-mssql /opt/mssql-tools18/bin/sqlcmd -U SA -P ejabberd_Test1 -S localhost -i /initdb_mssql.sql -C
docker exec ejabberd-mssql /opt/mssql-tools/bin/sqlcmd -U SA -P ejabberd_Test1 -S localhost -d ejabberd_test -i /mssql.sql docker exec ejabberd-mssql /opt/mssql-tools18/bin/sqlcmd -U SA -P ejabberd_Test1 -S localhost -d ejabberd_test -i /mssql.sql -C
``` ```
## Running tests ## Running tests
@ -44,7 +44,7 @@ make test
You can fully clean up the environment with: You can fully clean up the environment with:
``` ```
(cd test/docker; docker-compose down) (cd test/docker; docker compose down)
``` ```
If you want to clean the data, you can remove the data volumes after the `docker compose down` command: If you want to clean the data, you can remove the data volumes after the `docker compose down` command:

View file

@ -7,7 +7,6 @@ services:
volumes: volumes:
- mysqldata:/var/lib/mysql - mysqldata:/var/lib/mysql
- ../../sql/mysql.sql:/docker-entrypoint-initdb.d/mysql.sql:ro - ../../sql/mysql.sql:/docker-entrypoint-initdb.d/mysql.sql:ro
command: --default-authentication-plugin=mysql_native_password
restart: always restart: always
ports: ports:
- 3306:3306 - 3306:3306

View file

@ -339,6 +339,10 @@ init_per_testcase(TestCase, OrigConfig) ->
bind(auth(connect(Config))); bind(auth(connect(Config)));
"replaced" ++ _ -> "replaced" ++ _ ->
auth(connect(Config)); auth(connect(Config));
"antispam" ++ _ ->
Password = ?config(password, Config),
ejabberd_auth:try_register(User, Server, Password),
open_session(bind(auth(connect(Config))));
_ when IsMaster or IsSlave -> _ when IsMaster or IsSlave ->
Password = ?config(password, Config), Password = ?config(password, Config),
ejabberd_auth:try_register(User, Server, Password), ejabberd_auth:try_register(User, Server, Password),
@ -425,6 +429,7 @@ db_tests(DB) when DB == mnesia; DB == redis ->
auth_md5, auth_md5,
presence_broadcast, presence_broadcast,
last, last,
antispam_tests:single_cases(),
webadmin_tests:single_cases(), webadmin_tests:single_cases(),
roster_tests:single_cases(), roster_tests:single_cases(),
private_tests:single_cases(), private_tests:single_cases(),

View file

@ -6,6 +6,14 @@ define_macro:
mod_announce: mod_announce:
db_type: internal db_type: internal
access: local access: local
mod_antispam:
rtbl_services:
- "pubsub.mnesia.localhost"
spam_jids_file: spam_jids.txt
spam_domains_file: spam_domains.txt
spam_urls_file: spam_urls.txt
whitelist_domains_file: whitelist_domains.txt
spam_dump_file: spam.log
mod_blocking: [] mod_blocking: []
mod_caps: mod_caps:
db_type: internal db_type: internal

View file

@ -7,6 +7,14 @@ define_macro:
mod_announce: mod_announce:
db_type: internal db_type: internal
access: local access: local
mod_antispam:
rtbl_services:
- "pubsub.redis.localhost"
spam_jids_file: spam_jids.txt
spam_domains_file: spam_domains.txt
spam_urls_file: spam_urls.txt
whitelist_domains_file: whitelist_domains.txt
spam_dump_file: spam.log
mod_blocking: [] mod_blocking: []
mod_caps: mod_caps:
db_type: internal db_type: internal

View file

@ -0,0 +1 @@
spam_domain.org

View file

@ -0,0 +1 @@
spammer_jid@localhost

View file

@ -0,0 +1 @@
https://spam.domain.url

View file

@ -0,0 +1 @@
whitelisted.domain

View file

@ -51,6 +51,11 @@ init_config(Config) ->
{ok, _} = file:copy(SelfSignedCertFile, {ok, _} = file:copy(SelfSignedCertFile,
filename:join([CWD, "self-signed-cert.pem"])), filename:join([CWD, "self-signed-cert.pem"])),
{ok, _} = file:copy(CAFile, filename:join([CWD, "ca.pem"])), {ok, _} = file:copy(CAFile, filename:join([CWD, "ca.pem"])),
copy_file(Config, "spam_jids.txt"),
copy_file(Config, "spam_urls.txt"),
copy_file(Config, "spam_domains.txt"),
copy_file(Config, "whitelist_domains.txt"),
file:write_file(filename:join([CWD, "spam.log"]), []),
{ok, MacrosContentTpl} = file:read_file(MacrosPathTpl), {ok, MacrosContentTpl} = file:read_file(MacrosPathTpl),
Password = <<"password!@#$%^&*()'\"`~<>+-/;:_=[]{}|\\">>, Password = <<"password!@#$%^&*()'\"`~<>+-/;:_=[]{}|\\">>,
Backends = get_config_backends(), Backends = get_config_backends(),
@ -138,6 +143,11 @@ init_config(Config) ->
{backends, Backends} {backends, Backends}
|Config]. |Config].
%% Copy `File' from the suite's data_dir (taken from `Config') into the
%% current working directory; fails with badmatch if the copy fails.
copy_file(Config, File) ->
    DataDir = proplists:get_value(data_dir, Config),
    {ok, CWD} = file:get_cwd(),
    Source = filename:join(DataDir, File),
    Target = filename:join(CWD, File),
    {ok, _} = file:copy(Source, Target).
copy_configtest_yml(DataDir, CWD) -> copy_configtest_yml(DataDir, CWD) ->
Files = filelib:wildcard(filename:join([DataDir, "configtest.yml"])), Files = filelib:wildcard(filename:join([DataDir, "configtest.yml"])),
lists:foreach( lists:foreach(
@ -906,6 +916,21 @@ receiver(NS, Owner, Socket, MRef) ->
receiver(NS, Owner, Socket, MRef) receiver(NS, Owner, Socket, MRef)
end. end.
%% @doc Call `Fun' until it returns without raising, retrying at most N
%% times and sleeping `Interval' milliseconds before each retry. Once no
%% retries are left, `Fun' is called unprotected, so its exception reaches
%% the caller.
%% Shamelessly stolen (with slight adaptations) from snabbkaffe.
-spec retry(integer(), non_neg_integer(), fun(() -> Ret)) -> Ret.
retry(_Interval, 0, Fun) ->
    Fun();
retry(Interval, Remaining, Fun) ->
    try
        Fun()
    catch
        Class:Reason ->
            timer:sleep(Interval),
            ct:pal("retrying ~p more times, result was ~p:~p", [Remaining, Class, Reason]),
            retry(Interval, Remaining - 1, Fun)
    end.
%%%=================================================================== %%%===================================================================
%%% Clients puts and gets events via this relay. %%% Clients puts and gets events via this relay.
%%%=================================================================== %%%===================================================================

View file

@ -89,6 +89,8 @@
-define(send_recv(Send, Recv), -define(send_recv(Send, Recv),
?match(Recv, suite:send_recv(Config, Send))). ?match(Recv, suite:send_recv(Config, Send))).
%% Wrap the expression FUN in a zero-arity fun and pass it to suite:retry/3
%% together with TIMEOUT and N, so the expression is evaluated by suite:retry/3
%% rather than at the call site.
-define(retry(TIMEOUT, N, FUN), suite:retry(TIMEOUT, N, fun() -> FUN end)).
-define(COMMON_VHOST, <<"localhost">>). -define(COMMON_VHOST, <<"localhost">>).
-define(MNESIA_VHOST, <<"mnesia.localhost">>). -define(MNESIA_VHOST, <<"mnesia.localhost">>).
-define(REDIS_VHOST, <<"redis.localhost">>). -define(REDIS_VHOST, <<"redis.localhost">>).