From d00561b58c2d3f1ecf9d1ed1cda455f6ac08691c Mon Sep 17 00:00:00 2001 From: Badlop Date: Wed, 18 Jun 2025 10:32:49 +0200 Subject: [PATCH] Move filtering implementation to a submodule --- src/mod_antispam.erl | 253 ++---------------------------- src/mod_antispam_filter.erl | 299 ++++++++++++++++++++++++++++++++++++ test/antispam_tests.erl | 5 +- 3 files changed, 313 insertions(+), 244 deletions(-) create mode 100644 src/mod_antispam_filter.erl diff --git a/src/mod_antispam.erl b/src/mod_antispam.erl index 8b161b558..ef2106219 100644 --- a/src/mod_antispam.erl +++ b/src/mod_antispam.erl @@ -50,11 +50,6 @@ terminate/2, code_change/3]). -%% ejabberd_hooks callbacks. --export([s2s_in_handle_info/2, - s2s_receive_packet/1, - sm_receive_packet/1]). - %% ejabberd_commands callbacks. -export([add_blocked_domain/2, add_to_spam_filter_cache/2, @@ -76,7 +71,6 @@ -type filename() :: binary() | none | false. -type jid_set() :: sets:set(ljid()). -type url_set() :: sets:set(url()). --type s2s_in_state() :: ejabberd_s2s_in:state(). -record(state, {host = <<>> :: binary(), @@ -96,7 +90,6 @@ -type state() :: #state{}. -define(COMMAND_TIMEOUT, timer:seconds(30)). --define(HTTPC_TIMEOUT, timer:seconds(3)). -define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>). -define(DEFAULT_CACHE_SIZE, 10000). @@ -178,7 +171,15 @@ mod_doc() -> #{desc => ?T("Reads from text file and RTBL, filters stanzas and writes dump file."), note => "added in 25.xx", opts => - [{spam_dump_file, + [{access_spam, + #{value => ?T("Access"), + desc => + ?T("Access rule that controls what accounts may receive spam messages. " + "If the rule returns `allow` for a given recipient, " + "spam messages aren't rejected for that recipient. " + "The default value is 'none', which means that all recipients " + "are subject to spam filtering verification.")}}, + {spam_dump_file, #{value => ?T("false | true | Path"), desc => ?T("Path to the file to store blocked messages. 
" @@ -207,9 +208,6 @@ init([Host, Opts]) -> url := URLsSet, domains := SpamDomainsSet, whitelist_domains := WhitelistDomains} -> - ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90), - ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50), - ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50), ejabberd_hooks:add(local_send_to_resource_hook, Host, mod_antispam_rtbl, @@ -217,6 +215,7 @@ init([Host, Opts]) -> 50), RTBLHost = gen_mod:get_opt(rtbl_host, Opts), RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, Opts), + mod_antispam_filter:init_filtering(Host), InitState = #state{host = Host, jid_set = JIDsSet, @@ -252,9 +251,6 @@ handle_call({check_body, URLs, JIDs, From}, Result2 end, {reply, {spam_filter, Result}, State2}; -handle_call({resolve_redirects, URLs}, _From, State) -> - ResolvedURLs = do_resolve_redirects(URLs, []), - {reply, {spam_filter, ResolvedURLs}, State}; handle_call({reload_files, Files}, _From, State) -> {Result, State1} = reload_files(Files, State), {reply, {spam_filter, Result}, State1}; @@ -408,9 +404,7 @@ terminate(Reason, ?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]), misc:cancel_timer(RTBLRetryTimer), mod_antispam_dump:terminate_dumping(Host, Fd), - ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50), - ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50), - ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90), + mod_antispam_filter:terminate_filtering(Host), ejabberd_hooks:delete(local_send_to_resource_hook, Host, mod_antispam_rtbl, @@ -424,209 +418,9 @@ code_change(_OldVsn, #state{host = Host} = State, _Extra) -> ?DEBUG("Updating spam filter process for ~s", [Host]), {ok, State}. 
-%%-------------------------------------------------------------------- -%%| Hook callbacks - --spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) -> - {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}. -s2s_receive_packet({A, State}) -> - case sm_receive_packet(A) of - {stop, drop} -> - {stop, {drop, State}}; - Result -> - {Result, State} - end. - --spec sm_receive_packet(stanza() | drop) -> stanza() | drop | {stop, drop}. -sm_receive_packet(drop = Acc) -> - Acc; -sm_receive_packet(#message{from = From, - to = #jid{lserver = LServer} = To, - type = Type} = - Msg) - when Type /= groupchat, Type /= error -> - do_check(From, To, LServer, Msg); -sm_receive_packet(#presence{from = From, - to = #jid{lserver = LServer} = To, - type = subscribe} = - Presence) -> - do_check(From, To, LServer, Presence); -sm_receive_packet(Acc) -> - Acc. - -do_check(From, To, LServer, Stanza) -> - case needs_checking(From, To) of - true -> - case check_from(LServer, From) of - ham -> - case check_stanza(LServer, From, Stanza) of - ham -> - Stanza; - spam -> - reject(Stanza), - {stop, drop} - end; - spam -> - reject(Stanza), - {stop, drop} - end; - false -> - Stanza - end. - -check_stanza(LServer, From, #message{body = Body}) -> - check_body(LServer, From, xmpp:get_text(Body)); -check_stanza(_, _, _) -> - ham. - --spec s2s_in_handle_info(s2s_in_state(), any()) -> - s2s_in_state() | {stop, s2s_in_state()}. -s2s_in_handle_info(State, {_Ref, {spam_filter, _}}) -> - ?DEBUG("Dropping expired spam filter result", []), - {stop, State}; -s2s_in_handle_info(State, _) -> - State. - %%-------------------------------------------------------------------- %%| Internal functions --spec needs_checking(jid(), jid()) -> boolean(). 
-needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) -> - case gen_mod:is_loaded(LServer, ?MODULE) of - true -> - Access = gen_mod:get_module_opt(LServer, ?MODULE, access_spam), - case acl:match_rule(LServer, Access, To) of - allow -> - ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]), - false; - deny -> - ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]), - not mod_roster:is_subscribed(From, To) - andalso not - mod_roster:is_subscribed( - jid:make(<<>>, FromHost), - To) % likely a gateway - end; - false -> - ?DEBUG("~s not loaded for ~s", [?MODULE, LServer]), - false - end. - --spec check_from(binary(), jid()) -> ham | spam. -check_from(Host, From) -> - Proc = get_proc_name(Host), - LFrom = - {_, FromDomain, _} = - jid:remove_resource( - jid:tolower(From)), - try - case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of - true -> - ?DEBUG("Spam JID found in blocked domains: ~p", [From]), - ejabberd_hooks:run(spam_found, Host, [{jid, From}]), - spam; - false -> - case gen_server:call(Proc, {check_jid, LFrom}) of - {spam_filter, Result} -> - Result - end - end - catch - exit:{timeout, _} -> - ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers", - [jid:encode(From)]), - ham - end. - --spec check_body(binary(), jid(), binary()) -> ham | spam. -check_body(Host, From, Body) -> - case {extract_urls(Host, Body), extract_jids(Body)} of - {none, none} -> - ?DEBUG("No JIDs/URLs found in message", []), - ham; - {URLs, JIDs} -> - Proc = get_proc_name(Host), - LFrom = - jid:remove_resource( - jid:tolower(From)), - try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of - {spam_filter, Result} -> - Result - catch - exit:{timeout, _} -> - ?WARNING_MSG("Timeout while checking body", []), - ham - end - end. - --spec extract_urls(binary(), binary()) -> {urls, [url()]} | none. 
-extract_urls(Host, Body) -> - RE = <<"https?://\\S+">>, - Options = [global, {capture, all, binary}], - case re:run(Body, RE, Options) of - {match, Captured} when is_list(Captured) -> - Urls = resolve_redirects(Host, lists:flatten(Captured)), - {urls, Urls}; - nomatch -> - none - end. - --spec resolve_redirects(binary(), [url()]) -> [url()]. -resolve_redirects(Host, URLs) -> - Proc = get_proc_name(Host), - try gen_server:call(Proc, {resolve_redirects, URLs}) of - {spam_filter, ResolvedURLs} -> - ResolvedURLs - catch - exit:{timeout, _} -> - ?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]), - URLs - end. - --spec do_resolve_redirects([url()], [url()]) -> [url()]. -do_resolve_redirects([], Result) -> - Result; -do_resolve_redirects([URL | Rest], Acc) -> - case httpc:request(get, - {URL, [{"user-agent", "curl/8.7.1"}]}, - [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}], - []) - of - {ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 -> - Location = proplists:get_value("location", Headers), - case Location == undefined orelse lists:member(Location, Acc) of - true -> - do_resolve_redirects(Rest, [URL | Acc]); - false -> - do_resolve_redirects([Location | Rest], [URL | Acc]) - end; - _Res -> - do_resolve_redirects(Rest, [URL | Acc]) - end. - --spec extract_jids(binary()) -> {jids, [ljid()]} | none. -extract_jids(Body) -> - RE = <<"\\S+@\\S+">>, - Options = [global, {capture, all, binary}], - case re:run(Body, RE, Options) of - {match, Captured} when is_list(Captured) -> - {jids, lists:filtermap(fun try_decode_jid/1, lists:flatten(Captured))}; - nomatch -> - none - end. - --spec try_decode_jid(binary()) -> {true, ljid()} | false. -try_decode_jid(S) -> - try jid:decode(S) of - #jid{} = JID -> - {true, - jid:remove_resource( - jid:tolower(JID))} - catch - _:{bad_jid, _} -> - false - end. - -spec filter_jid(ljid(), jid_set(), state()) -> {ham | spam, state()}. 
filter_jid(From, Set, #state{host = Host} = State) -> case sets:is_element(From, Set) of @@ -777,31 +571,6 @@ parse_url(S) -> trim(S) -> re:replace(S, <<"\\s+$">>, <<>>, [{return, binary}]). --spec reject(stanza()) -> ok. -reject(#message{from = From, - to = To, - type = Type, - lang = Lang} = - Msg) - when Type /= groupchat, Type /= error -> - ?INFO_MSG("Rejecting unsolicited message from ~s to ~s", - [jid:encode(From), jid:encode(To)]), - Txt = <<"Your message is unsolicited">>, - Err = xmpp:err_policy_violation(Txt, Lang), - ejabberd_hooks:run(spam_stanza_rejected, To#jid.lserver, [Msg]), - ejabberd_router:route_error(Msg, Err); -reject(#presence{from = From, - to = To, - lang = Lang} = - Presence) -> - ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s", - [jid:encode(From), jid:encode(To)]), - Txt = <<"Your traffic is unsolicited">>, - Err = xmpp:err_policy_violation(Txt, Lang), - ejabberd_router:route_error(Presence, Err); -reject(_) -> - ok. - -spec get_proc_name(binary()) -> atom(). get_proc_name(Host) -> gen_mod:get_module_proc(Host, ?MODULE). diff --git a/src/mod_antispam_filter.erl b/src/mod_antispam_filter.erl new file mode 100644 index 000000000..9fd8abf36 --- /dev/null +++ b/src/mod_antispam_filter.erl @@ -0,0 +1,299 @@ +%%%---------------------------------------------------------------------- +%%% File : mod_antispam_filter.erl +%%% Author : Holger Weiss +%%% Author : Stefan Strigler +%%% Purpose : Filter C2S and S2S stanzas +%%% Created : 31 Mar 2019 by Holger Weiss +%%% +%%% +%%% ejabberd, Copyright (C) 2019-2025 ProcessOne +%%% +%%% This program is free software; you can redistribute it and/or +%%% modify it under the terms of the GNU General Public License as +%%% published by the Free Software Foundation; either version 2 of the +%%% License, or (at your option) any later version. 
+%%%
+%%% This program is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%%% General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License along
+%%% with this program; if not, write to the Free Software Foundation, Inc.,
+%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+%%%
+%%%----------------------------------------------------------------------
+
+%%| Definitions
+%% @format-begin
+
+-module(mod_antispam_filter).
+
+-author('holger@zedat.fu-berlin.de').
+-author('stefan@strigler.de').
+
+-export([init_filtering/1, terminate_filtering/1]).
+%% ejabberd_hooks callbacks
+-export([s2s_in_handle_info/2, s2s_receive_packet/1, sm_receive_packet/1]).
+
+-include("logger.hrl").
+-include("translate.hrl").
+
+-include_lib("xmpp/include/xmpp.hrl").
+
+-type url() :: binary().
+-type s2s_in_state() :: ejabberd_s2s_in:state().
+
+%% The module whose options and server process this module uses
+%% (see needs_checking/2 and get_proc_name/1).
+-define(MODULE_PARENT, mod_antispam).
+%% Timeout passed to each httpc request made while resolving redirects.
+-define(HTTPC_TIMEOUT, timer:seconds(3)).
+
+%%--------------------------------------------------------------------
+%%| Exported
+
+%% Register this module's three hook callbacks for Host:
+%% s2s_in_handle_info (priority 90), s2s_receive_packet and
+%% sm_receive_packet (priority 50 each).
+-spec init_filtering(binary()) -> ok.
+init_filtering(Host) ->
+    ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90),
+    ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
+    ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50).
+
+%% Remove the hook callbacks registered by init_filtering/1, using the same
+%% hook names and priorities.
+-spec terminate_filtering(binary()) -> ok.
+terminate_filtering(Host) ->
+    ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
+    ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50),
+    ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90).
+
+%%--------------------------------------------------------------------
+%%| Hook callbacks
+
+-spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) ->
+          {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}.
+s2s_receive_packet({A, State}) ->
+    %% s2s_receive_packet hook callback. The accumulator is a
+    %% {StanzaOrDrop, S2SState} pair: the first element is run through
+    %% sm_receive_packet/1 and the s2s state is put back around whatever
+    %% comes out, so {stop, drop} becomes {stop, {drop, State}}.
+    case sm_receive_packet(A) of
+        {stop, drop} ->
+            {stop, {drop, State}};
+        Result ->
+            {Result, State}
+    end.
+
+%% sm_receive_packet hook callback.
+%% - An accumulator that is already `drop' is returned untouched.
+%% - Messages whose type is neither groupchat nor error, and presence
+%%   stanzas of type subscribe, are handed to do_check/4, keyed on the
+%%   recipient's server.
+%% - Any other accumulator is returned unchanged.
+-spec sm_receive_packet(stanza() | drop) -> stanza() | drop | {stop, drop}.
+sm_receive_packet(drop = Acc) ->
+    Acc;
+sm_receive_packet(#message{from = From,
+                           to = #jid{lserver = LServer} = To,
+                           type = Type} =
+                      Msg)
+    when Type /= groupchat, Type /= error ->
+    do_check(From, To, LServer, Msg);
+sm_receive_packet(#presence{from = From,
+                            to = #jid{lserver = LServer} = To,
+                            type = subscribe} =
+                      Presence) ->
+    do_check(From, To, LServer, Presence);
+sm_receive_packet(Acc) ->
+    Acc.
+
+%%--------------------------------------------------------------------
+%%| Filtering decision
+
+%% Run the filter pipeline for one stanza and return the hook result.
+%% Stanza is returned unchanged when needs_checking/2 says no check is
+%% required, or when both the sender check and the stanza check answer ham.
+%% On the first spam verdict the stanza is passed to reject/1 and
+%% {stop, drop} is returned. The sender is checked first, so the stanza
+%% content is not inspected for a sender that is already classified as spam.
+do_check(From, To, LServer, Stanza) ->
+    case needs_checking(From, To) of
+        true ->
+            case check_from(LServer, From) of
+                ham ->
+                    case check_stanza(LServer, From, Stanza) of
+                        ham ->
+                            Stanza;
+                        spam ->
+                            reject(Stanza),
+                            {stop, drop}
+                    end;
+                spam ->
+                    reject(Stanza),
+                    {stop, drop}
+            end;
+        false ->
+            Stanza
+    end.
+
+%% Content check: only the body text of a message is examined (through
+%% check_body/3); every other stanza is ham as far as content is concerned.
+check_stanza(LServer, From, #message{body = Body}) ->
+    check_body(LServer, From, xmpp:get_text(Body));
+check_stanza(_, _, _) ->
+    ham.
+
+%% s2s_in_handle_info hook callback. A {Ref, {spam_filter, _}} message is
+%% logged as an expired filter result and consumed by returning
+%% {stop, State}; any other message leaves State untouched.
+%% NOTE(review): presumably these are late replies to the gen_server calls in
+%% check_from/2 and check_body/3 that already timed out -- confirm.
+-spec s2s_in_handle_info(s2s_in_state(), any()) ->
+          s2s_in_state() | {stop, s2s_in_state()}.
+s2s_in_handle_info(State, {_Ref, {spam_filter, _}}) ->
+    ?DEBUG("Dropping expired spam filter result", []),
+    {stop, State};
+s2s_in_handle_info(State, _) ->
+    State.
+
+-spec needs_checking(jid(), jid()) -> boolean().
+needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
+    %% Decide whether a stanza From -> To is subject to spam filtering.
+    %% The answer is false when the parent module is not loaded for the
+    %% recipient's host, or when the access_spam rule yields `allow' for To.
+    %% When the rule yields `deny', the answer is true only if
+    %% mod_roster:is_subscribed/2 is false both for From and for the bare
+    %% domain JID of From's server.
+    case gen_mod:is_loaded(LServer, ?MODULE_PARENT) of
+        true ->
+            Access = gen_mod:get_module_opt(LServer, ?MODULE_PARENT, access_spam),
+            case acl:match_rule(LServer, Access, To) of
+                allow ->
+                    ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
+                    false;
+                deny ->
+                    ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]),
+                    not mod_roster:is_subscribed(From, To)
+                    andalso not
+                                mod_roster:is_subscribed(
+                                    jid:make(<<>>, FromHost),
+                                    To) % likely a gateway
+            end;
+        false ->
+            ?DEBUG("~s not loaded for ~s", [?MODULE_PARENT, LServer]),
+            false
+    end.
+
+%% Ask the parent module's process for Host whether the sender is known spam.
+%% The sender's domain is checked first ({is_blocked_domain, Domain}); a hit
+%% runs the spam_found hook and answers spam. Otherwise the bare, lower-cased
+%% sender JID is checked ({check_jid, LFrom}) and that verdict is returned.
+%% A call timeout is logged and answered as ham, i.e. the check fails open.
+-spec check_from(binary(), jid()) -> ham | spam.
+check_from(Host, From) ->
+    Proc = get_proc_name(Host),
+    %% One expression binds both the whole bare JID tuple (LFrom) and its
+    %% domain element (FromDomain).
+    LFrom =
+        {_, FromDomain, _} =
+            jid:remove_resource(
+                jid:tolower(From)),
+    try
+        case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of
+            true ->
+                ?DEBUG("Spam JID found in blocked domains: ~p", [From]),
+                ejabberd_hooks:run(spam_found, Host, [{jid, From}]),
+                spam;
+            false ->
+                case gen_server:call(Proc, {check_jid, LFrom}) of
+                    {spam_filter, Result} ->
+                        Result
+                end
+        end
+    catch
+        exit:{timeout, _} ->
+            ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers",
+                         [jid:encode(From)]),
+            ham
+    end.
+
+%% Check the text of a message body. URLs and JIDs are extracted first; when
+%% neither extraction finds anything the answer is ham and the parent
+%% module's process is not contacted. Otherwise both extraction results
+%% (each either a tagged list or `none') are sent with the bare sender JID in
+%% a {check_body, ...} call. A call timeout is logged and answered as ham.
+-spec check_body(binary(), jid(), binary()) -> ham | spam.
+check_body(Host, From, Body) ->
+    case {extract_urls(Host, Body), extract_jids(Body)} of
+        {none, none} ->
+            ?DEBUG("No JIDs/URLs found in message", []),
+            ham;
+        {URLs, JIDs} ->
+            Proc = get_proc_name(Host),
+            LFrom =
+                jid:remove_resource(
+                    jid:tolower(From)),
+            try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of
+                {spam_filter, Result} ->
+                    Result
+            catch
+                exit:{timeout, _} ->
+                    ?WARNING_MSG("Timeout while checking body", []),
+                    ham
+            end
+    end.
+
+%%--------------------------------------------------------------------
+%%| Auxiliary
+
+-spec extract_urls(binary(), binary()) -> {urls, [url()]} | none.
+extract_urls(Host, Body) ->
+    %% Collect every http:// or https:// URL in Body (a match runs up to the
+    %% next whitespace character) and pass the matches through
+    %% resolve_redirects/2, so redirect targets are part of the result.
+    %% Returns `none' when Body contains no URL at all.
+    RE = <<"https?://\\S+">>,
+    Options = [global, {capture, all, binary}],
+    case re:run(Body, RE, Options) of
+        {match, Captured} when is_list(Captured) ->
+            Urls = resolve_redirects(Host, lists:flatten(Captured)),
+            {urls, Urls};
+        nomatch ->
+            none
+    end.
+
+%% Return the given URLs together with the redirect targets found by
+%% do_resolve_redirects/2. The first argument is not used.
+-spec resolve_redirects(binary(), [url()]) -> [url()].
+resolve_redirects(_Host, URLs) ->
+    try do_resolve_redirects(URLs, []) of
+        ResolvedURLs ->
+            ResolvedURLs
+    catch
+        %% This pattern is the exit raised by a timed-out gen_server:call.
+        %% NOTE(review): do_resolve_redirects/2 makes no gen_server call of
+        %% its own and httpc:request/4 is given its own timeout option, so
+        %% confirm whether this clause can still be reached.
+        exit:{timeout, _} ->
+            ?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]),
+            URLs
+    end.
+
+%% Work through a list of URLs, requesting each one with automatic redirects
+%% switched off. Every URL that was requested ends up in the result (built
+%% by prepending, so in reverse visiting order). When the response is a 3xx
+%% with a location header that has not been visited yet, that target is
+%% queued next; any other outcome of the request moves on to the next URL.
+%% NOTE(review): only exact repeats are detected, so the length of a chain of
+%% distinct redirects is not bounded here, and the URLs come from message
+%% bodies -- consider a hop limit.
+-spec do_resolve_redirects([url()], [url()]) -> [url()].
+do_resolve_redirects([], Result) ->
+    Result;
+do_resolve_redirects([URL | Rest], Acc) ->
+    case httpc:request(get,
+                       {URL, [{"user-agent", "curl/8.7.1"}]},
+                       [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}],
+                       [])
+    of
+        {ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 ->
+            Location = redirect_location(Headers),
+            case Location == undefined orelse lists:member(Location, Acc) of
+                true ->
+                    do_resolve_redirects(Rest, [URL | Acc]);
+                false ->
+                    do_resolve_redirects([Location | Rest], [URL | Acc])
+            end;
+        _Res ->
+            do_resolve_redirects(Rest, [URL | Acc])
+    end.
+
+%% Value of the location response header as a url(), or `undefined' when the
+%% header is absent. httpc hands header values back as strings, while url()
+%% is a binary and the URLs captured from the message body are binaries, so
+%% the value is converted here. Without the conversion the target would
+%% never compare equal to a binary entry in the visited list and the result
+%% would not match the [url()] spec.
+-spec redirect_location([{string(), string()}]) -> url() | undefined.
+redirect_location(Headers) ->
+    case proplists:get_value("location", Headers) of
+        undefined ->
+            undefined;
+        Location ->
+            iolist_to_binary(Location)
+    end.
+
+%% Collect everything in Body that looks like a JID: two runs of
+%% non-whitespace characters joined by an @. Candidates that do not decode
+%% as a JID are discarded, so the list inside {jids, _} can be empty.
+%% Returns `none' when Body has no candidate at all.
+-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
+extract_jids(Body) ->
+    RE = <<"\\S+@\\S+">>,
+    Options = [global, {capture, all, binary}],
+    case re:run(Body, RE, Options) of
+        {match, Captured} when is_list(Captured) ->
+            {jids, lists:filtermap(fun try_decode_jid/1, lists:flatten(Captured))};
+        nomatch ->
+            none
+    end.
+
+%% lists:filtermap/2 callback: {true, BareLowerCasedJid} when S decodes as a
+%% JID, false when jid:decode/1 raises {bad_jid, _}. Any other exception
+%% is not caught.
+-spec try_decode_jid(binary()) -> {true, ljid()} | false.
+try_decode_jid(S) ->
+    try jid:decode(S) of
+        #jid{} = JID ->
+            {true,
+             jid:remove_resource(
+                 jid:tolower(JID))}
+    catch
+        _:{bad_jid, _} ->
+            false
+    end.
+
+-spec reject(stanza()) -> ok.
+reject(#message{from = From,
+                to = To,
+                type = Type,
+                lang = Lang} =
+           Msg)
+    when Type /= groupchat, Type /= error ->
+    %% Bounce a stanza that was classified as spam.
+    %% Message clause (type neither groupchat nor error): log the rejection,
+    %% run the spam_stanza_rejected hook on the recipient's server and route
+    %% a policy-violation error built from the message.
+    ?INFO_MSG("Rejecting unsolicited message from ~s to ~s",
+              [jid:encode(From), jid:encode(To)]),
+    Txt = <<"Your message is unsolicited">>,
+    Err = xmpp:err_policy_violation(Txt, Lang),
+    ejabberd_hooks:run(spam_stanza_rejected, To#jid.lserver, [Msg]),
+    ejabberd_router:route_error(Msg, Err);
+reject(#presence{from = From,
+                 to = To,
+                 lang = Lang} =
+           Presence) ->
+    %% Presence clause: log and route a policy-violation error. Unlike the
+    %% message clause, no hook is run here.
+    ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s",
+              [jid:encode(From), jid:encode(To)]),
+    Txt = <<"Your traffic is unsolicited">>,
+    Err = xmpp:err_policy_violation(Txt, Lang),
+    ejabberd_router:route_error(Presence, Err);
+reject(_) ->
+    %% Anything else, including groupchat and error messages, is ignored.
+    ok.
+
+%% Name of the parent module's process for Host; the gen_server calls in
+%% this module are addressed to it.
+-spec get_proc_name(binary()) -> atom().
+get_proc_name(Host) ->
+    gen_mod:get_module_proc(Host, ?MODULE_PARENT).
+
+%%--------------------------------------------------------------------
+
+%%| vim: set foldmethod=marker foldmarker=%%|,%%-:
diff --git a/test/antispam_tests.erl b/test/antispam_tests.erl
index debfe9981..f60872913 100644
--- a/test/antispam_tests.erl
+++ b/test/antispam_tests.erl
@@ -265,10 +265,11 @@ has_spam_domain(Domain) ->
     fun(Host) -> lists:member(Domain, mod_antispam:get_blocked_domains(Host)) end.
 
 is_not_spam(Msg) ->
-    ?match({Msg, undefined}, mod_antispam:s2s_receive_packet({Msg, undefined})).
+    ?match({Msg, undefined}, mod_antispam_filter:s2s_receive_packet({Msg, undefined})).
 
 is_spam(Spam) ->
-    ?match({stop, {drop, undefined}}, mod_antispam:s2s_receive_packet({Spam, undefined})).
+    ?match({stop, {drop, undefined}},
+           mod_antispam_filter:s2s_receive_packet({Spam, undefined})).
 
 message_hello(Username, Host, Config) ->
     SpamFrom = jid:make(Username, Host, <<"spam_client">>),