1
0
Fork 0
mirror of https://github.com/processone/ejabberd synced 2025-10-03 17:59:31 +02:00

Move filtering implementation to a submodule

This commit is contained in:
Badlop 2025-06-18 10:32:49 +02:00
parent 432810db89
commit d00561b58c
3 changed files with 313 additions and 244 deletions

View file

@ -50,11 +50,6 @@
terminate/2,
code_change/3]).
%% ejabberd_hooks callbacks.
-export([s2s_in_handle_info/2,
s2s_receive_packet/1,
sm_receive_packet/1]).
%% ejabberd_commands callbacks.
-export([add_blocked_domain/2,
add_to_spam_filter_cache/2,
@ -76,7 +71,6 @@
-type filename() :: binary() | none | false.
-type jid_set() :: sets:set(ljid()).
-type url_set() :: sets:set(url()).
-type s2s_in_state() :: ejabberd_s2s_in:state().
-record(state,
{host = <<>> :: binary(),
@ -96,7 +90,6 @@
-type state() :: #state{}.
-define(COMMAND_TIMEOUT, timer:seconds(30)).
-define(HTTPC_TIMEOUT, timer:seconds(3)).
-define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>).
-define(DEFAULT_CACHE_SIZE, 10000).
@ -178,7 +171,15 @@ mod_doc() ->
#{desc => ?T("Reads from text file and RTBL, filters stanzas and writes dump file."),
note => "added in 25.xx",
opts =>
[{spam_dump_file,
[{access_spam,
#{value => ?T("Access"),
desc =>
?T("Access rule that controls what accounts may receive spam messages. "
"If the rule returns `allow` for a given recipient, "
"spam messages aren't rejected for that recipient. "
"The default value is 'none', which means that all recipients "
"are subject to spam filtering verification.")}},
{spam_dump_file,
#{value => ?T("false | true | Path"),
desc =>
?T("Path to the file to store blocked messages. "
@ -207,9 +208,6 @@ init([Host, Opts]) ->
url := URLsSet,
domains := SpamDomainsSet,
whitelist_domains := WhitelistDomains} ->
ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90),
ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50),
ejabberd_hooks:add(local_send_to_resource_hook,
Host,
mod_antispam_rtbl,
@ -217,6 +215,7 @@ init([Host, Opts]) ->
50),
RTBLHost = gen_mod:get_opt(rtbl_host, Opts),
RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, Opts),
mod_antispam_filter:init_filtering(Host),
InitState =
#state{host = Host,
jid_set = JIDsSet,
@ -252,9 +251,6 @@ handle_call({check_body, URLs, JIDs, From},
Result2
end,
{reply, {spam_filter, Result}, State2};
handle_call({resolve_redirects, URLs}, _From, State) ->
ResolvedURLs = do_resolve_redirects(URLs, []),
{reply, {spam_filter, ResolvedURLs}, State};
handle_call({reload_files, Files}, _From, State) ->
{Result, State1} = reload_files(Files, State),
{reply, {spam_filter, Result}, State1};
@ -408,9 +404,7 @@ terminate(Reason,
?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
misc:cancel_timer(RTBLRetryTimer),
mod_antispam_dump:terminate_dumping(Host, Fd),
ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50),
ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50),
ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90),
mod_antispam_filter:terminate_filtering(Host),
ejabberd_hooks:delete(local_send_to_resource_hook,
Host,
mod_antispam_rtbl,
@ -424,209 +418,9 @@ code_change(_OldVsn, #state{host = Host} = State, _Extra) ->
?DEBUG("Updating spam filter process for ~s", [Host]),
{ok, State}.
%%--------------------------------------------------------------------
%%| Hook callbacks
-spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) ->
{stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}.
s2s_receive_packet({A, State}) ->
case sm_receive_packet(A) of
{stop, drop} ->
{stop, {drop, State}};
Result ->
{Result, State}
end.
-spec sm_receive_packet(stanza() | drop) -> stanza() | drop | {stop, drop}.
sm_receive_packet(drop = Acc) ->
Acc;
sm_receive_packet(#message{from = From,
to = #jid{lserver = LServer} = To,
type = Type} =
Msg)
when Type /= groupchat, Type /= error ->
do_check(From, To, LServer, Msg);
sm_receive_packet(#presence{from = From,
to = #jid{lserver = LServer} = To,
type = subscribe} =
Presence) ->
do_check(From, To, LServer, Presence);
sm_receive_packet(Acc) ->
Acc.
do_check(From, To, LServer, Stanza) ->
case needs_checking(From, To) of
true ->
case check_from(LServer, From) of
ham ->
case check_stanza(LServer, From, Stanza) of
ham ->
Stanza;
spam ->
reject(Stanza),
{stop, drop}
end;
spam ->
reject(Stanza),
{stop, drop}
end;
false ->
Stanza
end.
check_stanza(LServer, From, #message{body = Body}) ->
check_body(LServer, From, xmpp:get_text(Body));
check_stanza(_, _, _) ->
ham.
-spec s2s_in_handle_info(s2s_in_state(), any()) ->
s2s_in_state() | {stop, s2s_in_state()}.
s2s_in_handle_info(State, {_Ref, {spam_filter, _}}) ->
?DEBUG("Dropping expired spam filter result", []),
{stop, State};
s2s_in_handle_info(State, _) ->
State.
%%--------------------------------------------------------------------
%%| Internal functions
-spec needs_checking(jid(), jid()) -> boolean().
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
case gen_mod:is_loaded(LServer, ?MODULE) of
true ->
Access = gen_mod:get_module_opt(LServer, ?MODULE, access_spam),
case acl:match_rule(LServer, Access, To) of
allow ->
?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
false;
deny ->
?DEBUG("Spam is filtered for ~s", [jid:encode(To)]),
not mod_roster:is_subscribed(From, To)
andalso not
mod_roster:is_subscribed(
jid:make(<<>>, FromHost),
To) % likely a gateway
end;
false ->
?DEBUG("~s not loaded for ~s", [?MODULE, LServer]),
false
end.
-spec check_from(binary(), jid()) -> ham | spam.
check_from(Host, From) ->
Proc = get_proc_name(Host),
LFrom =
{_, FromDomain, _} =
jid:remove_resource(
jid:tolower(From)),
try
case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of
true ->
?DEBUG("Spam JID found in blocked domains: ~p", [From]),
ejabberd_hooks:run(spam_found, Host, [{jid, From}]),
spam;
false ->
case gen_server:call(Proc, {check_jid, LFrom}) of
{spam_filter, Result} ->
Result
end
end
catch
exit:{timeout, _} ->
?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers",
[jid:encode(From)]),
ham
end.
-spec check_body(binary(), jid(), binary()) -> ham | spam.
check_body(Host, From, Body) ->
case {extract_urls(Host, Body), extract_jids(Body)} of
{none, none} ->
?DEBUG("No JIDs/URLs found in message", []),
ham;
{URLs, JIDs} ->
Proc = get_proc_name(Host),
LFrom =
jid:remove_resource(
jid:tolower(From)),
try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of
{spam_filter, Result} ->
Result
catch
exit:{timeout, _} ->
?WARNING_MSG("Timeout while checking body", []),
ham
end
end.
-spec extract_urls(binary(), binary()) -> {urls, [url()]} | none.
extract_urls(Host, Body) ->
RE = <<"https?://\\S+">>,
Options = [global, {capture, all, binary}],
case re:run(Body, RE, Options) of
{match, Captured} when is_list(Captured) ->
Urls = resolve_redirects(Host, lists:flatten(Captured)),
{urls, Urls};
nomatch ->
none
end.
-spec resolve_redirects(binary(), [url()]) -> [url()].
resolve_redirects(Host, URLs) ->
Proc = get_proc_name(Host),
try gen_server:call(Proc, {resolve_redirects, URLs}) of
{spam_filter, ResolvedURLs} ->
ResolvedURLs
catch
exit:{timeout, _} ->
?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]),
URLs
end.
-spec do_resolve_redirects([url()], [url()]) -> [url()].
do_resolve_redirects([], Result) ->
Result;
do_resolve_redirects([URL | Rest], Acc) ->
case httpc:request(get,
{URL, [{"user-agent", "curl/8.7.1"}]},
[{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}],
[])
of
{ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 ->
Location = proplists:get_value("location", Headers),
case Location == undefined orelse lists:member(Location, Acc) of
true ->
do_resolve_redirects(Rest, [URL | Acc]);
false ->
do_resolve_redirects([Location | Rest], [URL | Acc])
end;
_Res ->
do_resolve_redirects(Rest, [URL | Acc])
end.
-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
extract_jids(Body) ->
RE = <<"\\S+@\\S+">>,
Options = [global, {capture, all, binary}],
case re:run(Body, RE, Options) of
{match, Captured} when is_list(Captured) ->
{jids, lists:filtermap(fun try_decode_jid/1, lists:flatten(Captured))};
nomatch ->
none
end.
-spec try_decode_jid(binary()) -> {true, ljid()} | false.
try_decode_jid(S) ->
try jid:decode(S) of
#jid{} = JID ->
{true,
jid:remove_resource(
jid:tolower(JID))}
catch
_:{bad_jid, _} ->
false
end.
-spec filter_jid(ljid(), jid_set(), state()) -> {ham | spam, state()}.
filter_jid(From, Set, #state{host = Host} = State) ->
case sets:is_element(From, Set) of
@ -777,31 +571,6 @@ parse_url(S) ->
%% Remove the run of whitespace at the very end of S, if there is one.
%% The result is always returned as a binary.
trim(S) ->
    TrailingSpace = <<"\\s+$">>,
    ReturnAs = [{return, binary}],
    re:replace(S, TrailingSpace, <<>>, ReturnAs).
-spec reject(stanza()) -> ok.
reject(#message{from = From,
to = To,
type = Type,
lang = Lang} =
Msg)
when Type /= groupchat, Type /= error ->
?INFO_MSG("Rejecting unsolicited message from ~s to ~s",
[jid:encode(From), jid:encode(To)]),
Txt = <<"Your message is unsolicited">>,
Err = xmpp:err_policy_violation(Txt, Lang),
ejabberd_hooks:run(spam_stanza_rejected, To#jid.lserver, [Msg]),
ejabberd_router:route_error(Msg, Err);
reject(#presence{from = From,
to = To,
lang = Lang} =
Presence) ->
?INFO_MSG("Rejecting unsolicited presence from ~s to ~s",
[jid:encode(From), jid:encode(To)]),
Txt = <<"Your traffic is unsolicited">>,
Err = xmpp:err_policy_violation(Txt, Lang),
ejabberd_router:route_error(Presence, Err);
reject(_) ->
ok.
%% Name of this module's per-host process, as computed by
%% gen_mod:get_module_proc/2.
-spec get_proc_name(binary()) -> atom().
get_proc_name(ServerHost) ->
    gen_mod:get_module_proc(ServerHost, ?MODULE).

299
src/mod_antispam_filter.erl Normal file
View file

@ -0,0 +1,299 @@
%%%----------------------------------------------------------------------
%%% File : mod_antispam_filter.erl
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
%%% Author : Stefan Strigler <stefan@strigler.de>
%%% Purpose : Filter C2S and S2S stanzas
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
%%%
%%%
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%----------------------------------------------------------------------
%%| Definitions
%% @format-begin
-module(mod_antispam_filter).
-author('holger@zedat.fu-berlin.de').
-author('stefan@strigler.de').
%% Hook (un)registration entry points; mod_antispam calls these from its
%% init and terminate code.
-export([init_filtering/1, terminate_filtering/1]).
%% ejabberd_hooks callbacks
-export([s2s_in_handle_info/2, s2s_receive_packet/1, sm_receive_packet/1]).
-include("logger.hrl").
-include("translate.hrl").
-include_lib("xmpp/include/xmpp.hrl").
%% A URL, held as a binary (URLs are captured from message bodies as binaries).
-type url() :: binary().
-type s2s_in_state() :: ejabberd_s2s_in:state().
%% The module whose options are read and whose per-host process is called
%% by the functions below.
-define(MODULE_PARENT, mod_antispam).
%% Timeout handed to httpc:request/4 for each request made while
%% resolving redirects.
-define(HTTPC_TIMEOUT, timer:seconds(3)).
%%--------------------------------------------------------------------
%%| Exported
%% Register the three filtering callbacks of this module on Host.
%% Each entry is {HookAndFunctionName, Seq}; the callback function has the
%% same name as the hook. The hooks are added in list order and the value of
%% the last ejabberd_hooks:add/5 call is returned.
init_filtering(Host) ->
    Hooks = [{s2s_in_handle_info, 90}, {s2s_receive_packet, 50}, {sm_receive_packet, 50}],
    lists:foldl(fun({Hook, Seq}, _Previous) ->
                   ejabberd_hooks:add(Hook, Host, ?MODULE, Hook, Seq)
                end,
                ok,
                Hooks).
%% Remove the three filtering callbacks of this module from Host.
%% Each entry is {HookAndFunctionName, Seq}; the hooks are deleted in list
%% order and the value of the last ejabberd_hooks:delete/5 call is returned.
terminate_filtering(Host) ->
    Hooks = [{s2s_receive_packet, 50}, {sm_receive_packet, 50}, {s2s_in_handle_info, 90}],
    lists:foldl(fun({Hook, Seq}, _Previous) ->
                   ejabberd_hooks:delete(Hook, Host, ?MODULE, Hook, Seq)
                end,
                ok,
                Hooks).
%%--------------------------------------------------------------------
%%| Hook callbacks
%% s2s_receive_packet hook callback.
%% Runs the packet through sm_receive_packet/1 and re-attaches the s2s state
%% to whatever comes back, keeping the {stop, ...} wrapper on the outside.
-spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) ->
                            {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}.
s2s_receive_packet({Packet, S2SState}) ->
    with_s2s_state(sm_receive_packet(Packet), S2SState).

%% Pair a sm_receive_packet/1 result with the s2s connection state.
with_s2s_state({stop, drop}, S2SState) ->
    {stop, {drop, S2SState}};
with_s2s_state(Outcome, S2SState) ->
    {Outcome, S2SState}.
%% sm_receive_packet hook callback.
%% Only two kinds of stanza are inspected: messages whose type is neither
%% groupchat nor error, and presence stanzas of type subscribe. In both cases
%% the recipient must be a #jid{}. Everything else, including an accumulator
%% that is already 'drop', is handed back untouched.
-spec sm_receive_packet(stanza() | drop) -> stanza() | drop | {stop, drop}.
sm_receive_packet(drop) ->
    drop;
sm_receive_packet(#message{to = #jid{} = Recipient, type = Type} = Packet)
    when Type =/= groupchat, Type =/= error ->
    do_check(Packet#message.from, Recipient, Recipient#jid.lserver, Packet);
sm_receive_packet(#presence{to = #jid{} = Recipient, type = subscribe} = Packet) ->
    do_check(Packet#presence.from, Recipient, Recipient#jid.lserver, Packet);
sm_receive_packet(Other) ->
    Other.
%%--------------------------------------------------------------------
%%| Filtering decision
%% Decide what to do with Stanza sent by From to To on LServer.
%% When needs_checking/2 says no, the stanza is returned as is. Otherwise the
%% sender is checked first and, only if the sender is ham, the stanza itself.
%% A spam verdict from either check rejects the stanza and stops the hook
%% chain with {stop, drop}.
do_check(From, To, LServer, Stanza) ->
    case needs_checking(From, To) of
        false ->
            Stanza;
        true ->
            case spam_verdict(LServer, From, Stanza) of
                ham ->
                    Stanza;
                spam ->
                    reject(Stanza),
                    {stop, drop}
            end
    end.

%% Sender check first; the stanza check runs only when the sender is ham.
spam_verdict(LServer, From, Stanza) ->
    case check_from(LServer, From) of
        spam ->
            spam;
        ham ->
            check_stanza(LServer, From, Stanza)
    end.
%% Content check for a stanza: messages have their body text checked by
%% check_body/3, any other term is ham.
check_stanza(LServer, From, #message{} = Msg) ->
    Text = xmpp:get_text(Msg#message.body),
    check_body(LServer, From, Text);
check_stanza(_LServer, _From, _NotAMessage) ->
    ham.
%% s2s_in_handle_info hook callback.
%% A message shaped {Ref, {spam_filter, _}} is logged and the hook chain is
%% stopped; any other message leaves the state untouched.
-spec s2s_in_handle_info(s2s_in_state(), any()) ->
                            s2s_in_state() | {stop, s2s_in_state()}.
s2s_in_handle_info(State, Info) ->
    case Info of
        {_Ref, {spam_filter, _}} ->
            ?DEBUG("Dropping expired spam filter result", []),
            {stop, State};
        _ ->
            State
    end.
%% Tell whether a stanza from From to To has to go through the spam checks.
%% The answer is false when the parent module is not loaded for the
%% recipient's host, or when the access_spam rule yields 'allow' for the
%% recipient. When the rule yields 'deny', checking is needed only if the
%% recipient is subscribed neither to the sender nor to the sender's bare
%% domain.
-spec needs_checking(jid(), jid()) -> boolean().
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
    case gen_mod:is_loaded(LServer, ?MODULE_PARENT) of
        false ->
            ?DEBUG("~s not loaded for ~s", [?MODULE_PARENT, LServer]),
            false;
        true ->
            Rule = gen_mod:get_module_opt(LServer, ?MODULE_PARENT, access_spam),
            case acl:match_rule(LServer, Rule, To) of
                deny ->
                    ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]),
                    %% The domain-only JID covers senders behind a gateway.
                    not (mod_roster:is_subscribed(From, To)
                         orelse mod_roster:is_subscribed(
                                    jid:make(<<>>, FromHost), To));
                allow ->
                    ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
                    false
            end
    end.
%% Classify the sender From for Host.
%% The parent module's process is asked first whether the sender's domain is
%% blocked; if so the spam_found hook is run and the verdict is spam.
%% Otherwise the process is asked about the bare lower-cased JID and its
%% answer is returned. A call timeout is logged and treated as ham.
-spec check_from(binary(), jid()) -> ham | spam.
check_from(Host, From) ->
    Server = get_proc_name(Host),
    BareFrom = jid:remove_resource(jid:tolower(From)),
    {_, Domain, _} = BareFrom,
    try
        sender_verdict(Server, Host, From, BareFrom, Domain)
    catch
        exit:{timeout, _} ->
            ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers",
                         [jid:encode(From)]),
            ham
    end.

%% The two lookups against the parent module's process, in order.
sender_verdict(Server, Host, From, BareFrom, Domain) ->
    case gen_server:call(Server, {is_blocked_domain, Domain}) of
        false ->
            case gen_server:call(Server, {check_jid, BareFrom}) of
                {spam_filter, Verdict} ->
                    Verdict
            end;
        true ->
            ?DEBUG("Spam JID found in blocked domains: ~p", [From]),
            ejabberd_hooks:run(spam_found, Host, [{jid, From}]),
            spam
    end.
%% Classify a message body.
%% URLs and JIDs are extracted from Body. If neither extraction finds
%% anything the verdict is ham; otherwise both results are sent, together
%% with the sender's bare lower-cased JID, to the parent module's process and
%% its verdict is returned. A call timeout is logged and treated as ham.
-spec check_body(binary(), jid(), binary()) -> ham | spam.
check_body(Host, From, Body) ->
    FoundURLs = extract_urls(Host, Body),
    FoundJIDs = extract_jids(Body),
    case {FoundURLs, FoundJIDs} of
        {none, none} ->
            ?DEBUG("No JIDs/URLs found in message", []),
            ham;
        _ ->
            Server = get_proc_name(Host),
            BareFrom = jid:remove_resource(jid:tolower(From)),
            Request = {check_body, FoundURLs, FoundJIDs, BareFrom},
            try gen_server:call(Server, Request) of
                {spam_filter, Verdict} ->
                    Verdict
            catch
                exit:{timeout, _} ->
                    ?WARNING_MSG("Timeout while checking body", []),
                    ham
            end
    end.
%%--------------------------------------------------------------------
%%| Auxiliary
%% Collect every http:// or https:// URL in Body (each one runs up to the
%% next whitespace) and pass the list through resolve_redirects/2.
%% Returns 'none' when the body contains no URL.
-spec extract_urls(binary(), binary()) -> {urls, [url()]} | none.
extract_urls(Host, Body) ->
    case re:run(Body, <<"https?://\\S+">>, [global, {capture, all, binary}]) of
        nomatch ->
            none;
        {match, Matches} when is_list(Matches) ->
            %% With 'global', re:run/3 yields one list per match.
            Found = lists:flatten(Matches),
            {urls, resolve_redirects(Host, Found)}
    end.
%% Resolve the redirects of URLs via do_resolve_redirects/2, starting from an
%% empty accumulator. The host argument is not used.
%% If the resolution exits with {timeout, _}, a warning is logged and the
%% input list is returned unchanged.
%% NOTE(review): the exit:{timeout, _} handler looks like a leftover from a
%% gen_server:call-based version of this function -- confirm whether
%% do_resolve_redirects/2 can still raise it.
-spec resolve_redirects(binary(), [url()]) -> [url()].
resolve_redirects(_Host, URLs) ->
    try
        do_resolve_redirects(URLs, [])
    catch
        exit:{timeout, _} ->
            ?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]),
            URLs
    end.
%% Maximum number of redirects followed for one input URL. Without a bound, a
%% remote server that keeps answering 3xx with fresh Location values would
%% keep the calling process issuing requests indefinitely.
-define(MAX_REDIRECTS, 5).

%% Follow the HTTP redirects of every URL in URLs.
%% Each URL that is visited (the input URLs and every redirect target that is
%% followed) is pushed onto Acc, so the result is in reverse visiting order.
%% A redirect is not followed when the response carries no Location header,
%% when the target is already in the accumulator, or when ?MAX_REDIRECTS hops
%% have been made for the current input URL.
-spec do_resolve_redirects([url()], [url()]) -> [url()].
do_resolve_redirects(URLs, Acc) ->
    lists:foldl(fun(URL, Seen) -> follow_redirects(URL, ?MAX_REDIRECTS, Seen) end,
                Acc,
                URLs).

%% Visit URL and, while hops remain, the chain of redirect targets behind it.
-spec follow_redirects(url(), non_neg_integer(), [url()]) -> [url()].
follow_redirects(URL, 0, Seen) ->
    %% Hop budget exhausted: record the URL without requesting it.
    [URL | Seen];
follow_redirects(URL, HopsLeft, Seen) ->
    case redirect_target(URL) of
        none ->
            [URL | Seen];
        {ok, Location} ->
            case lists:member(Location, Seen) of
                true ->
                    [URL | Seen];
                false ->
                    follow_redirects(Location, HopsLeft - 1, [URL | Seen])
            end
    end.

%% Issue one GET (automatic redirects disabled) and return the Location of a
%% 3xx answer, if any. Any other outcome, including request errors, is 'none'.
-spec redirect_target(url()) -> {ok, url()} | none.
redirect_target(URL) ->
    case httpc:request(get,
                       {URL, [{"user-agent", "curl/8.7.1"}]},
                       [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}],
                       [])
    of
        {ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 ->
            case proplists:get_value("location", Headers) of
                undefined ->
                    none;
                Location ->
                    %% httpc hands header values back as character lists;
                    %% url() is a binary, so normalise before the value is
                    %% compared with, or added to, the accumulator.
                    {ok, iolist_to_binary(Location)}
            end;
        _Other ->
            none
    end.
%% Collect everything in Body that looks like a JID: non-whitespace on both
%% sides of an '@'. Candidates that try_decode_jid/1 rejects are dropped.
%% Returns 'none' when the body contains no candidate at all.
-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
extract_jids(Body) ->
    case re:run(Body, <<"\\S+@\\S+">>, [global, {capture, all, binary}]) of
        nomatch ->
            none;
        {match, Matches} when is_list(Matches) ->
            %% With 'global', re:run/3 yields one list per match.
            Candidates = lists:flatten(Matches),
            {jids, lists:filtermap(fun try_decode_jid/1, Candidates)}
    end.
%% lists:filtermap/2 helper: decode Candidate as a JID.
%% On success the lower-cased JID without its resource is kept; a
%% {bad_jid, _} exception from jid:decode/1 discards the candidate.
-spec try_decode_jid(binary()) -> {true, ljid()} | false.
try_decode_jid(Candidate) ->
    try jid:decode(Candidate) of
        #jid{} = Decoded ->
            Lowered = jid:tolower(Decoded),
            {true, jid:remove_resource(Lowered)}
    catch
        _:{bad_jid, _} ->
            false
    end.
%% Bounce a stanza classified as spam with a policy-violation error.
%% Messages (other than groupchat and error ones) additionally trigger the
%% spam_stanza_rejected hook on the recipient's host before the error is
%% routed. Presence stanzas are bounced without running that hook. Any other
%% term is ignored.
-spec reject(stanza()) -> ok.
reject(#message{type = Type} = Msg) when Type =/= groupchat, Type =/= error ->
    #message{from = Sender, to = Recipient, lang = Lang} = Msg,
    ?INFO_MSG("Rejecting unsolicited message from ~s to ~s",
              [jid:encode(Sender), jid:encode(Recipient)]),
    Err = xmpp:err_policy_violation(<<"Your message is unsolicited">>, Lang),
    ejabberd_hooks:run(spam_stanza_rejected, Recipient#jid.lserver, [Msg]),
    ejabberd_router:route_error(Msg, Err);
reject(#presence{} = Presence) ->
    #presence{from = Sender, to = Recipient, lang = Lang} = Presence,
    ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s",
              [jid:encode(Sender), jid:encode(Recipient)]),
    Err = xmpp:err_policy_violation(<<"Your traffic is unsolicited">>, Lang),
    ejabberd_router:route_error(Presence, Err);
reject(_Other) ->
    ok.
%% Name of the parent module's per-host process, as computed by
%% gen_mod:get_module_proc/2; the gen_server calls in this module go there.
-spec get_proc_name(binary()) -> atom().
get_proc_name(ServerHost) ->
    gen_mod:get_module_proc(ServerHost, ?MODULE_PARENT).
%%--------------------------------------------------------------------
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:

View file

@ -265,10 +265,11 @@ has_spam_domain(Domain) ->
fun(Host) -> lists:member(Domain, mod_antispam:get_blocked_domains(Host)) end.
is_not_spam(Msg) ->
?match({Msg, undefined}, mod_antispam:s2s_receive_packet({Msg, undefined})).
?match({Msg, undefined}, mod_antispam_filter:s2s_receive_packet({Msg, undefined})).
is_spam(Spam) ->
?match({stop, {drop, undefined}}, mod_antispam:s2s_receive_packet({Spam, undefined})).
?match({stop, {drop, undefined}},
mod_antispam_filter:s2s_receive_packet({Spam, undefined})).
message_hello(Username, Host, Config) ->
SpamFrom = jid:make(Username, Host, <<"spam_client">>),