mirror of
https://github.com/processone/ejabberd
synced 2025-10-03 09:49:18 +02:00
Merge pull request #3 from badlop/antispam_files
Move spam file management to a submodule
This commit is contained in:
commit
4a51bf90ab
6 changed files with 330 additions and 180 deletions
26
include/mod_antispam.hrl
Normal file
26
include/mod_antispam.hrl
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
%%%----------------------------------------------------------------------
|
||||||
|
%%%
|
||||||
|
%%% ejabberd, Copyright (C) 2002-2025 ProcessOne
|
||||||
|
%%%
|
||||||
|
%%% This program is free software; you can redistribute it and/or
|
||||||
|
%%% modify it under the terms of the GNU General Public License as
|
||||||
|
%%% published by the Free Software Foundation; either version 2 of the
|
||||||
|
%%% License, or (at your option) any later version.
|
||||||
|
%%%
|
||||||
|
%%% This program is distributed in the hope that it will be useful,
|
||||||
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
%%% General Public License for more details.
|
||||||
|
%%%
|
||||||
|
%%% You should have received a copy of the GNU General Public License along
|
||||||
|
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
%%%
|
||||||
|
%%%----------------------------------------------------------------------
|
||||||
|
|
||||||
|
-define(MODULE_ANTISPAM, mod_antispam).
|
||||||
|
|
||||||
|
-type url() :: binary().
|
||||||
|
-type filename() :: binary() | none | false.
|
||||||
|
-type jid_set() :: sets:set(ljid()).
|
||||||
|
-type url_set() :: sets:set(url()).
|
|
@ -63,15 +63,11 @@
|
||||||
|
|
||||||
-include("ejabberd_commands.hrl").
|
-include("ejabberd_commands.hrl").
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
|
-include("mod_antispam.hrl").
|
||||||
-include("translate.hrl").
|
-include("translate.hrl").
|
||||||
|
|
||||||
-include_lib("xmpp/include/xmpp.hrl").
|
-include_lib("xmpp/include/xmpp.hrl").
|
||||||
|
|
||||||
-type url() :: binary().
|
|
||||||
-type filename() :: binary() | none | false.
|
|
||||||
-type jid_set() :: sets:set(ljid()).
|
|
||||||
-type url_set() :: sets:set(url()).
|
|
||||||
|
|
||||||
-record(state,
|
-record(state,
|
||||||
{host = <<>> :: binary(),
|
{host = <<>> :: binary(),
|
||||||
dump_fd = undefined :: file:io_device() | undefined,
|
dump_fd = undefined :: file:io_device() | undefined,
|
||||||
|
@ -129,10 +125,17 @@ depends(_Host, _Opts) ->
|
||||||
[{mod_pubsub, soft}].
|
[{mod_pubsub, soft}].
|
||||||
|
|
||||||
-spec mod_opt_type(atom()) -> econf:validator().
|
-spec mod_opt_type(atom()) -> econf:validator().
|
||||||
mod_opt_type(spam_domains_file) ->
|
mod_opt_type(access_spam) ->
|
||||||
|
econf:acl();
|
||||||
|
mod_opt_type(cache_size) ->
|
||||||
|
econf:pos_int(unlimited);
|
||||||
|
mod_opt_type(rtbl_host) ->
|
||||||
econf:either(
|
econf:either(
|
||||||
econf:enum([none]), econf:file());
|
econf:enum([none]), econf:host());
|
||||||
mod_opt_type(whitelist_domains_file) ->
|
mod_opt_type(rtbl_domains_node) ->
|
||||||
|
econf:non_empty(
|
||||||
|
econf:binary());
|
||||||
|
mod_opt_type(spam_domains_file) ->
|
||||||
econf:either(
|
econf:either(
|
||||||
econf:enum([none]), econf:file());
|
econf:enum([none]), econf:file());
|
||||||
mod_opt_type(spam_dump_file) ->
|
mod_opt_type(spam_dump_file) ->
|
||||||
|
@ -144,28 +147,21 @@ mod_opt_type(spam_jids_file) ->
|
||||||
mod_opt_type(spam_urls_file) ->
|
mod_opt_type(spam_urls_file) ->
|
||||||
econf:either(
|
econf:either(
|
||||||
econf:enum([none]), econf:file());
|
econf:enum([none]), econf:file());
|
||||||
mod_opt_type(access_spam) ->
|
mod_opt_type(whitelist_domains_file) ->
|
||||||
econf:acl();
|
|
||||||
mod_opt_type(cache_size) ->
|
|
||||||
econf:pos_int(unlimited);
|
|
||||||
mod_opt_type(rtbl_host) ->
|
|
||||||
econf:either(
|
econf:either(
|
||||||
econf:enum([none]), econf:host());
|
econf:enum([none]), econf:file()).
|
||||||
mod_opt_type(rtbl_domains_node) ->
|
|
||||||
econf:non_empty(
|
|
||||||
econf:binary()).
|
|
||||||
|
|
||||||
-spec mod_options(binary()) -> [{atom(), any()}].
|
-spec mod_options(binary()) -> [{atom(), any()}].
|
||||||
mod_options(_Host) ->
|
mod_options(_Host) ->
|
||||||
[{spam_domains_file, none},
|
[{access_spam, none},
|
||||||
|
{cache_size, ?DEFAULT_CACHE_SIZE},
|
||||||
|
{rtbl_domains_node, ?DEFAULT_RTBL_DOMAINS_NODE},
|
||||||
|
{rtbl_host, none},
|
||||||
|
{spam_domains_file, none},
|
||||||
{spam_dump_file, false},
|
{spam_dump_file, false},
|
||||||
{spam_jids_file, none},
|
{spam_jids_file, none},
|
||||||
{spam_urls_file, none},
|
{spam_urls_file, none},
|
||||||
{whitelist_domains_file, none},
|
{whitelist_domains_file, none}].
|
||||||
{access_spam, none},
|
|
||||||
{cache_size, ?DEFAULT_CACHE_SIZE},
|
|
||||||
{rtbl_host, none},
|
|
||||||
{rtbl_domains_node, ?DEFAULT_RTBL_DOMAINS_NODE}].
|
|
||||||
|
|
||||||
mod_doc() ->
|
mod_doc() ->
|
||||||
#{desc => ?T("Reads from text file and RTBL, filters stanzas and writes dump file."),
|
#{desc => ?T("Reads from text file and RTBL, filters stanzas and writes dump file."),
|
||||||
|
@ -175,10 +171,20 @@ mod_doc() ->
|
||||||
#{value => ?T("Access"),
|
#{value => ?T("Access"),
|
||||||
desc =>
|
desc =>
|
||||||
?T("Access rule that controls what accounts may receive spam messages. "
|
?T("Access rule that controls what accounts may receive spam messages. "
|
||||||
"If the rule returns `allow` for a given recipient, "
|
"If the rule returns 'allow' for a given recipient, "
|
||||||
"spam messages aren't rejected for that recipient. "
|
"spam messages aren't rejected for that recipient. "
|
||||||
"The default value is 'none', which means that all recipients "
|
"The default value is 'none', which means that all recipients "
|
||||||
"are subject to spam filtering verification.")}},
|
"are subject to spam filtering verification.")}},
|
||||||
|
{spam_domains_file,
|
||||||
|
#{value => ?T("none | Path"),
|
||||||
|
desc =>
|
||||||
|
?T("Path to a plain text file containing a list of "
|
||||||
|
"known spam domains, one domain per line. "
|
||||||
|
"Messages and subscription requests sent from one of the listed domains "
|
||||||
|
"are classified as spam if sender is not in recipient's roster. "
|
||||||
|
"This list of domains gets merged with the one retrieved "
|
||||||
|
"by an RTBL host if any given. "
|
||||||
|
"The default value is 'none'.")}},
|
||||||
{spam_dump_file,
|
{spam_dump_file,
|
||||||
#{value => ?T("false | true | Path"),
|
#{value => ?T("false | true | Path"),
|
||||||
desc =>
|
desc =>
|
||||||
|
@ -186,7 +192,37 @@ mod_doc() ->
|
||||||
"Use an absolute path, or the '@LOG_PATH@' macro to store logs "
|
"Use an absolute path, or the '@LOG_PATH@' macro to store logs "
|
||||||
"in the same place that the other ejabberd log files. "
|
"in the same place that the other ejabberd log files. "
|
||||||
"If set to 'false', does not dump stanzas, this is the default. "
|
"If set to 'false', does not dump stanzas, this is the default. "
|
||||||
"If set to 'true', it stores in '\"@LOG_PATH@/spam_dump_@HOST@.log\"'.")}}],
|
"If set to 'true', it stores in '\"@LOG_PATH@/spam_dump_@HOST@.log\"'.")}},
|
||||||
|
{spam_jids_file,
|
||||||
|
#{value => ?T("none | Path"),
|
||||||
|
desc =>
|
||||||
|
?T("Path to a plain text file containing a list of "
|
||||||
|
"known spammer JIDs, one JID per line. "
|
||||||
|
"Messages and subscription requests sent from one of "
|
||||||
|
"the listed JIDs are classified as spam. "
|
||||||
|
"Messages containing at least one of the listed JIDs"
|
||||||
|
"are classified as spam as well. "
|
||||||
|
"Furthermore, the sender's JID will be cached, "
|
||||||
|
"so that future traffic originating from that JID will also be classified as spam. "
|
||||||
|
"The default value is 'none'.")}},
|
||||||
|
{spam_urls_file,
|
||||||
|
#{value => ?T("none | Path"),
|
||||||
|
desc =>
|
||||||
|
?T("Path to a plain text file containing a list of "
|
||||||
|
"URLs known to be mentioned in spam message bodies. "
|
||||||
|
"Messages containing at least one of the listed URLs are classified as spam. "
|
||||||
|
"Furthermore, the sender's JID will be cached, "
|
||||||
|
"so that future traffic originating from that JID will be classified as spam as well. "
|
||||||
|
"The default value is 'none'.")}},
|
||||||
|
{whitelist_domains_file,
|
||||||
|
#{value => ?T("none | Path"),
|
||||||
|
desc =>
|
||||||
|
?T("Path to a file containing a list of "
|
||||||
|
"domains to whitelist from being blocked, one per line. "
|
||||||
|
"If either it is in 'spam_domains_file' or more realistically "
|
||||||
|
"in a domain sent by a RTBL host (see option 'rtbl_host') "
|
||||||
|
"then this domain will be ignored and stanzas from there won't be blocked. "
|
||||||
|
"The default value is 'none'.")}}],
|
||||||
example =>
|
example =>
|
||||||
["modules:",
|
["modules:",
|
||||||
" mod_antispam:",
|
" mod_antispam:",
|
||||||
|
@ -198,16 +234,13 @@ mod_doc() ->
|
||||||
-spec init(list()) -> {ok, state()} | {stop, term()}.
|
-spec init(list()) -> {ok, state()} | {stop, term()}.
|
||||||
init([Host, Opts]) ->
|
init([Host, Opts]) ->
|
||||||
process_flag(trap_exit, true),
|
process_flag(trap_exit, true),
|
||||||
Files =
|
mod_antispam_files:init_files(Host),
|
||||||
#{domains => gen_mod:get_opt(spam_domains_file, Opts),
|
FilesResults = read_files(Host),
|
||||||
jid => gen_mod:get_opt(spam_jids_file, Opts),
|
|
||||||
url => gen_mod:get_opt(spam_urls_file, Opts),
|
|
||||||
whitelist_domains => gen_mod:get_opt(whitelist_domains_file, Opts)},
|
|
||||||
try read_files(Files) of
|
|
||||||
#{jid := JIDsSet,
|
#{jid := JIDsSet,
|
||||||
url := URLsSet,
|
url := URLsSet,
|
||||||
domains := SpamDomainsSet,
|
domains := SpamDomainsSet,
|
||||||
whitelist_domains := WhitelistDomains} ->
|
whitelist_domains := WhitelistDomains} =
|
||||||
|
FilesResults,
|
||||||
ejabberd_hooks:add(local_send_to_resource_hook,
|
ejabberd_hooks:add(local_send_to_resource_hook,
|
||||||
Host,
|
Host,
|
||||||
mod_antispam_rtbl,
|
mod_antispam_rtbl,
|
||||||
|
@ -227,12 +260,7 @@ init([Host, Opts]) ->
|
||||||
rtbl_host = RTBLHost,
|
rtbl_host = RTBLHost,
|
||||||
rtbl_domains_node = RTBLDomainsNode},
|
rtbl_domains_node = RTBLDomainsNode},
|
||||||
mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
||||||
{ok, InitState}
|
{ok, InitState}.
|
||||||
catch
|
|
||||||
{Op, File, Reason} when Op == open; Op == read ->
|
|
||||||
?CRITICAL_MSG("Cannot ~s ~s: ~s", [Op, File, format_error(Reason)]),
|
|
||||||
{stop, config_error}
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec handle_call(term(), {pid(), term()}, state()) ->
|
-spec handle_call(term(), {pid(), term()}, state()) ->
|
||||||
{reply, {spam_filter, term()}, state()} | {noreply, state()}.
|
{reply, {spam_filter, term()}, state()} | {noreply, state()}.
|
||||||
|
@ -251,8 +279,8 @@ handle_call({check_body, URLs, JIDs, From},
|
||||||
Result2
|
Result2
|
||||||
end,
|
end,
|
||||||
{reply, {spam_filter, Result}, State2};
|
{reply, {spam_filter, Result}, State2};
|
||||||
handle_call({reload_files, Files}, _From, State) ->
|
handle_call(reload_spam_files, _From, State) ->
|
||||||
{Result, State1} = reload_files(Files, State),
|
{Result, State1} = reload_files(State),
|
||||||
{reply, {spam_filter, Result}, State1};
|
{reply, {spam_filter, Result}, State1};
|
||||||
handle_call({expire_cache, Age}, _From, State) ->
|
handle_call({expire_cache, Age}, _From, State) ->
|
||||||
{Result, State1} = expire_cache(Age, State),
|
{Result, State1} = expire_cache(Age, State),
|
||||||
|
@ -319,12 +347,7 @@ handle_cast({reload, NewOpts, OldOpts},
|
||||||
State1
|
State1
|
||||||
end,
|
end,
|
||||||
ok = mod_antispam_rtbl:unsubscribe(OldRTBLHost, OldRTBLDomainsNode, Host),
|
ok = mod_antispam_rtbl:unsubscribe(OldRTBLHost, OldRTBLDomainsNode, Host),
|
||||||
Files =
|
{_Result, State3} = reload_files(State2#state{blocked_domains = #{}}),
|
||||||
#{domains => gen_mod:get_opt(spam_domains_file, NewOpts),
|
|
||||||
jid => gen_mod:get_opt(spam_jids_file, NewOpts),
|
|
||||||
url => gen_mod:get_opt(spam_urls_file, NewOpts),
|
|
||||||
whitelist_domains => gen_mod:get_opt(whitelist_domains_file, NewOpts)},
|
|
||||||
{_Result, State3} = reload_files(Files, State2#state{blocked_domains = #{}}),
|
|
||||||
RTBLHost = gen_mod:get_opt(rtbl_host, NewOpts),
|
RTBLHost = gen_mod:get_opt(rtbl_host, NewOpts),
|
||||||
RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, NewOpts),
|
RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, NewOpts),
|
||||||
ok = mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
ok = mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
||||||
|
@ -404,6 +427,7 @@ terminate(Reason,
|
||||||
?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
|
?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
|
||||||
misc:cancel_timer(RTBLRetryTimer),
|
misc:cancel_timer(RTBLRetryTimer),
|
||||||
mod_antispam_dump:terminate_dumping(Host, Fd),
|
mod_antispam_dump:terminate_dumping(Host, Fd),
|
||||||
|
mod_antispam_files:terminate_files(Host),
|
||||||
mod_antispam_filter:terminate_filtering(Host),
|
mod_antispam_filter:terminate_filtering(Host),
|
||||||
ejabberd_hooks:delete(local_send_to_resource_hook,
|
ejabberd_hooks:delete(local_send_to_resource_hook,
|
||||||
Host,
|
Host,
|
||||||
|
@ -458,10 +482,9 @@ filter_body({_, Addrs}, Set, From, #state{host = Host} = State) ->
|
||||||
filter_body(none, _Set, _From, State) ->
|
filter_body(none, _Set, _From, State) ->
|
||||||
{ham, State}.
|
{ham, State}.
|
||||||
|
|
||||||
-spec reload_files(#{Type :: atom() => filename()}, state()) ->
|
-spec reload_files(state()) -> {ok | {error, binary()}, state()}.
|
||||||
{ok | {error, binary()}, state()}.
|
reload_files(#state{host = Host, blocked_domains = BlockedDomains} = State) ->
|
||||||
reload_files(Files, #state{host = Host, blocked_domains = BlockedDomains} = State) ->
|
case read_files(Host) of
|
||||||
try read_files(Files) of
|
|
||||||
#{jid := JIDsSet,
|
#{jid := JIDsSet,
|
||||||
url := URLsSet,
|
url := URLsSet,
|
||||||
domains := SpamDomainsSet,
|
domains := SpamDomainsSet,
|
||||||
|
@ -482,12 +505,9 @@ reload_files(Files, #state{host = Host, blocked_domains = BlockedDomains} = Stat
|
||||||
State#state{jid_set = JIDsSet,
|
State#state{jid_set = JIDsSet,
|
||||||
url_set = URLsSet,
|
url_set = URLsSet,
|
||||||
blocked_domains = maps:merge(BlockedDomains, set_to_map(SpamDomainsSet)),
|
blocked_domains = maps:merge(BlockedDomains, set_to_map(SpamDomainsSet)),
|
||||||
whitelist_domains = set_to_map(WhitelistDomains, false)}}
|
whitelist_domains = set_to_map(WhitelistDomains, false)}};
|
||||||
catch
|
{config_error, ErrorText} ->
|
||||||
{Op, File, Reason} when Op == open; Op == read ->
|
{{error, ErrorText}, State}
|
||||||
Txt = format("Cannot ~s ~s for ~s: ~s", [Op, File, Host, format_error(Reason)]),
|
|
||||||
?ERROR_MSG("~s", [Txt]),
|
|
||||||
{{error, Txt}, State}
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
set_to_map(Set) ->
|
set_to_map(Set) ->
|
||||||
|
@ -496,80 +516,18 @@ set_to_map(Set) ->
|
||||||
set_to_map(Set, V) ->
|
set_to_map(Set, V) ->
|
||||||
sets:fold(fun(K, M) -> M#{K => V} end, #{}, Set).
|
sets:fold(fun(K, M) -> M#{K => V} end, #{}, Set).
|
||||||
|
|
||||||
-spec read_files(#{Type => filename()}) ->
|
read_files(Host) ->
|
||||||
#{jid => jid_set(),
|
AccInitial =
|
||||||
url => url_set(),
|
#{jid => sets:new(),
|
||||||
Type => sets:set(binary())}
|
url => sets:new(),
|
||||||
when Type :: atom().
|
domains => sets:new(),
|
||||||
read_files(Files) ->
|
whitelist_domains => sets:new()},
|
||||||
maps:map(fun(Type, Filename) -> read_file(Filename, line_parser(Type)) end, Files).
|
Files =
|
||||||
|
#{jid => gen_mod:get_module_opt(Host, ?MODULE, spam_jids_file),
|
||||||
-spec line_parser(Type :: atom()) -> fun((binary()) -> binary()).
|
url => gen_mod:get_module_opt(Host, ?MODULE, spam_urls_file),
|
||||||
line_parser(jid) ->
|
domains => gen_mod:get_module_opt(Host, ?MODULE, spam_domains_file),
|
||||||
fun parse_jid/1;
|
whitelist_domains => gen_mod:get_module_opt(Host, ?MODULE, whitelist_domains_file)},
|
||||||
line_parser(url) ->
|
ejabberd_hooks:run_fold(antispam_get_lists, Host, AccInitial, [Files]).
|
||||||
fun parse_url/1;
|
|
||||||
line_parser(_) ->
|
|
||||||
fun trim/1.
|
|
||||||
|
|
||||||
-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set().
|
|
||||||
read_file(none, _ParseLine) ->
|
|
||||||
sets:new();
|
|
||||||
read_file(File, ParseLine) ->
|
|
||||||
case file:open(File, [read, binary, raw, {read_ahead, 65536}]) of
|
|
||||||
{ok, Fd} ->
|
|
||||||
try
|
|
||||||
read_line(Fd, ParseLine, sets:new())
|
|
||||||
catch
|
|
||||||
E ->
|
|
||||||
throw({read, File, E})
|
|
||||||
after
|
|
||||||
ok = file:close(Fd)
|
|
||||||
end;
|
|
||||||
{error, Reason} ->
|
|
||||||
throw({open, File, Reason})
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec read_line(file:io_device(),
|
|
||||||
fun((binary()) -> ljid() | url()),
|
|
||||||
jid_set() | url_set()) ->
|
|
||||||
jid_set() | url_set().
|
|
||||||
read_line(Fd, ParseLine, Set) ->
|
|
||||||
case file:read_line(Fd) of
|
|
||||||
{ok, Line} ->
|
|
||||||
read_line(Fd, ParseLine, sets:add_element(ParseLine(Line), Set));
|
|
||||||
{error, Reason} ->
|
|
||||||
throw(Reason);
|
|
||||||
eof ->
|
|
||||||
Set
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec parse_jid(binary()) -> ljid().
|
|
||||||
parse_jid(S) ->
|
|
||||||
try jid:decode(trim(S)) of
|
|
||||||
#jid{} = JID ->
|
|
||||||
jid:remove_resource(
|
|
||||||
jid:tolower(JID))
|
|
||||||
catch
|
|
||||||
_:{bad_jid, _} ->
|
|
||||||
throw({bad_jid, S})
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec parse_url(binary()) -> url().
|
|
||||||
parse_url(S) ->
|
|
||||||
URL = trim(S),
|
|
||||||
RE = <<"https?://\\S+$">>,
|
|
||||||
Options = [anchored, caseless, {capture, none}],
|
|
||||||
case re:run(URL, RE, Options) of
|
|
||||||
match ->
|
|
||||||
URL;
|
|
||||||
nomatch ->
|
|
||||||
throw({bad_url, S})
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec trim(binary()) -> binary().
|
|
||||||
trim(S) ->
|
|
||||||
re:replace(S, <<"\\s+$">>, <<>>, [{return, binary}]).
|
|
||||||
|
|
||||||
-spec get_proc_name(binary()) -> atom().
|
-spec get_proc_name(binary()) -> atom().
|
||||||
get_proc_name(Host) ->
|
get_proc_name(Host) ->
|
||||||
|
@ -587,14 +545,6 @@ sets_equal(A, B) ->
|
||||||
format(Format, Data) ->
|
format(Format, Data) ->
|
||||||
iolist_to_binary(io_lib:format(Format, Data)).
|
iolist_to_binary(io_lib:format(Format, Data)).
|
||||||
|
|
||||||
-spec format_error(atom() | tuple()) -> binary().
|
|
||||||
format_error({bad_jid, JID}) ->
|
|
||||||
<<"Not a valid JID: ", JID/binary>>;
|
|
||||||
format_error({bad_url, URL}) ->
|
|
||||||
<<"Not an HTTP(S) URL: ", URL/binary>>;
|
|
||||||
format_error(Reason) ->
|
|
||||||
list_to_binary(file:format_error(Reason)).
|
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
%%| Caching
|
%%| Caching
|
||||||
|
|
||||||
|
@ -756,16 +706,11 @@ try_call_by_host(Host, Call) ->
|
||||||
reload_spam_filter_files(<<"global">>) ->
|
reload_spam_filter_files(<<"global">>) ->
|
||||||
for_all_hosts(fun reload_spam_filter_files/1, []);
|
for_all_hosts(fun reload_spam_filter_files/1, []);
|
||||||
reload_spam_filter_files(Host) ->
|
reload_spam_filter_files(Host) ->
|
||||||
LServer = jid:nameprep(Host),
|
case try_call_by_host(Host, reload_spam_files) of
|
||||||
Files =
|
|
||||||
#{domains => gen_mod:get_module_opt(LServer, ?MODULE, spam_domains_file),
|
|
||||||
jid => gen_mod:get_module_opt(LServer, ?MODULE, spam_jids_file),
|
|
||||||
url => gen_mod:get_module_opt(LServer, ?MODULE, spam_urls_file)},
|
|
||||||
case try_call_by_host(Host, {reload_files, Files}) of
|
|
||||||
{spam_filter, ok} ->
|
{spam_filter, ok} ->
|
||||||
ok;
|
ok;
|
||||||
{spam_filter, {error, Txt}} ->
|
{spam_filter, {error, Txt}} ->
|
||||||
{error, binary_to_list(Txt)};
|
{error, Txt};
|
||||||
{error, _R} = Error ->
|
{error, _R} = Error ->
|
||||||
Error
|
Error
|
||||||
end.
|
end.
|
||||||
|
|
|
@ -38,14 +38,11 @@
|
||||||
-export([dump_spam_stanza/1, reopen_log/0]).
|
-export([dump_spam_stanza/1, reopen_log/0]).
|
||||||
|
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
|
-include("mod_antispam.hrl").
|
||||||
-include("translate.hrl").
|
-include("translate.hrl").
|
||||||
|
|
||||||
-include_lib("xmpp/include/xmpp.hrl").
|
-include_lib("xmpp/include/xmpp.hrl").
|
||||||
|
|
||||||
-type filename() :: binary() | none | false.
|
|
||||||
|
|
||||||
-define(MODULE_PARENT, mod_antispam).
|
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
%%| Exported
|
%%| Exported
|
||||||
|
|
||||||
|
@ -157,7 +154,7 @@ write_stanza_dump(Fd, XML) ->
|
||||||
%%| Auxiliary
|
%%| Auxiliary
|
||||||
|
|
||||||
get_path_option(Host) ->
|
get_path_option(Host) ->
|
||||||
Opts = gen_mod:get_module_opts(Host, ?MODULE_PARENT),
|
Opts = gen_mod:get_module_opts(Host, ?MODULE_ANTISPAM),
|
||||||
get_path_option(Host, Opts).
|
get_path_option(Host, Opts).
|
||||||
|
|
||||||
get_path_option(Host, Opts) ->
|
get_path_option(Host, Opts) ->
|
||||||
|
@ -178,11 +175,11 @@ get_path_option(Host, Opts) ->
|
||||||
|
|
||||||
-spec get_proc_name(binary()) -> atom().
|
-spec get_proc_name(binary()) -> atom().
|
||||||
get_proc_name(Host) ->
|
get_proc_name(Host) ->
|
||||||
gen_mod:get_module_proc(Host, ?MODULE_PARENT).
|
gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM).
|
||||||
|
|
||||||
-spec get_spam_filter_hosts() -> [binary()].
|
-spec get_spam_filter_hosts() -> [binary()].
|
||||||
get_spam_filter_hosts() ->
|
get_spam_filter_hosts() ->
|
||||||
[H || H <- ejabberd_option:hosts(), gen_mod:is_loaded(H, ?MODULE_PARENT)].
|
[H || H <- ejabberd_option:hosts(), gen_mod:is_loaded(H, ?MODULE_ANTISPAM)].
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
182
src/mod_antispam_files.erl
Normal file
182
src/mod_antispam_files.erl
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
%%%----------------------------------------------------------------------
|
||||||
|
%%% File : mod_antispam_files.erl
|
||||||
|
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
|
||||||
|
%%% Author : Stefan Strigler <stefan@strigler.de>
|
||||||
|
%%% Purpose : Filter spam messages based on sender JID and content
|
||||||
|
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
|
||||||
|
%%%
|
||||||
|
%%%
|
||||||
|
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
|
||||||
|
%%%
|
||||||
|
%%% This program is free software; you can redistribute it and/or
|
||||||
|
%%% modify it under the terms of the GNU General Public License as
|
||||||
|
%%% published by the Free Software Foundation; either version 2 of the
|
||||||
|
%%% License, or (at your option) any later version.
|
||||||
|
%%%
|
||||||
|
%%% This program is distributed in the hope that it will be useful,
|
||||||
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
%%% General Public License for more details.
|
||||||
|
%%%
|
||||||
|
%%% You should have received a copy of the GNU General Public License along
|
||||||
|
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
%%%
|
||||||
|
%%%----------------------------------------------------------------------
|
||||||
|
|
||||||
|
%%| definitions
|
||||||
|
%% @format-begin
|
||||||
|
|
||||||
|
-module(mod_antispam_files).
|
||||||
|
|
||||||
|
-author('holger@zedat.fu-berlin.de').
|
||||||
|
-author('stefan@strigler.de').
|
||||||
|
|
||||||
|
%% Exported
|
||||||
|
-export([init_files/1, terminate_files/1]).
|
||||||
|
% Hooks
|
||||||
|
-export([get_files_lists/2]).
|
||||||
|
|
||||||
|
-include("ejabberd_commands.hrl").
|
||||||
|
-include("logger.hrl").
|
||||||
|
-include("mod_antispam.hrl").
|
||||||
|
-include("translate.hrl").
|
||||||
|
|
||||||
|
-include_lib("xmpp/include/xmpp.hrl").
|
||||||
|
|
||||||
|
-type files_map() :: #{atom() => filename()}.
|
||||||
|
-type lists_map() ::
|
||||||
|
#{jid => jid_set(),
|
||||||
|
url => url_set(),
|
||||||
|
atom() => sets:set(binary())}.
|
||||||
|
|
||||||
|
-define(COMMAND_TIMEOUT, timer:seconds(30)).
|
||||||
|
-define(DEFAULT_CACHE_SIZE, 10000).
|
||||||
|
-define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>).
|
||||||
|
-define(HTTPC_TIMEOUT, timer:seconds(3)).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%%| Exported
|
||||||
|
|
||||||
|
%% Registers get_files_lists/2 of this module as a handler of the
%% antispam_get_lists hook for Host, at sequence number 50.
init_files(Host) ->
    HookName = antispam_get_lists,
    Sequence = 50,
    ejabberd_hooks:add(HookName, Host, ?MODULE, get_files_lists, Sequence).
|
||||||
|
|
||||||
|
%% Removes the antispam_get_lists handler for Host, using the same
%% module, function and sequence number (50) given when it was added.
terminate_files(Host) ->
    HookName = antispam_get_lists,
    Sequence = 50,
    ejabberd_hooks:delete(HookName, Host, ?MODULE, get_files_lists, Sequence).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%%| Hooks
|
||||||
|
|
||||||
|
%% Handler of the antispam_get_lists hook.
%%
%% Acc holds the sets collected so far (keys jid, url, domains and
%% whitelist_domains); Files maps the same keys to the configured file
%% name, or to 'none'.  Every file is read and its parsed entries are
%% merged (set union) into the matching set of Acc.
%%
%% When a file cannot be opened or read, the problem is logged as critical
%% and {stop, {config_error, ErrorText}} is returned instead of a map.
-spec get_files_lists(lists_map(), files_map()) ->
          lists_map() | {stop, {config_error, binary()}}.
get_files_lists(#{jid := AccJids,
                  url := AccUrls,
                  domains := AccDomains,
                  whitelist_domains := AccWhitelist} = Acc,
                Files) ->
    try read_files(Files) of
        #{jid := JIDsSet,
          url := URLsSet,
          domains := SpamDomainsSet,
          whitelist_domains := WhitelistDomains} ->
            Acc#{jid => sets:union(AccJids, JIDsSet),
                 url => sets:union(AccUrls, URLsSet),
                 domains => sets:union(AccDomains, SpamDomainsSet),
                 whitelist_domains => sets:union(AccWhitelist, WhitelistDomains)}
    catch
        {Op, File, Reason} when Op == open; Op == read ->
            ErrorText =
                format("Error trying to ~s file ~s: ~s", [Op, File, format_error(Reason)]),
            %% ErrorText embeds the file name and possibly a line taken from
            %% the file, so it may contain '~'.  Pass it as an argument and
            %% never as the format string itself.
            ?CRITICAL_MSG("~ts", [ErrorText]),
            {stop, {config_error, ErrorText}}
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%%| read_files
|
||||||
|
|
||||||
|
%% Turns a map of list type => file name into a map of list type => set
%% of parsed entries.  The parser applied to the lines of each file is
%% chosen from the list type by line_parser/1.
-spec read_files(files_map()) -> lists_map().
read_files(Files) ->
    LoadOne =
        fun(Type, Filename) ->
                Parser = line_parser(Type),
                read_file(Filename, Parser)
        end,
    maps:map(LoadOne, Files).
|
||||||
|
|
||||||
|
%% Selects the function used to parse one line of a list file:
%% 'jid' lines are parsed by parse_jid/1, 'url' lines by parse_url/1,
%% and lines of any other list type are only trimmed by trim/1.
%%
%% The result type includes ljid() because parse_jid/1 does not return a
%% binary; this matches the parser type accepted by read_file/2.
-spec line_parser(Type :: atom()) -> fun((binary()) -> ljid() | url()).
line_parser(jid) ->
    fun parse_jid/1;
line_parser(url) ->
    fun parse_url/1;
line_parser(_) ->
    fun trim/1.
|
||||||
|
|
||||||
|
%% Reads a list file and returns the set of its parsed lines.
%%
%% 'none' yields an empty set without touching the file system.
%% Otherwise File is opened for raw binary reading with a 65536 byte
%% read-ahead buffer and every line is passed through ParseLine.
%%
%% Throws {open, File, Reason} when the file cannot be opened and
%% {read, File, Thrown} when reading or parsing throws.
-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set().
read_file(none, _ParseLine) ->
    sets:new();
read_file(File, ParseLine) ->
    OpenModes = [read, binary, raw, {read_ahead, 65536}],
    case file:open(File, OpenModes) of
        {ok, Fd} ->
            read_and_close(Fd, File, ParseLine);
        {error, Reason} ->
            throw({open, File, Reason})
    end.

%% Collects all lines of the already opened Fd and closes it afterwards,
%% whether or not reading succeeded.
read_and_close(Fd, File, ParseLine) ->
    EmptySet = sets:new(),
    try
        read_line(Fd, ParseLine, EmptySet)
    catch
        throw:Thrown ->
            throw({read, File, Thrown})
    after
        ok = file:close(Fd)
    end.
|
||||||
|
|
||||||
|
%% Reads Fd line by line until end of file, adding ParseLine(Line) for
%% every line to the accumulated set, and returns that set.
%% A read error is thrown as the bare reason returned by file:read_line/1.
-spec read_line(file:io_device(),
                fun((binary()) -> ljid() | url()),
                jid_set() | url_set()) ->
          jid_set() | url_set().
read_line(Fd, ParseLine, Acc) ->
    case file:read_line(Fd) of
        eof ->
            Acc;
        {ok, RawLine} ->
            Entry = ParseLine(RawLine),
            read_line(Fd, ParseLine, sets:add_element(Entry, Acc));
        {error, Why} ->
            throw(Why)
    end.
|
||||||
|
|
||||||
|
%% Parses one line of a JID list file.
%%
%% Trailing whitespace (including the line terminator) is removed, the
%% rest is decoded as a JID, and the lower-cased JID without resource is
%% returned.
%%
%% Throws {bad_jid, Text} when decoding fails.  Text is the trimmed text
%% that was actually decoded, so the resulting error message does not end
%% with the line's newline.
-spec parse_jid(binary()) -> ljid().
parse_jid(S) ->
    Trimmed = trim(S),
    try jid:decode(Trimmed) of
        #jid{} = JID ->
            jid:remove_resource(
              jid:tolower(JID))
    catch
        _:{bad_jid, _} ->
            throw({bad_jid, Trimmed})
    end.
|
||||||
|
|
||||||
|
%% Parses one line of a URL list file.
%%
%% Trailing whitespace (including the line terminator) is removed; the
%% rest must start with "http://" or "https://" (case-insensitive) and
%% contain no whitespace up to its end.  The trimmed URL is returned.
%%
%% Throws {bad_url, Text} otherwise.  Text is the trimmed text that was
%% checked, so the resulting error message does not end with the line's
%% newline.
-spec parse_url(binary()) -> url().
parse_url(S) ->
    URL = trim(S),
    RE = <<"https?://\\S+$">>,
    Options = [anchored, caseless, {capture, none}],
    case re:run(URL, RE, Options) of
        match ->
            URL;
        nomatch ->
            throw({bad_url, URL})
    end.
|
||||||
|
|
||||||
|
%% Returns S without its trailing run of whitespace characters.
%% Leading and inner whitespace is left untouched.
-spec trim(binary()) -> binary().
trim(S) ->
    TrailingSpace = <<"\\s+$">>,
    Replacement = <<>>,
    re:replace(S, TrailingSpace, Replacement, [{return, binary}]).
|
||||||
|
|
||||||
|
%% Same helper as the one in mod_antispam.erl: formats Data according to
%% Format with io_lib:format/2 and returns the result as one flat binary.
-spec format(io:format(), [term()]) -> binary().
format(Format, Data) ->
    Chars = io_lib:format(Format, Data),
    iolist_to_binary(Chars).
|
||||||
|
|
||||||
|
%% Converts the reason of a failed file open/read into readable text.
%% {bad_jid, Text} and {bad_url, Text} come from the line parsers of this
%% module; any other reason is handed to file:format_error/1.
-spec format_error(atom() | tuple()) -> binary().
format_error(Reason) ->
    case Reason of
        {bad_jid, JID} ->
            <<"Not a valid JID: ", JID/binary>>;
        {bad_url, URL} ->
            <<"Not an HTTP(S) URL: ", URL/binary>>;
        _ ->
            Description = file:format_error(Reason),
            list_to_binary(Description)
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:
|
|
@ -38,13 +38,12 @@
|
||||||
|
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
-include("translate.hrl").
|
-include("translate.hrl").
|
||||||
|
-include("mod_antispam.hrl").
|
||||||
|
|
||||||
-include_lib("xmpp/include/xmpp.hrl").
|
-include_lib("xmpp/include/xmpp.hrl").
|
||||||
|
|
||||||
-type url() :: binary().
|
|
||||||
-type s2s_in_state() :: ejabberd_s2s_in:state().
|
-type s2s_in_state() :: ejabberd_s2s_in:state().
|
||||||
|
|
||||||
-define(MODULE_PARENT, mod_antispam).
|
|
||||||
-define(HTTPC_TIMEOUT, timer:seconds(3)).
|
-define(HTTPC_TIMEOUT, timer:seconds(3)).
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
@ -128,9 +127,9 @@ s2s_in_handle_info(State, _) ->
|
||||||
|
|
||||||
-spec needs_checking(jid(), jid()) -> boolean().
|
-spec needs_checking(jid(), jid()) -> boolean().
|
||||||
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
|
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
|
||||||
case gen_mod:is_loaded(LServer, ?MODULE_PARENT) of
|
case gen_mod:is_loaded(LServer, ?MODULE_ANTISPAM) of
|
||||||
true ->
|
true ->
|
||||||
Access = gen_mod:get_module_opt(LServer, ?MODULE_PARENT, access_spam),
|
Access = gen_mod:get_module_opt(LServer, ?MODULE_ANTISPAM, access_spam),
|
||||||
case acl:match_rule(LServer, Access, To) of
|
case acl:match_rule(LServer, Access, To) of
|
||||||
allow ->
|
allow ->
|
||||||
?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
|
?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
|
||||||
|
@ -144,7 +143,7 @@ needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
|
||||||
To) % likely a gateway
|
To) % likely a gateway
|
||||||
end;
|
end;
|
||||||
false ->
|
false ->
|
||||||
?DEBUG("~s not loaded for ~s", [?MODULE_PARENT, LServer]),
|
?DEBUG("~s not loaded for ~s", [?MODULE_ANTISPAM, LServer]),
|
||||||
false
|
false
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -292,7 +291,7 @@ reject(_) ->
|
||||||
|
|
||||||
-spec get_proc_name(binary()) -> atom().
|
-spec get_proc_name(binary()) -> atom().
|
||||||
get_proc_name(Host) ->
|
get_proc_name(Host) ->
|
||||||
gen_mod:get_module_proc(Host, ?MODULE_PARENT).
|
gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM).
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
-include_lib("xmpp/include/xmpp.hrl").
|
-include_lib("xmpp/include/xmpp.hrl").
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
|
-include("mod_antispam.hrl").
|
||||||
|
|
||||||
-define(SERVICE_MODULE, mod_antispam).
|
-define(SERVICE_MODULE, mod_antispam).
|
||||||
-define(SERVICE_JID_PREFIX, "rtbl-").
|
-define(SERVICE_JID_PREFIX, "rtbl-").
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue