mirror of
https://github.com/processone/ejabberd
synced 2025-10-03 17:59:31 +02:00
Merge pull request #3 from badlop/antispam_files
Move spam file management to a submodule
This commit is contained in:
commit
4a51bf90ab
6 changed files with 330 additions and 180 deletions
26
include/mod_antispam.hrl
Normal file
26
include/mod_antispam.hrl
Normal file
|
@ -0,0 +1,26 @@
|
|||
%%%----------------------------------------------------------------------
|
||||
%%%
|
||||
%%% ejabberd, Copyright (C) 2002-2025 ProcessOne
|
||||
%%%
|
||||
%%% This program is free software; you can redistribute it and/or
|
||||
%%% modify it under the terms of the GNU General Public License as
|
||||
%%% published by the Free Software Foundation; either version 2 of the
|
||||
%%% License, or (at your option) any later version.
|
||||
%%%
|
||||
%%% This program is distributed in the hope that it will be useful,
|
||||
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
%%% General Public License for more details.
|
||||
%%%
|
||||
%%% You should have received a copy of the GNU General Public License along
|
||||
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
%%%
|
||||
%%%----------------------------------------------------------------------
|
||||
|
||||
%% Name of the main antispam module; the submodules pass it to gen_mod
%% lookups (module options, process name, is_loaded checks).
-define(MODULE_ANTISPAM, mod_antispam).
|
||||
|
||||
%% A URL listed in a spam URLs file, kept as a binary.
-type url() :: binary().
|
||||
%% Value of a file-path option: a path, or one of the atoms none / false.
-type filename() :: binary() | none | false.
|
||||
%% Set of JIDs. NOTE(review): ljid() is not defined in this header, so it
%% presumably has to come from a header included before this one - confirm.
-type jid_set() :: sets:set(ljid()).
|
||||
%% Set of URLs.
-type url_set() :: sets:set(url()).
|
|
@ -63,15 +63,11 @@
|
|||
|
||||
-include("ejabberd_commands.hrl").
|
||||
-include("logger.hrl").
|
||||
-include("mod_antispam.hrl").
|
||||
-include("translate.hrl").
|
||||
|
||||
-include_lib("xmpp/include/xmpp.hrl").
|
||||
|
||||
-type url() :: binary().
|
||||
-type filename() :: binary() | none | false.
|
||||
-type jid_set() :: sets:set(ljid()).
|
||||
-type url_set() :: sets:set(url()).
|
||||
|
||||
-record(state,
|
||||
{host = <<>> :: binary(),
|
||||
dump_fd = undefined :: file:io_device() | undefined,
|
||||
|
@ -129,10 +125,17 @@ depends(_Host, _Opts) ->
|
|||
[{mod_pubsub, soft}].
|
||||
|
||||
-spec mod_opt_type(atom()) -> econf:validator().
|
||||
mod_opt_type(spam_domains_file) ->
|
||||
mod_opt_type(access_spam) ->
|
||||
econf:acl();
|
||||
mod_opt_type(cache_size) ->
|
||||
econf:pos_int(unlimited);
|
||||
mod_opt_type(rtbl_host) ->
|
||||
econf:either(
|
||||
econf:enum([none]), econf:file());
|
||||
mod_opt_type(whitelist_domains_file) ->
|
||||
econf:enum([none]), econf:host());
|
||||
mod_opt_type(rtbl_domains_node) ->
|
||||
econf:non_empty(
|
||||
econf:binary());
|
||||
mod_opt_type(spam_domains_file) ->
|
||||
econf:either(
|
||||
econf:enum([none]), econf:file());
|
||||
mod_opt_type(spam_dump_file) ->
|
||||
|
@ -144,28 +147,21 @@ mod_opt_type(spam_jids_file) ->
|
|||
mod_opt_type(spam_urls_file) ->
|
||||
econf:either(
|
||||
econf:enum([none]), econf:file());
|
||||
mod_opt_type(access_spam) ->
|
||||
econf:acl();
|
||||
mod_opt_type(cache_size) ->
|
||||
econf:pos_int(unlimited);
|
||||
mod_opt_type(rtbl_host) ->
|
||||
mod_opt_type(whitelist_domains_file) ->
|
||||
econf:either(
|
||||
econf:enum([none]), econf:host());
|
||||
mod_opt_type(rtbl_domains_node) ->
|
||||
econf:non_empty(
|
||||
econf:binary()).
|
||||
econf:enum([none]), econf:file()).
|
||||
|
||||
-spec mod_options(binary()) -> [{atom(), any()}].
|
||||
mod_options(_Host) ->
|
||||
[{spam_domains_file, none},
|
||||
[{access_spam, none},
|
||||
{cache_size, ?DEFAULT_CACHE_SIZE},
|
||||
{rtbl_domains_node, ?DEFAULT_RTBL_DOMAINS_NODE},
|
||||
{rtbl_host, none},
|
||||
{spam_domains_file, none},
|
||||
{spam_dump_file, false},
|
||||
{spam_jids_file, none},
|
||||
{spam_urls_file, none},
|
||||
{whitelist_domains_file, none},
|
||||
{access_spam, none},
|
||||
{cache_size, ?DEFAULT_CACHE_SIZE},
|
||||
{rtbl_host, none},
|
||||
{rtbl_domains_node, ?DEFAULT_RTBL_DOMAINS_NODE}].
|
||||
{whitelist_domains_file, none}].
|
||||
|
||||
mod_doc() ->
|
||||
#{desc => ?T("Reads from text file and RTBL, filters stanzas and writes dump file."),
|
||||
|
@ -175,10 +171,20 @@ mod_doc() ->
|
|||
#{value => ?T("Access"),
|
||||
desc =>
|
||||
?T("Access rule that controls what accounts may receive spam messages. "
|
||||
"If the rule returns `allow` for a given recipient, "
|
||||
"If the rule returns 'allow' for a given recipient, "
|
||||
"spam messages aren't rejected for that recipient. "
|
||||
"The default value is 'none', which means that all recipients "
|
||||
"are subject to spam filtering verification.")}},
|
||||
{spam_domains_file,
|
||||
#{value => ?T("none | Path"),
|
||||
desc =>
|
||||
?T("Path to a plain text file containing a list of "
|
||||
"known spam domains, one domain per line. "
|
||||
"Messages and subscription requests sent from one of the listed domains "
|
||||
"are classified as spam if sender is not in recipient's roster. "
|
||||
"This list of domains gets merged with the one retrieved "
|
||||
"by an RTBL host if any given. "
|
||||
"The default value is 'none'.")}},
|
||||
{spam_dump_file,
|
||||
#{value => ?T("false | true | Path"),
|
||||
desc =>
|
||||
|
@ -186,7 +192,37 @@ mod_doc() ->
|
|||
"Use an absolute path, or the '@LOG_PATH@' macro to store logs "
|
||||
"in the same place that the other ejabberd log files. "
|
||||
"If set to 'false', does not dump stanzas, this is the default. "
|
||||
"If set to 'true', it stores in '\"@LOG_PATH@/spam_dump_@HOST@.log\"'.")}}],
|
||||
"If set to 'true', it stores in '\"@LOG_PATH@/spam_dump_@HOST@.log\"'.")}},
|
||||
{spam_jids_file,
|
||||
#{value => ?T("none | Path"),
|
||||
desc =>
|
||||
?T("Path to a plain text file containing a list of "
|
||||
"known spammer JIDs, one JID per line. "
|
||||
"Messages and subscription requests sent from one of "
|
||||
"the listed JIDs are classified as spam. "
|
||||
"Messages containing at least one of the listed JIDs"
|
||||
"are classified as spam as well. "
|
||||
"Furthermore, the sender's JID will be cached, "
|
||||
"so that future traffic originating from that JID will also be classified as spam. "
|
||||
"The default value is 'none'.")}},
|
||||
{spam_urls_file,
|
||||
#{value => ?T("none | Path"),
|
||||
desc =>
|
||||
?T("Path to a plain text file containing a list of "
|
||||
"URLs known to be mentioned in spam message bodies. "
|
||||
"Messages containing at least one of the listed URLs are classified as spam. "
|
||||
"Furthermore, the sender's JID will be cached, "
|
||||
"so that future traffic originating from that JID will be classified as spam as well. "
|
||||
"The default value is 'none'.")}},
|
||||
{whitelist_domains_file,
|
||||
#{value => ?T("none | Path"),
|
||||
desc =>
|
||||
?T("Path to a file containing a list of "
|
||||
"domains to whitelist from being blocked, one per line. "
|
||||
"If either it is in 'spam_domains_file' or more realistically "
|
||||
"in a domain sent by a RTBL host (see option 'rtbl_host') "
|
||||
"then this domain will be ignored and stanzas from there won't be blocked. "
|
||||
"The default value is 'none'.")}}],
|
||||
example =>
|
||||
["modules:",
|
||||
" mod_antispam:",
|
||||
|
@ -198,16 +234,13 @@ mod_doc() ->
|
|||
-spec init(list()) -> {ok, state()} | {stop, term()}.
|
||||
init([Host, Opts]) ->
|
||||
process_flag(trap_exit, true),
|
||||
Files =
|
||||
#{domains => gen_mod:get_opt(spam_domains_file, Opts),
|
||||
jid => gen_mod:get_opt(spam_jids_file, Opts),
|
||||
url => gen_mod:get_opt(spam_urls_file, Opts),
|
||||
whitelist_domains => gen_mod:get_opt(whitelist_domains_file, Opts)},
|
||||
try read_files(Files) of
|
||||
mod_antispam_files:init_files(Host),
|
||||
FilesResults = read_files(Host),
|
||||
#{jid := JIDsSet,
|
||||
url := URLsSet,
|
||||
domains := SpamDomainsSet,
|
||||
whitelist_domains := WhitelistDomains} ->
|
||||
whitelist_domains := WhitelistDomains} =
|
||||
FilesResults,
|
||||
ejabberd_hooks:add(local_send_to_resource_hook,
|
||||
Host,
|
||||
mod_antispam_rtbl,
|
||||
|
@ -227,12 +260,7 @@ init([Host, Opts]) ->
|
|||
rtbl_host = RTBLHost,
|
||||
rtbl_domains_node = RTBLDomainsNode},
|
||||
mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
||||
{ok, InitState}
|
||||
catch
|
||||
{Op, File, Reason} when Op == open; Op == read ->
|
||||
?CRITICAL_MSG("Cannot ~s ~s: ~s", [Op, File, format_error(Reason)]),
|
||||
{stop, config_error}
|
||||
end.
|
||||
{ok, InitState}.
|
||||
|
||||
-spec handle_call(term(), {pid(), term()}, state()) ->
|
||||
{reply, {spam_filter, term()}, state()} | {noreply, state()}.
|
||||
|
@ -251,8 +279,8 @@ handle_call({check_body, URLs, JIDs, From},
|
|||
Result2
|
||||
end,
|
||||
{reply, {spam_filter, Result}, State2};
|
||||
handle_call({reload_files, Files}, _From, State) ->
|
||||
{Result, State1} = reload_files(Files, State),
|
||||
handle_call(reload_spam_files, _From, State) ->
|
||||
{Result, State1} = reload_files(State),
|
||||
{reply, {spam_filter, Result}, State1};
|
||||
handle_call({expire_cache, Age}, _From, State) ->
|
||||
{Result, State1} = expire_cache(Age, State),
|
||||
|
@ -319,12 +347,7 @@ handle_cast({reload, NewOpts, OldOpts},
|
|||
State1
|
||||
end,
|
||||
ok = mod_antispam_rtbl:unsubscribe(OldRTBLHost, OldRTBLDomainsNode, Host),
|
||||
Files =
|
||||
#{domains => gen_mod:get_opt(spam_domains_file, NewOpts),
|
||||
jid => gen_mod:get_opt(spam_jids_file, NewOpts),
|
||||
url => gen_mod:get_opt(spam_urls_file, NewOpts),
|
||||
whitelist_domains => gen_mod:get_opt(whitelist_domains_file, NewOpts)},
|
||||
{_Result, State3} = reload_files(Files, State2#state{blocked_domains = #{}}),
|
||||
{_Result, State3} = reload_files(State2#state{blocked_domains = #{}}),
|
||||
RTBLHost = gen_mod:get_opt(rtbl_host, NewOpts),
|
||||
RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, NewOpts),
|
||||
ok = mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host),
|
||||
|
@ -404,6 +427,7 @@ terminate(Reason,
|
|||
?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
|
||||
misc:cancel_timer(RTBLRetryTimer),
|
||||
mod_antispam_dump:terminate_dumping(Host, Fd),
|
||||
mod_antispam_files:terminate_files(Host),
|
||||
mod_antispam_filter:terminate_filtering(Host),
|
||||
ejabberd_hooks:delete(local_send_to_resource_hook,
|
||||
Host,
|
||||
|
@ -458,10 +482,9 @@ filter_body({_, Addrs}, Set, From, #state{host = Host} = State) ->
|
|||
filter_body(none, _Set, _From, State) ->
|
||||
{ham, State}.
|
||||
|
||||
-spec reload_files(#{Type :: atom() => filename()}, state()) ->
|
||||
{ok | {error, binary()}, state()}.
|
||||
reload_files(Files, #state{host = Host, blocked_domains = BlockedDomains} = State) ->
|
||||
try read_files(Files) of
|
||||
-spec reload_files(state()) -> {ok | {error, binary()}, state()}.
|
||||
reload_files(#state{host = Host, blocked_domains = BlockedDomains} = State) ->
|
||||
case read_files(Host) of
|
||||
#{jid := JIDsSet,
|
||||
url := URLsSet,
|
||||
domains := SpamDomainsSet,
|
||||
|
@ -482,12 +505,9 @@ reload_files(Files, #state{host = Host, blocked_domains = BlockedDomains} = Stat
|
|||
State#state{jid_set = JIDsSet,
|
||||
url_set = URLsSet,
|
||||
blocked_domains = maps:merge(BlockedDomains, set_to_map(SpamDomainsSet)),
|
||||
whitelist_domains = set_to_map(WhitelistDomains, false)}}
|
||||
catch
|
||||
{Op, File, Reason} when Op == open; Op == read ->
|
||||
Txt = format("Cannot ~s ~s for ~s: ~s", [Op, File, Host, format_error(Reason)]),
|
||||
?ERROR_MSG("~s", [Txt]),
|
||||
{{error, Txt}, State}
|
||||
whitelist_domains = set_to_map(WhitelistDomains, false)}};
|
||||
{config_error, ErrorText} ->
|
||||
{{error, ErrorText}, State}
|
||||
end.
|
||||
|
||||
set_to_map(Set) ->
|
||||
|
@ -496,80 +516,18 @@ set_to_map(Set) ->
|
|||
set_to_map(Set, V) ->
|
||||
sets:fold(fun(K, M) -> M#{K => V} end, #{}, Set).
|
||||
|
||||
-spec read_files(#{Type => filename()}) ->
|
||||
#{jid => jid_set(),
|
||||
url => url_set(),
|
||||
Type => sets:set(binary())}
|
||||
when Type :: atom().
|
||||
read_files(Files) ->
|
||||
maps:map(fun(Type, Filename) -> read_file(Filename, line_parser(Type)) end, Files).
|
||||
|
||||
-spec line_parser(Type :: atom()) -> fun((binary()) -> binary()).
|
||||
line_parser(jid) ->
|
||||
fun parse_jid/1;
|
||||
line_parser(url) ->
|
||||
fun parse_url/1;
|
||||
line_parser(_) ->
|
||||
fun trim/1.
|
||||
|
||||
-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set().
|
||||
read_file(none, _ParseLine) ->
|
||||
sets:new();
|
||||
read_file(File, ParseLine) ->
|
||||
case file:open(File, [read, binary, raw, {read_ahead, 65536}]) of
|
||||
{ok, Fd} ->
|
||||
try
|
||||
read_line(Fd, ParseLine, sets:new())
|
||||
catch
|
||||
E ->
|
||||
throw({read, File, E})
|
||||
after
|
||||
ok = file:close(Fd)
|
||||
end;
|
||||
{error, Reason} ->
|
||||
throw({open, File, Reason})
|
||||
end.
|
||||
|
||||
-spec read_line(file:io_device(),
|
||||
fun((binary()) -> ljid() | url()),
|
||||
jid_set() | url_set()) ->
|
||||
jid_set() | url_set().
|
||||
read_line(Fd, ParseLine, Set) ->
|
||||
case file:read_line(Fd) of
|
||||
{ok, Line} ->
|
||||
read_line(Fd, ParseLine, sets:add_element(ParseLine(Line), Set));
|
||||
{error, Reason} ->
|
||||
throw(Reason);
|
||||
eof ->
|
||||
Set
|
||||
end.
|
||||
|
||||
-spec parse_jid(binary()) -> ljid().
|
||||
parse_jid(S) ->
|
||||
try jid:decode(trim(S)) of
|
||||
#jid{} = JID ->
|
||||
jid:remove_resource(
|
||||
jid:tolower(JID))
|
||||
catch
|
||||
_:{bad_jid, _} ->
|
||||
throw({bad_jid, S})
|
||||
end.
|
||||
|
||||
-spec parse_url(binary()) -> url().
|
||||
parse_url(S) ->
|
||||
URL = trim(S),
|
||||
RE = <<"https?://\\S+$">>,
|
||||
Options = [anchored, caseless, {capture, none}],
|
||||
case re:run(URL, RE, Options) of
|
||||
match ->
|
||||
URL;
|
||||
nomatch ->
|
||||
throw({bad_url, S})
|
||||
end.
|
||||
|
||||
-spec trim(binary()) -> binary().
|
||||
trim(S) ->
|
||||
re:replace(S, <<"\\s+$">>, <<>>, [{return, binary}]).
|
||||
read_files(Host) ->
|
||||
AccInitial =
|
||||
#{jid => sets:new(),
|
||||
url => sets:new(),
|
||||
domains => sets:new(),
|
||||
whitelist_domains => sets:new()},
|
||||
Files =
|
||||
#{jid => gen_mod:get_module_opt(Host, ?MODULE, spam_jids_file),
|
||||
url => gen_mod:get_module_opt(Host, ?MODULE, spam_urls_file),
|
||||
domains => gen_mod:get_module_opt(Host, ?MODULE, spam_domains_file),
|
||||
whitelist_domains => gen_mod:get_module_opt(Host, ?MODULE, whitelist_domains_file)},
|
||||
ejabberd_hooks:run_fold(antispam_get_lists, Host, AccInitial, [Files]).
|
||||
|
||||
-spec get_proc_name(binary()) -> atom().
|
||||
get_proc_name(Host) ->
|
||||
|
@ -587,14 +545,6 @@ sets_equal(A, B) ->
|
|||
format(Format, Data) ->
|
||||
iolist_to_binary(io_lib:format(Format, Data)).
|
||||
|
||||
-spec format_error(atom() | tuple()) -> binary().
|
||||
format_error({bad_jid, JID}) ->
|
||||
<<"Not a valid JID: ", JID/binary>>;
|
||||
format_error({bad_url, URL}) ->
|
||||
<<"Not an HTTP(S) URL: ", URL/binary>>;
|
||||
format_error(Reason) ->
|
||||
list_to_binary(file:format_error(Reason)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%%| Caching
|
||||
|
||||
|
@ -756,16 +706,11 @@ try_call_by_host(Host, Call) ->
|
|||
reload_spam_filter_files(<<"global">>) ->
|
||||
for_all_hosts(fun reload_spam_filter_files/1, []);
|
||||
reload_spam_filter_files(Host) ->
|
||||
LServer = jid:nameprep(Host),
|
||||
Files =
|
||||
#{domains => gen_mod:get_module_opt(LServer, ?MODULE, spam_domains_file),
|
||||
jid => gen_mod:get_module_opt(LServer, ?MODULE, spam_jids_file),
|
||||
url => gen_mod:get_module_opt(LServer, ?MODULE, spam_urls_file)},
|
||||
case try_call_by_host(Host, {reload_files, Files}) of
|
||||
case try_call_by_host(Host, reload_spam_files) of
|
||||
{spam_filter, ok} ->
|
||||
ok;
|
||||
{spam_filter, {error, Txt}} ->
|
||||
{error, binary_to_list(Txt)};
|
||||
{error, Txt};
|
||||
{error, _R} = Error ->
|
||||
Error
|
||||
end.
|
||||
|
|
|
@ -38,14 +38,11 @@
|
|||
-export([dump_spam_stanza/1, reopen_log/0]).
|
||||
|
||||
-include("logger.hrl").
|
||||
-include("mod_antispam.hrl").
|
||||
-include("translate.hrl").
|
||||
|
||||
-include_lib("xmpp/include/xmpp.hrl").
|
||||
|
||||
-type filename() :: binary() | none | false.
|
||||
|
||||
-define(MODULE_PARENT, mod_antispam).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%%| Exported
|
||||
|
||||
|
@ -157,7 +154,7 @@ write_stanza_dump(Fd, XML) ->
|
|||
%%| Auxiliary
|
||||
|
||||
get_path_option(Host) ->
|
||||
Opts = gen_mod:get_module_opts(Host, ?MODULE_PARENT),
|
||||
Opts = gen_mod:get_module_opts(Host, ?MODULE_ANTISPAM),
|
||||
get_path_option(Host, Opts).
|
||||
|
||||
get_path_option(Host, Opts) ->
|
||||
|
@ -178,11 +175,11 @@ get_path_option(Host, Opts) ->
|
|||
|
||||
-spec get_proc_name(binary()) -> atom().
|
||||
get_proc_name(Host) ->
|
||||
gen_mod:get_module_proc(Host, ?MODULE_PARENT).
|
||||
gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM).
|
||||
|
||||
-spec get_spam_filter_hosts() -> [binary()].
|
||||
get_spam_filter_hosts() ->
|
||||
[H || H <- ejabberd_option:hosts(), gen_mod:is_loaded(H, ?MODULE_PARENT)].
|
||||
[H || H <- ejabberd_option:hosts(), gen_mod:is_loaded(H, ?MODULE_ANTISPAM)].
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
|
|
182
src/mod_antispam_files.erl
Normal file
182
src/mod_antispam_files.erl
Normal file
|
@ -0,0 +1,182 @@
|
|||
%%%----------------------------------------------------------------------
|
||||
%%% File : mod_antispam_files.erl
|
||||
%%% Author : Holger Weiss <holger@zedat.fu-berlin.de>
|
||||
%%% Author : Stefan Strigler <stefan@strigler.de>
|
||||
%%% Purpose : Read spam JID, URL and domain list files for mod_antispam
|
||||
%%% Created : 31 Mar 2019 by Holger Weiss <holger@zedat.fu-berlin.de>
|
||||
%%%
|
||||
%%%
|
||||
%%% ejabberd, Copyright (C) 2019-2025 ProcessOne
|
||||
%%%
|
||||
%%% This program is free software; you can redistribute it and/or
|
||||
%%% modify it under the terms of the GNU General Public License as
|
||||
%%% published by the Free Software Foundation; either version 2 of the
|
||||
%%% License, or (at your option) any later version.
|
||||
%%%
|
||||
%%% This program is distributed in the hope that it will be useful,
|
||||
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
%%% General Public License for more details.
|
||||
%%%
|
||||
%%% You should have received a copy of the GNU General Public License along
|
||||
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
%%%
|
||||
%%%----------------------------------------------------------------------
|
||||
|
||||
%%| definitions
|
||||
%% @format-begin
|
||||
|
||||
-module(mod_antispam_files).
|
||||
|
||||
-author('holger@zedat.fu-berlin.de').
|
||||
-author('stefan@strigler.de').
|
||||
|
||||
%% Exported
|
||||
-export([init_files/1, terminate_files/1]).
|
||||
% Hooks
|
||||
-export([get_files_lists/2]).
|
||||
|
||||
-include("ejabberd_commands.hrl").
|
||||
-include("logger.hrl").
|
||||
-include("mod_antispam.hrl").
|
||||
-include("translate.hrl").
|
||||
|
||||
-include_lib("xmpp/include/xmpp.hrl").
|
||||
|
||||
%% Map from list type (e.g. jid, url, domains, whitelist_domains) to the
%% configured file for that list.
-type files_map() :: #{atom() => filename()}.
|
||||
%% Map from list type to the set of entries read for that type.
-type lists_map() ::
|
||||
#{jid => jid_set(),
|
||||
url => url_set(),
|
||||
atom() => sets:set(binary())}.
|
||||
|
||||
%% NOTE(review): none of the four macros below is referenced by the code
%% visible in this module; they look like leftovers copied from
%% mod_antispam.erl - confirm before removing.
-define(COMMAND_TIMEOUT, timer:seconds(30)).
|
||||
-define(DEFAULT_CACHE_SIZE, 10000).
|
||||
-define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>).
|
||||
-define(HTTPC_TIMEOUT, timer:seconds(3)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%%| Exported
|
||||
|
||||
%% Register get_files_lists/2 on the antispam_get_lists hook of Host
%% (sequence number 50).
init_files(Host) ->
    Hook = antispam_get_lists,
    Callback = get_files_lists,
    ejabberd_hooks:add(Hook, Host, ?MODULE, Callback, 50).
|
||||
|
||||
%% Remove the get_files_lists/2 callback from the antispam_get_lists hook
%% of Host; uses the same sequence number (50) as the registration.
terminate_files(Host) ->
    Hook = antispam_get_lists,
    Callback = get_files_lists,
    ejabberd_hooks:delete(Hook, Host, ?MODULE, Callback, 50).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%%| Hooks
|
||||
|
||||
%% Hook callback for antispam_get_lists.
%%
%% Reads the list files named in Files and merges every resulting set into
%% the matching set of the accumulator (jid, url, domains,
%% whitelist_domains).
%%
%% If a file cannot be opened or read, the error is logged and
%% {stop, {config_error, ErrorText}} is returned instead of a map.
%% NOTE(review): presumably ejabberd_hooks:run_fold unwraps the stop tuple
%% so that the fold result is {config_error, ErrorText} - confirm against
%% ejabberd_hooks.
-spec get_files_lists(lists_map(), files_map()) ->
          lists_map() | {stop, {config_error, binary()}}.
get_files_lists(#{jid := AccJids,
                  url := AccUrls,
                  domains := AccDomains,
                  whitelist_domains := AccWhitelist} =
                    Acc,
                Files) ->
    try read_files(Files) of
        #{jid := JIDsSet,
          url := URLsSet,
          domains := SpamDomainsSet,
          whitelist_domains := WhitelistDomains} ->
            Acc#{jid => sets:union(AccJids, JIDsSet),
                 url => sets:union(AccUrls, URLsSet),
                 domains => sets:union(AccDomains, SpamDomainsSet),
                 whitelist_domains => sets:union(AccWhitelist, WhitelistDomains)}
    catch
        {Op, File, Reason} when Op == open; Op == read ->
            ErrorText = format("Error trying to ~s file ~s: ~s", [Op, File, format_error(Reason)]),
            %% ErrorText embeds the file name and the offending line, so it
            %% is logged as data: used as the format string, any "~" in it
            %% would be interpreted as a format directive.
            ?CRITICAL_MSG("~s", [ErrorText]),
            {stop, {config_error, ErrorText}}
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%%| read_files
|
||||
|
||||
%% Read every file of the map, replacing each file name by the set of
%% entries parsed from it. The parser is chosen from the map key.
%% Exceptions thrown by read_file/2 are not caught here.
-spec read_files(files_map()) -> lists_map().
read_files(Files) ->
    ReadOne =
        fun(Type, Filename) ->
                Parser = line_parser(Type),
                read_file(Filename, Parser)
        end,
    maps:map(ReadOne, Files).
|
||||
|
||||
%% Select the function used to parse one line of a list file:
%% jid -> parse_jid/1, url -> parse_url/1, any other type -> trim/1
%% (the line with trailing whitespace removed).
%%
%% The returned fun yields an ljid() for the jid type and a binary for
%% everything else, matching the parser type expected by read_file/2.
-spec line_parser(Type :: atom()) -> fun((binary()) -> ljid() | url() | binary()).
line_parser(jid) ->
    fun parse_jid/1;
line_parser(url) ->
    fun parse_url/1;
line_parser(_) ->
    fun trim/1.
|
||||
|
||||
%% Read a list file into a set, parsing each line with ParseLine.
%%
%% Returns an empty set when the file is 'none'.
%% Throws {open, File, Reason} if the file cannot be opened, and
%% {read, File, Reason} for anything thrown while reading or parsing.
%% The file is closed in both the success and the failure case.
-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set().
read_file(none, _ParseLine) ->
    sets:new();
read_file(File, ParseLine) ->
    OpenModes = [read, binary, raw, {read_ahead, 65536}],
    case file:open(File, OpenModes) of
        {error, OpenReason} ->
            throw({open, File, OpenReason});
        {ok, Fd} ->
            try
                read_line(Fd, ParseLine, sets:new())
            catch
                throw:ReadReason ->
                    throw({read, File, ReadReason})
            after
                ok = file:close(Fd)
            end
    end.
|
||||
|
||||
%% Consume Fd line by line until eof, adding ParseLine(Line) of every line
%% to the accumulator set, and return the final set.
%% A read error is thrown as the bare reason; whatever ParseLine throws
%% propagates unchanged.
-spec read_line(file:io_device(),
                fun((binary()) -> ljid() | url()),
                jid_set() | url_set()) ->
                   jid_set() | url_set().
read_line(Fd, ParseLine, Acc) ->
    case file:read_line(Fd) of
        eof ->
            Acc;
        {ok, RawLine} ->
            Entry = ParseLine(RawLine),
            read_line(Fd, ParseLine, sets:add_element(Entry, Acc));
        {error, Reason} ->
            throw(Reason)
    end.
|
||||
|
||||
%% Parse one line of a JIDs file.
%%
%% Trailing whitespace is removed, the rest is decoded as a JID, and the
%% lower-cased JID without resource is returned.
%% Throws {bad_jid, Text} if decoding fails with a bad_jid exception. Text
%% is the trimmed line, so the resulting error message does not carry the
%% line terminator that file:read_line/1 leaves on the raw line.
-spec parse_jid(binary()) -> ljid().
parse_jid(S) ->
    Trimmed = trim(S),
    try jid:decode(Trimmed) of
        #jid{} = JID ->
            jid:remove_resource(
                jid:tolower(JID))
    catch
        _:{bad_jid, _} ->
            throw({bad_jid, Trimmed})
    end.
|
||||
|
||||
%% Parse one line of a URLs file.
%%
%% Trailing whitespace is removed; the rest must be "http://" or
%% "https://" (case-insensitive) followed by non-whitespace characters up
%% to the end, and is returned unchanged.
%% Throws {bad_url, Text} otherwise. Text is the trimmed line, so the
%% resulting error message does not carry the line terminator that
%% file:read_line/1 leaves on the raw line.
-spec parse_url(binary()) -> url().
parse_url(S) ->
    URL = trim(S),
    RE = <<"https?://\\S+$">>,
    Options = [anchored, caseless, {capture, none}],
    case re:run(URL, RE, Options) of
        match ->
            URL;
        nomatch ->
            throw({bad_url, URL})
    end.
|
||||
|
||||
%% Strip trailing whitespace (including the line terminator) from a
%% binary. Leading whitespace is kept.
-spec trim(binary()) -> binary().
trim(S) ->
    TrailingSpace = <<"\\s+$">>,
    Replacement = <<>>,
    re:replace(S, TrailingSpace, Replacement, [{return, binary}]).
|
||||
|
||||
%% Function copied from mod_antispam.erl
|
||||
%% Like io_lib:format/2, but returns the formatted text as one binary.
-spec format(io:format(), [term()]) -> binary().
format(Format, Data) ->
    Chars = io_lib:format(Format, Data),
    iolist_to_binary(Chars).
|
||||
|
||||
%% Turn an error reason raised while reading a list file into text:
%% the two parser errors get their own message, anything else is handed
%% to file:format_error/1.
-spec format_error(atom() | tuple()) -> binary().
format_error({bad_jid, Text}) ->
    <<"Not a valid JID: ", Text/binary>>;
format_error({bad_url, Text}) ->
    <<"Not an HTTP(S) URL: ", Text/binary>>;
format_error(FileReason) ->
    Description = file:format_error(FileReason),
    list_to_binary(Description).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%%| vim: set foldmethod=marker foldmarker=%%|,%%-:
|
|
@ -38,13 +38,12 @@
|
|||
|
||||
-include("logger.hrl").
|
||||
-include("translate.hrl").
|
||||
-include("mod_antispam.hrl").
|
||||
|
||||
-include_lib("xmpp/include/xmpp.hrl").
|
||||
|
||||
-type url() :: binary().
|
||||
-type s2s_in_state() :: ejabberd_s2s_in:state().
|
||||
|
||||
-define(MODULE_PARENT, mod_antispam).
|
||||
-define(HTTPC_TIMEOUT, timer:seconds(3)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
|
@ -128,9 +127,9 @@ s2s_in_handle_info(State, _) ->
|
|||
|
||||
-spec needs_checking(jid(), jid()) -> boolean().
|
||||
needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
|
||||
case gen_mod:is_loaded(LServer, ?MODULE_PARENT) of
|
||||
case gen_mod:is_loaded(LServer, ?MODULE_ANTISPAM) of
|
||||
true ->
|
||||
Access = gen_mod:get_module_opt(LServer, ?MODULE_PARENT, access_spam),
|
||||
Access = gen_mod:get_module_opt(LServer, ?MODULE_ANTISPAM, access_spam),
|
||||
case acl:match_rule(LServer, Access, To) of
|
||||
allow ->
|
||||
?DEBUG("Spam not filtered for ~s", [jid:encode(To)]),
|
||||
|
@ -144,7 +143,7 @@ needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) ->
|
|||
To) % likely a gateway
|
||||
end;
|
||||
false ->
|
||||
?DEBUG("~s not loaded for ~s", [?MODULE_PARENT, LServer]),
|
||||
?DEBUG("~s not loaded for ~s", [?MODULE_ANTISPAM, LServer]),
|
||||
false
|
||||
end.
|
||||
|
||||
|
@ -292,7 +291,7 @@ reject(_) ->
|
|||
|
||||
-spec get_proc_name(binary()) -> atom().
|
||||
get_proc_name(Host) ->
|
||||
gen_mod:get_module_proc(Host, ?MODULE_PARENT).
|
||||
gen_mod:get_module_proc(Host, ?MODULE_ANTISPAM).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
-include_lib("xmpp/include/xmpp.hrl").
|
||||
-include("logger.hrl").
|
||||
-include("mod_antispam.hrl").
|
||||
|
||||
-define(SERVICE_MODULE, mod_antispam).
|
||||
-define(SERVICE_JID_PREFIX, "rtbl-").
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue