From 34b40aec663b4d08b5a80da7dd8fc84c4e691fdc Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Thu, 5 Jun 2025 14:31:02 +0200 Subject: [PATCH] mod_antispam: add format instructions --- src/mod_antispam.erl | 1007 ++++++++++++++++++++----------------- src/mod_antispam_rtbl.erl | 85 ++-- test/antispam_tests.erl | 102 ++-- 3 files changed, 675 insertions(+), 519 deletions(-) diff --git a/src/mod_antispam.erl b/src/mod_antispam.erl index 986f21066..c3489aefa 100644 --- a/src/mod_antispam.erl +++ b/src/mod_antispam.erl @@ -98,6 +98,8 @@ -define(DEFAULT_RTBL_DOMAINS_NODE, <<"spam_source_domains">>). -define(DEFAULT_CACHE_SIZE, 10000). +%% @format-begin + %%-------------------------------------------------------------------- %% gen_mod callbacks. %%-------------------------------------------------------------------- @@ -114,10 +116,10 @@ start(Host, Opts) -> -spec stop(binary()) -> ok | {error, any()}. stop(Host) -> case gen_mod:is_loaded_elsewhere(Host, ?MODULE) of - false -> - ejabberd_commands:unregister_commands(get_commands_spec()); - true -> - ok + false -> + ejabberd_commands:unregister_commands(get_commands_spec()); + true -> + ok end, gen_mod:stop_child(?MODULE, Host). @@ -134,34 +136,28 @@ depends(_Host, _Opts) -> -spec mod_opt_type(atom()) -> econf:validator(). mod_opt_type(spam_domains_file) -> econf:either( - econf:enum([none]), - econf:file()); + econf:enum([none]), econf:file()); mod_opt_type(whitelist_domains_file) -> - econf:either( - none, - econf:binary()); + econf:either(none, econf:binary()); mod_opt_type(spam_dump_file) -> econf:either( - econf:enum([none]), - econf:binary()); + econf:enum([none]), econf:binary()); mod_opt_type(spam_jids_file) -> econf:either( - econf:enum([none]), - econf:file()); + econf:enum([none]), econf:file()); mod_opt_type(spam_urls_file) -> econf:either( - econf:enum([none]), - econf:file()); + econf:enum([none]), econf:file()); mod_opt_type(access_spam) -> econf:acl(); mod_opt_type(cache_size) -> econf:pos_int(unlimited); mod_opt_type(rtbl_host) -> econf:either( - econf:enum([none]), - econf:host()); + econf:enum([none]), econf:host()); mod_opt_type(rtbl_domains_node) -> - econf:non_empty(econf:binary()). + econf:non_empty( + econf:binary()). -spec mod_options(binary()) -> [{atom(), any()}]. mod_options(_Host) -> @@ -175,7 +171,8 @@ mod_options(_Host) -> {rtbl_host, none}, {rtbl_domains_node, ?DEFAULT_RTBL_DOMAINS_NODE}]. -mod_doc() -> #{}. +mod_doc() -> + #{}. %%-------------------------------------------------------------------- %% gen_server callbacks. @@ -184,67 +181,73 @@ mod_doc() -> #{}. init([Host, Opts]) -> process_flag(trap_exit, true), DumpFile = expand_host(gen_mod:get_opt(spam_dump_file, Opts), Host), - Files = #{domains => gen_mod:get_opt(spam_domains_file, Opts), - jid => gen_mod:get_opt(spam_jids_file, Opts), - url => gen_mod:get_opt(spam_urls_file, Opts), - whitelist_domains => gen_mod:get_opt(whitelist_domains_file, Opts)}, + Files = + #{domains => gen_mod:get_opt(spam_domains_file, Opts), + jid => gen_mod:get_opt(spam_jids_file, Opts), + url => gen_mod:get_opt(spam_urls_file, Opts), + whitelist_domains => gen_mod:get_opt(whitelist_domains_file, Opts)}, try read_files(Files) of - #{jid := JIDsSet, url := URLsSet, domains := SpamDomainsSet, whitelist_domains := WhitelistDomains} -> - ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, - s2s_in_handle_info, 90), - ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, - s2s_receive_packet, 50), - ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, - sm_receive_packet, 50), - ejabberd_hooks:add(reopen_log_hook, ?MODULE, - reopen_log, 50), - ejabberd_hooks:add(local_send_to_resource_hook, Host, - mod_antispam_rtbl, pubsub_event_handler, 50), - RTBLHost = gen_mod:get_opt(rtbl_host, Opts), - RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, Opts), - InitState0 = #state{host = Host, - jid_set = JIDsSet, - url_set = URLsSet, - max_cache_size = gen_mod:get_opt(cache_size, Opts), - blocked_domains = set_to_map(SpamDomainsSet), - whitelist_domains = set_to_map(WhitelistDomains, false), - rtbl_host = RTBLHost, - rtbl_domains_node = RTBLDomainsNode}, - mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host), - InitState = init_open_dump_file(DumpFile, InitState0), - {ok, InitState} - catch {Op, File, Reason} when Op == open; - Op == read -> - ?CRITICAL_MSG("Cannot ~s ~s: ~s", [Op, File, format_error(Reason)]), - {stop, config_error} + #{jid := JIDsSet, + url := URLsSet, + domains := SpamDomainsSet, + whitelist_domains := WhitelistDomains} -> + ejabberd_hooks:add(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90), + ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50), + ejabberd_hooks:add(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50), + ejabberd_hooks:add(reopen_log_hook, ?MODULE, reopen_log, 50), + ejabberd_hooks:add(local_send_to_resource_hook, + Host, + mod_antispam_rtbl, + pubsub_event_handler, + 50), + RTBLHost = gen_mod:get_opt(rtbl_host, Opts), + RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, Opts), + InitState0 = + #state{host = Host, + jid_set = JIDsSet, + url_set = URLsSet, + max_cache_size = gen_mod:get_opt(cache_size, Opts), + blocked_domains = set_to_map(SpamDomainsSet), + whitelist_domains = set_to_map(WhitelistDomains, false), + rtbl_host = RTBLHost, + rtbl_domains_node = RTBLDomainsNode}, + mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host), + InitState = init_open_dump_file(DumpFile, InitState0), + {ok, InitState} + catch + {Op, File, Reason} when Op == open; Op == read -> + ?CRITICAL_MSG("Cannot ~s ~s: ~s", [Op, File, format_error(Reason)]), + {stop, config_error} end. init_open_dump_file(none, State) -> State; init_open_dump_file(DumpFile, State) -> case filelib:ensure_dir(DumpFile) of - ok -> - ok; - {error, Reason} -> - Dirname = filename:dirname(DumpFile), - throw({open, Dirname, Reason}) + ok -> + ok; + {error, Reason} -> + Dirname = filename:dirname(DumpFile), + throw({open, Dirname, Reason}) end, open_dump_file(DumpFile, State). --spec handle_call(term(), {pid(), term()}, state()) - -> {reply, {spam_filter, term()}, state()} | {noreply, state()}. +-spec handle_call(term(), {pid(), term()}, state()) -> + {reply, {spam_filter, term()}, state()} | {noreply, state()}. handle_call({check_jid, From}, _From, #state{jid_set = JIDsSet} = State) -> {Result, State1} = filter_jid(From, JIDsSet, State), {reply, {spam_filter, Result}, State1}; -handle_call({check_body, URLs, JIDs, From}, _From, - #state{url_set = URLsSet, jid_set = JIDsSet} = State) -> +handle_call({check_body, URLs, JIDs, From}, + _From, + #state{url_set = URLsSet, jid_set = JIDsSet} = State) -> {Result1, State1} = filter_body(URLs, URLsSet, From, State), {Result2, State2} = filter_body(JIDs, JIDsSet, From, State1), - Result = if Result1 == spam -> - Result1; - true -> - Result2 - end, + Result = + if Result1 == spam -> + Result1; + true -> + Result2 + end, {reply, {spam_filter, Result}, State2}; handle_call({resolve_redirects, URLs}, _From, State) -> ResolvedURLs = do_resolve_redirects(URLs, []), @@ -263,18 +266,30 @@ handle_call({drop_from_cache, JID}, _From, State) -> {reply, {spam_filter, Result}, State1}; handle_call(get_cache, _From, #state{jid_cache = Cache} = State) -> {reply, {spam_filter, maps:to_list(Cache)}, State}; -handle_call({add_blocked_domain, Domain}, _From, #state{blocked_domains = BlockedDomains} = State) -> +handle_call({add_blocked_domain, Domain}, + _From, + #state{blocked_domains = BlockedDomains} = State) -> BlockedDomains1 = maps:merge(BlockedDomains, #{Domain => true}), Txt = format("~s added to blocked domains", [Domain]), {reply, {spam_filter, {ok, Txt}}, State#state{blocked_domains = BlockedDomains1}}; -handle_call({remove_blocked_domain, Domain}, _From, #state{blocked_domains = BlockedDomains} = State) -> +handle_call({remove_blocked_domain, Domain}, + _From, + #state{blocked_domains = BlockedDomains} = State) -> BlockedDomains1 = maps:remove(Domain, BlockedDomains), Txt = format("~s removed from blocked domains", [Domain]), {reply, {spam_filter, {ok, Txt}}, State#state{blocked_domains = BlockedDomains1}}; -handle_call(get_blocked_domains, _From, #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} = State) -> +handle_call(get_blocked_domains, + _From, + #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} = + State) -> {reply, {blocked_domains, maps:merge(BlockedDomains, WhitelistDomains)}, State}; -handle_call({is_blocked_domain, Domain}, _From, #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} = State) -> - {reply, maps:get(Domain, maps:merge(BlockedDomains, WhitelistDomains), false) =/= false, State}; +handle_call({is_blocked_domain, Domain}, + _From, + #state{blocked_domains = BlockedDomains, whitelist_domains = WhitelistDomains} = + State) -> + {reply, + maps:get(Domain, maps:merge(BlockedDomains, WhitelistDomains), false) =/= false, + State}; handle_call(Request, From, State) -> ?ERROR_MSG("Got unexpected request from ~p: ~p", [From, Request]), {noreply, State}. @@ -284,41 +299,43 @@ handle_cast({dump, _XML}, #state{dump_fd = undefined} = State) -> {noreply, State}; handle_cast({dump, XML}, #state{dump_fd = Fd} = State) -> case file:write(Fd, [XML, <<$\n>>]) of - ok -> - ok; - {error, Reason} -> - ?ERROR_MSG("Cannot write spam to dump file: ~s", - [file:format_error(Reason)]) + ok -> + ok; + {error, Reason} -> + ?ERROR_MSG("Cannot write spam to dump file: ~s", [file:format_error(Reason)]) end, {noreply, State}; handle_cast({reload, NewOpts, OldOpts}, - #state{host = Host, - rtbl_host = OldRTBLHost, - rtbl_domains_node = OldRTBLDomainsNode, - rtbl_retry_timer = RTBLRetryTimer} = State) -> + #state{host = Host, + rtbl_host = OldRTBLHost, + rtbl_domains_node = OldRTBLDomainsNode, + rtbl_retry_timer = RTBLRetryTimer} = + State) -> misc:cancel_timer(RTBLRetryTimer), - State1 = case {gen_mod:get_opt(spam_dump_file, OldOpts), - gen_mod:get_opt(spam_dump_file, NewOpts)} of - {OldDumpFile, NewDumpFile} when NewDumpFile /= OldDumpFile -> - close_dump_file(expand_host(OldDumpFile, Host), State), - open_dump_file(expand_host(NewDumpFile, Host), State); - {_OldDumpFile, _NewDumpFile} -> - State - end, - State2 = case {gen_mod:get_opt(cache_size, OldOpts), - gen_mod:get_opt(cache_size, NewOpts)} of - {OldMax, NewMax} when NewMax < OldMax -> - shrink_cache(State1#state{max_cache_size = NewMax}); - {OldMax, NewMax} when NewMax > OldMax -> - State1#state{max_cache_size = NewMax}; - {_OldMax, _NewMax} -> - State1 - end, + State1 = + case {gen_mod:get_opt(spam_dump_file, OldOpts), gen_mod:get_opt(spam_dump_file, NewOpts)} + of + {OldDumpFile, NewDumpFile} when NewDumpFile /= OldDumpFile -> + close_dump_file(expand_host(OldDumpFile, Host), State), + open_dump_file(expand_host(NewDumpFile, Host), State); + {_OldDumpFile, _NewDumpFile} -> + State + end, + State2 = + case {gen_mod:get_opt(cache_size, OldOpts), gen_mod:get_opt(cache_size, NewOpts)} of + {OldMax, NewMax} when NewMax < OldMax -> + shrink_cache(State1#state{max_cache_size = NewMax}); + {OldMax, NewMax} when NewMax > OldMax -> + State1#state{max_cache_size = NewMax}; + {_OldMax, _NewMax} -> + State1 + end, ok = mod_antispam_rtbl:unsubscribe(OldRTBLHost, OldRTBLDomainsNode, Host), - Files = #{domains => gen_mod:get_opt(spam_domains_file, NewOpts), - jid => gen_mod:get_opt(spam_jids_file, NewOpts), - url => gen_mod:get_opt(spam_urls_file, NewOpts), - whitelist_domains => gen_mod:get_opt(whitelist_domains_file, NewOpts)}, + Files = + #{domains => gen_mod:get_opt(spam_domains_file, NewOpts), + jid => gen_mod:get_opt(spam_jids_file, NewOpts), + url => gen_mod:get_opt(spam_urls_file, NewOpts), + whitelist_domains => gen_mod:get_opt(whitelist_domains_file, NewOpts)}, {_Result, State3} = reload_files(Files, State2#state{blocked_domains = #{}}), RTBLHost = gen_mod:get_opt(rtbl_host, NewOpts), RTBLDomainsNode = gen_mod:get_opt(rtbl_domains_node, NewOpts), @@ -326,7 +343,8 @@ handle_cast({reload, NewOpts, OldOpts}, {noreply, State3#state{rtbl_host = RTBLHost, rtbl_domains_node = RTBLDomainsNode}}; handle_cast(reopen_log, State) -> {noreply, reopen_dump_file(State)}; -handle_cast({update_blocked_domains, NewItems}, #state{blocked_domains = BlockedDomains} = State) -> +handle_cast({update_blocked_domains, NewItems}, + #state{blocked_domains = BlockedDomains} = State) -> {noreply, State#state{blocked_domains = maps:merge(BlockedDomains, NewItems)}}; handle_cast(Request, State) -> ?ERROR_MSG("Got unexpected request from: ~p", [Request]), @@ -334,32 +352,42 @@ handle_cast(Request, State) -> -spec handle_info(term(), state()) -> {noreply, state()}. handle_info({iq_reply, timeout, blocked_domains}, State) -> - ?WARNING_MSG("Fetching blocked domains failed: fetch timeout. Retrying in 60 seconds", []), - {noreply, State#state{rtbl_retry_timer = erlang:send_after(60000, self(), request_blocked_domains)}}; + ?WARNING_MSG("Fetching blocked domains failed: fetch timeout. Retrying in 60 seconds", + []), + {noreply, + State#state{rtbl_retry_timer = + erlang:send_after(60000, self(), request_blocked_domains)}}; handle_info({iq_reply, #iq{type = error} = IQ, blocked_domains}, State) -> ?WARNING_MSG("Fetching blocked domains failed: ~p. Retrying in 60 seconds", - [xmpp:format_stanza_error(xmpp:get_error(IQ))]), - {noreply, State#state{rtbl_retry_timer = erlang:send_after(60000, self(), request_blocked_domains)}}; + [xmpp:format_stanza_error( + xmpp:get_error(IQ))]), + {noreply, + State#state{rtbl_retry_timer = + erlang:send_after(60000, self(), request_blocked_domains)}}; handle_info({iq_reply, IQReply, blocked_domains}, - #state{blocked_domains = OldBlockedDomains, - rtbl_host = RTBLHost, - rtbl_domains_node = RTBLDomainsNode, - host = Host} = State) -> + #state{blocked_domains = OldBlockedDomains, + rtbl_host = RTBLHost, + rtbl_domains_node = RTBLDomainsNode, + host = Host} = + State) -> case mod_antispam_rtbl:parse_blocked_domains(IQReply) of - undefined -> - ?WARNING_MSG("Fetching initial list failed: invalid result payload", []), - {noreply, State#state{rtbl_retry_timer = undefined}}; - NewBlockedDomains -> - ok = mod_antispam_rtbl:subscribe(RTBLHost, RTBLDomainsNode, Host), - {noreply, State#state{rtbl_retry_timer = undefined, - rtbl_subscribed = true, - blocked_domains = maps:merge(OldBlockedDomains, NewBlockedDomains)}} + undefined -> + ?WARNING_MSG("Fetching initial list failed: invalid result payload", []), + {noreply, State#state{rtbl_retry_timer = undefined}}; + NewBlockedDomains -> + ok = mod_antispam_rtbl:subscribe(RTBLHost, RTBLDomainsNode, Host), + {noreply, + State#state{rtbl_retry_timer = undefined, + rtbl_subscribed = true, + blocked_domains = maps:merge(OldBlockedDomains, NewBlockedDomains)}} end; handle_info({iq_reply, timeout, subscribe_result}, State) -> ?WARNING_MSG("Subscription error: request timeout", []), {noreply, State#state{rtbl_subscribed = false}}; handle_info({iq_reply, #iq{type = error} = IQ, subscribe_result}, State) -> - ?WARNING_MSG("Subscription error: ~p", [xmpp:format_stanza_error(xmpp:get_error(IQ))]), + ?WARNING_MSG("Subscription error: ~p", + [xmpp:format_stanza_error( + xmpp:get_error(IQ))]), {noreply, State#state{rtbl_subscribed = false}}; handle_info({iq_reply, IQReply, subscribe_result}, State) -> ?DEBUG("Got subscribe result: ~p", [IQReply]), @@ -368,7 +396,11 @@ handle_info({iq_reply, _IQReply, unsubscribe_result}, State) -> %% FIXME: we should check it's true (of type `result`, not `error`), but at that point, what %% would we do? {noreply, State#state{rtbl_subscribed = false}}; -handle_info(request_blocked_domains, #state{host = Host, rtbl_host = RTBLHost, rtbl_domains_node = RTBLDomainsNode} = State) -> +handle_info(request_blocked_domains, + #state{host = Host, + rtbl_host = RTBLHost, + rtbl_domains_node = RTBLDomainsNode} = + State) -> mod_antispam_rtbl:request_blocked_domains(RTBLHost, RTBLDomainsNode, Host), {noreply, State}; handle_info(Info, State) -> @@ -377,30 +409,30 @@ handle_info(Info, State) -> -spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok. terminate(Reason, - #state{host = Host, - rtbl_host = RTBLHost, - rtbl_domains_node = RTBLDomainsNode, - rtbl_retry_timer = RTBLRetryTimer} = State) -> + #state{host = Host, + rtbl_host = RTBLHost, + rtbl_domains_node = RTBLDomainsNode, + rtbl_retry_timer = RTBLRetryTimer} = + State) -> ?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]), misc:cancel_timer(RTBLRetryTimer), DumpFile = gen_mod:get_module_opt(Host, ?MODULE, spam_dump_file), DumpFile1 = expand_host(DumpFile, Host), close_dump_file(DumpFile1, State), - ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, - s2s_receive_packet, 50), - ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, - sm_receive_packet, 50), - ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, - s2s_in_handle_info, 90), - ejabberd_hooks:delete(local_send_to_resource_hook, Host, - mod_antispam_rtbl, pubsub_event_handler, 50), - mod_antispam_rtbl:unsubscribe(RTBLHost, RTBLDomainsNode,Host), + ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, s2s_receive_packet, 50), + ejabberd_hooks:delete(sm_receive_packet, Host, ?MODULE, sm_receive_packet, 50), + ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, s2s_in_handle_info, 90), + ejabberd_hooks:delete(local_send_to_resource_hook, + Host, + mod_antispam_rtbl, + pubsub_event_handler, + 50), + mod_antispam_rtbl:unsubscribe(RTBLHost, RTBLDomainsNode, Host), case gen_mod:is_loaded_elsewhere(Host, ?MODULE) of - false -> - ejabberd_hooks:delete(reopen_log_hook, ?MODULE, - reopen_log, 50); - true -> - ok + false -> + ejabberd_hooks:delete(reopen_log_hook, ?MODULE, reopen_log, 50); + true -> + ok end. -spec code_change({down, term()} | term(), state(), term()) -> {ok, state()}. @@ -412,8 +444,8 @@ code_change(_OldVsn, #state{host = Host} = State, _Extra) -> %% Hook callbacks. %%-------------------------------------------------------------------- --spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) - -> {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}. +-spec s2s_receive_packet({stanza() | drop, s2s_in_state()}) -> + {stanza() | drop, s2s_in_state()} | {stop, {drop, s2s_in_state()}}. s2s_receive_packet({A, State}) -> case sm_receive_packet(A) of {stop, drop} -> @@ -426,36 +458,37 @@ s2s_receive_packet({A, State}) -> sm_receive_packet(drop = Acc) -> Acc; sm_receive_packet(#message{from = From, - to = #jid{lserver = LServer} = To, - type = Type} = Msg) - when Type /= groupchat, - Type /= error -> + to = #jid{lserver = LServer} = To, + type = Type} = + Msg) + when Type /= groupchat, Type /= error -> do_check(From, To, LServer, Msg); sm_receive_packet(#presence{from = From, - to = #jid{lserver = LServer} = To, - type = subscribe} = Presence) -> + to = #jid{lserver = LServer} = To, + type = subscribe} = + Presence) -> do_check(From, To, LServer, Presence); sm_receive_packet(Acc) -> Acc. do_check(From, To, LServer, Stanza) -> case needs_checking(From, To) of - true -> - case check_from(LServer, From) of - ham -> - case check_stanza(LServer, From, Stanza) of - ham -> - Stanza; - spam -> - reject(Stanza), - {stop, drop} - end; - spam -> - reject(Stanza), - {stop, drop} - end; - false -> - Stanza + true -> + case check_from(LServer, From) of + ham -> + case check_stanza(LServer, From, Stanza) of + ham -> + Stanza; + spam -> + reject(Stanza), + {stop, drop} + end; + spam -> + reject(Stanza), + {stop, drop} + end; + false -> + Stanza end. check_stanza(LServer, From, #message{body = Body}) -> @@ -463,8 +496,8 @@ check_stanza(LServer, From, #message{body = Body}) -> check_stanza(_, _, _) -> ham. --spec s2s_in_handle_info(s2s_in_state(), any()) - -> s2s_in_state() | {stop, s2s_in_state()}. +-spec s2s_in_handle_info(s2s_in_state(), any()) -> + s2s_in_state() | {stop, s2s_in_state()}. s2s_in_handle_info(State, {_Ref, {spam_filter, _}}) -> ?DEBUG("Dropping expired spam filter result", []), {stop, State}; @@ -474,9 +507,10 @@ s2s_in_handle_info(State, _) -> -spec reopen_log() -> ok. reopen_log() -> lists:foreach(fun(Host) -> - Proc = get_proc_name(Host), - gen_server:cast(Proc, reopen_log) - end, get_spam_filter_hosts()). + Proc = get_proc_name(Host), + gen_server:cast(Proc, reopen_log) + end, + get_spam_filter_hosts()). %%-------------------------------------------------------------------- %% Internal functions. @@ -484,60 +518,70 @@ reopen_log() -> -spec needs_checking(jid(), jid()) -> boolean(). needs_checking(#jid{lserver = FromHost} = From, #jid{lserver = LServer} = To) -> case gen_mod:is_loaded(LServer, ?MODULE) of - true -> - Access = gen_mod:get_module_opt(LServer, ?MODULE, access_spam), - case acl:match_rule(LServer, Access, To) of - allow -> - ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]), - false; - deny -> - ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]), - not mod_roster:is_subscribed(From, To) andalso - not mod_roster:is_subscribed(jid:make(<<>>, FromHost), To) % likely a gateway - end; - false -> - ?DEBUG("~s not loaded for ~s", [?MODULE, LServer]), - false + true -> + Access = gen_mod:get_module_opt(LServer, ?MODULE, access_spam), + case acl:match_rule(LServer, Access, To) of + allow -> + ?DEBUG("Spam not filtered for ~s", [jid:encode(To)]), + false; + deny -> + ?DEBUG("Spam is filtered for ~s", [jid:encode(To)]), + not mod_roster:is_subscribed(From, To) + andalso not + mod_roster:is_subscribed( + jid:make(<<>>, FromHost), + To) % likely a gateway + end; + false -> + ?DEBUG("~s not loaded for ~s", [?MODULE, LServer]), + false end. -spec check_from(binary(), jid()) -> ham | spam. check_from(Host, From) -> Proc = get_proc_name(Host), - LFrom = {_, FromDomain, _} = jid:remove_resource(jid:tolower(From)), + LFrom = + {_, FromDomain, _} = + jid:remove_resource( + jid:tolower(From)), try - case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of - true -> + case gen_server:call(Proc, {is_blocked_domain, FromDomain}) of + true -> ?DEBUG("Spam JID found in blocked domains: ~p", [From]), ejabberd_hooks:run(spam_found, Host, [{jid, From}]), spam; - false -> - case gen_server:call(Proc, {check_jid, LFrom}) of - {spam_filter, Result} -> - Result - end - end - catch exit:{timeout, _} -> - ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers", - [jid:encode(From)]), - ham + false -> + case gen_server:call(Proc, {check_jid, LFrom}) of + {spam_filter, Result} -> + Result + end + end + catch + exit:{timeout, _} -> + ?WARNING_MSG("Timeout while checking ~s against list of blocked domains or spammers", + [jid:encode(From)]), + ham end. -spec check_body(binary(), jid(), binary()) -> ham | spam. check_body(Host, From, Body) -> case {extract_urls(Host, Body), extract_jids(Body)} of - {none, none} -> - ?DEBUG("No JIDs/URLs found in message", []), - ham; - {URLs, JIDs} -> - Proc = get_proc_name(Host), - LFrom = jid:remove_resource(jid:tolower(From)), - try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of - {spam_filter, Result} -> - Result - catch exit:{timeout, _} -> - ?WARNING_MSG("Timeout while checking body", []), - ham - end + {none, none} -> + ?DEBUG("No JIDs/URLs found in message", []), + ham; + {URLs, JIDs} -> + Proc = get_proc_name(Host), + LFrom = + jid:remove_resource( + jid:tolower(From)), + try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of + {spam_filter, Result} -> + Result + catch + exit:{timeout, _} -> + ?WARNING_MSG("Timeout while checking body", []), + ham + end end. -spec extract_urls(binary(), binary()) -> {urls, [url()]} | none. @@ -545,11 +589,11 @@ extract_urls(Host, Body) -> RE = <<"https?://\\S+">>, Options = [global, {capture, all, binary}], case re:run(Body, RE, Options) of - {match, Captured} when is_list(Captured) -> - Urls = resolve_redirects(Host, lists:flatten(Captured)), - {urls, Urls}; - nomatch -> - none + {match, Captured} when is_list(Captured) -> + Urls = resolve_redirects(Host, lists:flatten(Captured)), + {urls, Urls}; + nomatch -> + none end. -spec resolve_redirects(binary(), [url()]) -> [url()]. @@ -558,17 +602,20 @@ resolve_redirects(Host, URLs) -> try gen_server:call(Proc, {resolve_redirects, URLs}) of {spam_filter, ResolvedURLs} -> ResolvedURLs - catch exit:{timeout, _} -> + catch + exit:{timeout, _} -> ?WARNING_MSG("Timeout while resolving redirects: ~p", [URLs]), URLs end. -spec do_resolve_redirects([url()], [url()]) -> [url()]. -do_resolve_redirects([], Result) -> Result; +do_resolve_redirects([], Result) -> + Result; do_resolve_redirects([URL | Rest], Acc) -> - case - httpc:request(get, {URL, [{"user-agent", "curl/8.7.1"}]}, - [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}], []) + case httpc:request(get, + {URL, [{"user-agent", "curl/8.7.1"}]}, + [{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}], + []) of {ok, {{_, StatusCode, _}, Headers, _Body}} when StatusCode >= 300, StatusCode < 400 -> Location = proplists:get_value("location", Headers), @@ -577,7 +624,7 @@ do_resolve_redirects([URL | Rest], Acc) -> do_resolve_redirects(Rest, [URL | Acc]); false -> do_resolve_redirects([Location | Rest], [URL | Acc]) - end; + end; _Res -> do_resolve_redirects(Rest, [URL | Acc]) end. @@ -587,85 +634,91 @@ extract_jids(Body) -> RE = <<"\\S+@\\S+">>, Options = [global, {capture, all, binary}], case re:run(Body, RE, Options) of - {match, Captured} when is_list(Captured) -> - {jids, lists:filtermap(fun try_decode_jid/1, - lists:flatten(Captured))}; - nomatch -> - none + {match, Captured} when is_list(Captured) -> + {jids, lists:filtermap(fun try_decode_jid/1, lists:flatten(Captured))}; + nomatch -> + none end. -spec try_decode_jid(binary()) -> {true, ljid()} | false. try_decode_jid(S) -> try jid:decode(S) of - #jid{} = JID -> - {true, jid:remove_resource(jid:tolower(JID))} - catch _:{bad_jid, _} -> - false + #jid{} = JID -> + {true, + jid:remove_resource( + jid:tolower(JID))} + catch + _:{bad_jid, _} -> + false end. -spec filter_jid(ljid(), jid_set(), state()) -> {ham | spam, state()}. filter_jid(From, Set, #state{host = Host} = State) -> case sets:is_element(From, Set) of - true -> - ?DEBUG("Spam JID found: ~s", [jid:encode(From)]), + true -> + ?DEBUG("Spam JID found: ~s", [jid:encode(From)]), ejabberd_hooks:run(spam_found, Host, [{jid, From}]), - {spam, State}; - false -> - case cache_lookup(From, State) of - {true, State1} -> - ?DEBUG("Spam JID found: ~s", [jid:encode(From)]), + {spam, State}; + false -> + case cache_lookup(From, State) of + {true, State1} -> + ?DEBUG("Spam JID found: ~s", [jid:encode(From)]), ejabberd_hooks:run(spam_found, Host, [{jid, From}]), - {spam, State1}; - {false, State1} -> - ?DEBUG("JID not listed: ~s", [jid:encode(From)]), - {ham, State1} - end + {spam, State1}; + {false, State1} -> + ?DEBUG("JID not listed: ~s", [jid:encode(From)]), + {ham, State1} + end end. -spec filter_body({urls, [url()]} | {jids, [ljid()]} | none, - url_set() | jid_set(), jid(), state()) - -> {ham | spam, state()}. + url_set() | jid_set(), + jid(), + state()) -> + {ham | spam, state()}. filter_body({_, Addrs}, Set, From, #state{host = Host} = State) -> case lists:any(fun(Addr) -> sets:is_element(Addr, Set) end, Addrs) of - true -> - ?DEBUG("Spam addresses found: ~p", [Addrs]), + true -> + ?DEBUG("Spam addresses found: ~p", [Addrs]), ejabberd_hooks:run(spam_found, Host, [{body, Addrs}]), - {spam, cache_insert(From, State)}; - false -> - ?DEBUG("Addresses not listed: ~p", [Addrs]), - {ham, State} + {spam, cache_insert(From, State)}; + false -> + ?DEBUG("Addresses not listed: ~p", [Addrs]), + {ham, State} end; filter_body(none, _Set, _From, State) -> {ham, State}. --spec reload_files(#{Type :: atom() => filename()}, state()) - -> {ok | {error, binary()}, state()}. +-spec reload_files(#{Type :: atom() => filename()}, state()) -> + {ok | {error, binary()}, state()}. reload_files(Files, #state{host = Host, blocked_domains = BlockedDomains} = State) -> try read_files(Files) of - #{jid := JIDsSet, url := URLsSet, domains := SpamDomainsSet, whitelist_domains := WhitelistDomains} -> - case sets_equal(JIDsSet, State#state.jid_set) of - true -> - ?INFO_MSG("Reloaded spam JIDs for ~s (unchanged)", [Host]); - false -> - ?INFO_MSG("Reloaded spam JIDs for ~s (changed)", [Host]) - end, - case sets_equal(URLsSet, State#state.url_set) of - true -> - ?INFO_MSG("Reloaded spam URLs for ~s (unchanged)", [Host]); - false -> - ?INFO_MSG("Reloaded spam URLs for ~s (changed)", [Host]) - end, - {ok, State#state{jid_set = JIDsSet, - url_set = URLsSet, - blocked_domains = maps:merge(BlockedDomains, set_to_map(SpamDomainsSet)), - whitelist_domains = set_to_map(WhitelistDomains, false)} - } - catch {Op, File, Reason} when Op == open; - Op == read -> - Txt = format("Cannot ~s ~s for ~s: ~s", - [Op, File, Host, format_error(Reason)]), - ?ERROR_MSG("~s", [Txt]), - {{error, Txt}, State} + #{jid := JIDsSet, + url := URLsSet, + domains := SpamDomainsSet, + whitelist_domains := WhitelistDomains} -> + case sets_equal(JIDsSet, State#state.jid_set) of + true -> + ?INFO_MSG("Reloaded spam JIDs for ~s (unchanged)", [Host]); + false -> + ?INFO_MSG("Reloaded spam JIDs for ~s (changed)", [Host]) + end, + case sets_equal(URLsSet, State#state.url_set) of + true -> + ?INFO_MSG("Reloaded spam URLs for ~s (unchanged)", [Host]); + false -> + ?INFO_MSG("Reloaded spam URLs for ~s (changed)", [Host]) + end, + {ok, + State#state{jid_set = JIDsSet, + url_set = URLsSet, + blocked_domains = maps:merge(BlockedDomains, set_to_map(SpamDomainsSet)), + whitelist_domains = set_to_map(WhitelistDomains, false)}} + catch + {Op, File, Reason} when Op == open; Op == read -> + Txt = format("Cannot ~s ~s for ~s: ~s", [Op, File, Host, format_error(Reason)]), + ?ERROR_MSG("~s", [Txt]), + {{error, Txt}, State} end. set_to_map(Set) -> @@ -674,54 +727,63 @@ set_to_map(Set) -> set_to_map(Set, V) -> sets:fold(fun(K, M) -> M#{K => V} end, #{}, Set). --spec read_files(#{Type => filename()}) -> #{jid => jid_set(), url => url_set(), Type => sets:set(binary())} - when Type :: atom(). +-spec read_files(#{Type => filename()}) -> + #{jid => jid_set(), + url => url_set(), + Type => sets:set(binary())} + when Type :: atom(). read_files(Files) -> - maps:map(fun(Type, Filename) -> - read_file(Filename, line_parser(Type)) - end, - Files). + maps:map(fun(Type, Filename) -> read_file(Filename, line_parser(Type)) end, Files). -spec line_parser(Type :: atom()) -> fun((binary()) -> binary()). -line_parser(jid) -> fun parse_jid/1; -line_parser(url) -> fun parse_url/1; -line_parser(_) -> fun trim/1. +line_parser(jid) -> + fun parse_jid/1; +line_parser(url) -> + fun parse_url/1; +line_parser(_) -> + fun trim/1. --spec read_file(filename(), fun((binary()) -> ljid() | url())) - -> jid_set() | url_set(). +-spec read_file(filename(), fun((binary()) -> ljid() | url())) -> jid_set() | url_set(). read_file(none, _ParseLine) -> sets:new(); read_file(File, ParseLine) -> case file:open(File, [read, binary, raw, {read_ahead, 65536}]) of - {ok, Fd} -> - try read_line(Fd, ParseLine, sets:new()) - catch throw:E -> throw({read, File, E}) - after ok = file:close(Fd) - end; - {error, Reason} -> - throw({open, File, Reason}) + {ok, Fd} -> + try + read_line(Fd, ParseLine, sets:new()) + catch + E -> + throw({read, File, E}) + after + ok = file:close(Fd) + end; + {error, Reason} -> + throw({open, File, Reason}) end. --spec read_line(file:io_device(), fun((binary()) -> ljid() | url()), - jid_set() | url_set()) - -> jid_set() | url_set(). +-spec read_line(file:io_device(), + fun((binary()) -> ljid() | url()), + jid_set() | url_set()) -> + jid_set() | url_set(). read_line(Fd, ParseLine, Set) -> case file:read_line(Fd) of - {ok, Line} -> - read_line(Fd, ParseLine, sets:add_element(ParseLine(Line), Set)); - {error, Reason} -> - throw(Reason); - eof -> - Set + {ok, Line} -> + read_line(Fd, ParseLine, sets:add_element(ParseLine(Line), Set)); + {error, Reason} -> + throw(Reason); + eof -> + Set end. -spec parse_jid(binary()) -> ljid(). parse_jid(S) -> try jid:decode(trim(S)) of - #jid{} = JID -> - jid:remove_resource(jid:tolower(JID)) - catch _:{bad_jid, _} -> - throw({bad_jid, S}) + #jid{} = JID -> + jid:remove_resource( + jid:tolower(JID)) + catch + _:{bad_jid, _} -> + throw({bad_jid, S}) end. -spec parse_url(binary()) -> url(). @@ -730,10 +792,10 @@ parse_url(S) -> RE = <<"https?://\\S+$">>, Options = [anchored, caseless, {capture, none}], case re:run(URL, RE, Options) of - match -> - URL; - nomatch -> - throw({bad_url, S}) + match -> + URL; + nomatch -> + throw({bad_url, S}) end. -spec trim(binary()) -> binary(). @@ -741,18 +803,24 @@ trim(S) -> re:replace(S, <<"\\s+$">>, <<>>, [{return, binary}]). -spec reject(stanza()) -> ok. -reject(#message{from = From, to = To, type = Type, lang = Lang} = Msg) - when Type /= groupchat, - Type /= error -> +reject(#message{from = From, + to = To, + type = Type, + lang = Lang} = + Msg) + when Type /= groupchat, Type /= error -> ?INFO_MSG("Rejecting unsolicited message from ~s to ~s", - [jid:encode(From), jid:encode(To)]), + [jid:encode(From), jid:encode(To)]), Txt = <<"Your message is unsolicited">>, Err = xmpp:err_policy_violation(Txt, Lang), maybe_dump_spam(Msg), ejabberd_router:route_error(Msg, Err); -reject(#presence{from = From, to = To, lang = Lang} = Presence) -> +reject(#presence{from = From, + to = To, + lang = Lang} = + Presence) -> ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s", - [jid:encode(From), jid:encode(To)]), + [jid:encode(From), jid:encode(To)]), Txt = <<"Your traffic is unsolicited">>, Err = xmpp:err_policy_violation(Txt, Lang), ejabberd_router:route_error(Presence, Err); @@ -765,12 +833,12 @@ open_dump_file(none, State) -> open_dump_file(Name, State) -> Modes = [append, raw, binary, delayed_write], case file:open(Name, Modes) of - {ok, Fd} -> - ?DEBUG("Opened ~s", [Name]), - State#state{dump_fd = Fd}; - {error, Reason} -> - ?ERROR_MSG("Cannot open dump file ~s: ~s", [Name, file:format_error(Reason)]), - State#state{dump_fd = undefined} + {ok, Fd} -> + ?DEBUG("Opened ~s", [Name]), + State#state{dump_fd = Fd}; + {error, Reason} -> + ?ERROR_MSG("Cannot open dump file ~s: ~s", [Name, file:format_error(Reason)]), + State#state{dump_fd = undefined} end. -spec close_dump_file(filename(), state()) -> ok. @@ -778,10 +846,10 @@ close_dump_file(_Name, #state{dump_fd = undefined}) -> ok; close_dump_file(Name, #state{dump_fd = Fd}) -> case file:close(Fd) of - ok -> - ?DEBUG("Closed ~s", [Name]); - {error, Reason} -> - ?ERROR_MSG("Cannot close ~s: ~s", [Name, file:format_error(Reason)]) + ok -> + ?DEBUG("Closed ~s", [Name]); + {error, Reason} -> + ?ERROR_MSG("Cannot close ~s: ~s", [Name, file:format_error(Reason)]) end. -spec reopen_dump_file(state()) -> state(). @@ -797,7 +865,8 @@ maybe_dump_spam(#message{to = #jid{lserver = LServer}} = Msg) -> Proc = get_proc_name(LServer), Time = erlang:timestamp(), Msg1 = misc:add_delay_info(Msg, By, Time), - XML = fxml:element_to_binary(xmpp:encode(Msg1)), + XML = fxml:element_to_binary( + xmpp:encode(Msg1)), gen_server:cast(Proc, {dump, XML}). -spec get_proc_name(binary()) -> atom(). @@ -837,7 +906,7 @@ format_error(Reason) -> cache_insert(_LJID, #state{max_cache_size = 0} = State) -> State; cache_insert(LJID, #state{jid_cache = Cache, max_cache_size = MaxSize} = State) - when MaxSize /= unlimited, map_size(Cache) >= MaxSize -> + when MaxSize /= unlimited, map_size(Cache) >= MaxSize -> cache_insert(LJID, shrink_cache(State)); cache_insert(LJID, #state{jid_cache = Cache} = State) -> ?INFO_MSG("Caching spam JID: ~s", [jid:encode(LJID)]), @@ -847,12 +916,12 @@ cache_insert(LJID, #state{jid_cache = Cache} = State) -> -spec cache_lookup(ljid(), state()) -> {boolean(), state()}. cache_lookup(LJID, #state{jid_cache = Cache} = State) -> case Cache of - #{LJID := _Timestamp} -> - Cache1 = Cache#{LJID => erlang:monotonic_time(second)}, - State1 = State#state{jid_cache = Cache1}, - {true, State1}; - #{} -> - {false, State} + #{LJID := _Timestamp} -> + Cache1 = Cache#{LJID => erlang:monotonic_time(second)}, + State1 = State#state{jid_cache = Cache1}, + {true, State1}; + #{} -> + {false, State} end. -spec shrink_cache(state()) -> state(). @@ -860,7 +929,9 @@ shrink_cache(#state{jid_cache = Cache, max_cache_size = MaxSize} = State) -> ShrinkedSize = round(MaxSize / 2), N = map_size(Cache) - ShrinkedSize, L = lists:keysort(2, maps:to_list(Cache)), - Cache1 = maps:from_list(lists:nthtail(N, L)), + Cache1 = + maps:from_list( + lists:nthtail(N, L)), State#state{jid_cache = Cache1}. -spec expire_cache(integer(), state()) -> {{ok, binary()}, state()}. @@ -881,11 +952,11 @@ add_to_cache(LJID, State) -> drop_from_cache(LJID, #state{jid_cache = Cache} = State) -> Cache1 = maps:remove(LJID, Cache), if map_size(Cache1) < map_size(Cache) -> - Txt = format("~s removed from cache", [jid:encode(LJID)]), - {{ok, Txt}, State#state{jid_cache = Cache1}}; + Txt = format("~s removed from cache", [jid:encode(LJID)]), + {{ok, Txt}, State#state{jid_cache = Cache1}}; true -> - Txt = format("~s wasn't cached", [jid:encode(LJID)]), - {{ok, Txt}, State} + Txt = format("~s wasn't cached", [jid:encode(LJID)]), + {{ok, Txt}, State} end. %%-------------------------------------------------------------------- @@ -893,78 +964,96 @@ drop_from_cache(LJID, #state{jid_cache = Cache} = State) -> %%-------------------------------------------------------------------- -spec get_commands_spec() -> [ejabberd_commands()]. get_commands_spec() -> - [#ejabberd_commands{name = reload_spam_filter_files, tags = [filter], - desc = "Reload spam JID/URL files", - module = ?MODULE, function = reload_spam_filter_files, - args = [{host, binary}], - result = {res, rescode}}, - #ejabberd_commands{name = get_spam_filter_cache, tags = [filter], - desc = "Show spam filter cache contents", - module = ?MODULE, function = get_spam_filter_cache, - args = [{host, binary}], - result = {spammers, {list, {spammer, {tuple, - [{jid, string}, {timestamp, integer}]}}}}}, - #ejabberd_commands{name = expire_spam_filter_cache, tags = [filter], - desc = "Remove old/unused spam JIDs from cache", - module = ?MODULE, function = expire_spam_filter_cache, - args = [{host, binary}, {seconds, integer}], - result = {res, restuple}}, - #ejabberd_commands{name = add_to_spam_filter_cache, tags = [filter], - desc = "Add JID to spam filter cache", - module = ?MODULE, - function = add_to_spam_filter_cache, - args = [{host, binary}, {jid, binary}], - result = {res, restuple}}, - #ejabberd_commands{name = drop_from_spam_filter_cache, tags = [filter], - desc = "Drop JID from spam filter cache", - module = ?MODULE, - function = drop_from_spam_filter_cache, - args = [{host, binary}, {jid, binary}], - result = {res, restuple}}, - #ejabberd_commands{name = get_blocked_domains, tags = [filter], - desc = "Get list of domains being blocked", - module = ?MODULE, - function = get_blocked_domains, - args = [{host, binary}], - result = {blocked_domains, {list, {jid, string}}}}, - #ejabberd_commands{name = add_blocked_domain, tags = [filter], - desc = "Add domain to list of blocked domains", - module = ?MODULE, - function = add_blocked_domain, - args = [{host, binary}, {domain, binary}], - result = {res, restuple}}, - #ejabberd_commands{name = remove_blocked_domain, tags = [filter], - desc = "Remove domain from list of blocked domains", - module = ?MODULE, - function = remove_blocked_domain, - args = [{host, binary}, {domain, binary}], - result = {res, restuple}} - ]. + [#ejabberd_commands{name = reload_spam_filter_files, + tags = [filter], + desc = "Reload spam JID/URL files", + module = ?MODULE, + function = reload_spam_filter_files, + args = [{host, binary}], + result = {res, rescode}}, + #ejabberd_commands{name = get_spam_filter_cache, + tags = [filter], + desc = "Show spam filter cache contents", + module = ?MODULE, + function = get_spam_filter_cache, + args = [{host, binary}], + result = + {spammers, + {list, {spammer, {tuple, [{jid, string}, {timestamp, integer}]}}}}}, + #ejabberd_commands{name = expire_spam_filter_cache, + tags = [filter], + desc = "Remove old/unused spam JIDs from cache", + module = ?MODULE, + function = expire_spam_filter_cache, + args = [{host, binary}, {seconds, integer}], + result = {res, restuple}}, + #ejabberd_commands{name = add_to_spam_filter_cache, + tags = [filter], + desc = "Add JID to spam filter cache", + module = ?MODULE, + function = add_to_spam_filter_cache, + args = [{host, binary}, {jid, binary}], + result = {res, restuple}}, + #ejabberd_commands{name = drop_from_spam_filter_cache, + tags = [filter], + desc = "Drop JID from spam filter cache", + module = ?MODULE, + function = drop_from_spam_filter_cache, + args = [{host, binary}, {jid, binary}], + result = {res, restuple}}, + #ejabberd_commands{name = get_blocked_domains, + tags = [filter], + desc = "Get list of domains being blocked", + module = ?MODULE, + function = get_blocked_domains, + args = [{host, binary}], + result = {blocked_domains, {list, {jid, string}}}}, + #ejabberd_commands{name = add_blocked_domain, + tags = [filter], + desc = "Add domain to list of blocked domains", + module = ?MODULE, + function = add_blocked_domain, + args = [{host, binary}, {domain, binary}], + result = {res, restuple}}, + #ejabberd_commands{name = remove_blocked_domain, + tags = [filter], + desc = "Remove domain from list of blocked domains", + module = ?MODULE, + function = remove_blocked_domain, + args = [{host, binary}, {domain, binary}], + result = {res, restuple}}]. for_all_hosts(F, A) -> - try lists:map( - fun(Host) -> - apply(F, [Host | A]) - end, get_spam_filter_hosts()) of - List -> - case lists:filter(fun({error, _}) -> true; (_) -> false end, List) of - [] -> hd(List); - Errors -> hd(Errors) - end - catch error:{badmatch, {error, _Reason} = Error} -> - Error + try lists:map(fun(Host) -> apply(F, [Host | A]) end, get_spam_filter_hosts()) of + List -> + case lists:filter(fun ({error, _}) -> + true; + (_) -> + false + end, + List) + of + [] -> + hd(List); + Errors -> + hd(Errors) + end + catch + error:{badmatch, {error, _Reason} = Error} -> + Error end. try_call_by_host(Host, Call) -> LServer = jid:nameprep(Host), Proc = get_proc_name(LServer), try gen_server:call(Proc, Call, ?COMMAND_TIMEOUT) of - Result -> - Result - catch exit:{noproc, _} -> - {error, "Not configured for " ++ binary_to_list(Host)}; - exit:{timeout, _} -> - {error, "Timeout while querying ejabberd"} + Result -> + Result + catch + exit:{noproc, _} -> + {error, "Not configured for " ++ binary_to_list(Host)}; + exit:{timeout, _} -> + {error, "Timeout while querying ejabberd"} end. -spec reload_spam_filter_files(binary()) -> ok | {error, string()}. @@ -972,25 +1061,32 @@ reload_spam_filter_files(<<"global">>) -> for_all_hosts(fun reload_spam_filter_files/1, []); reload_spam_filter_files(Host) -> LServer = jid:nameprep(Host), - Files = #{domains => gen_mod:get_module_opt(LServer, ?MODULE, spam_domains_file), - jid => gen_mod:get_module_opt(LServer, ?MODULE, spam_jids_file), - url => gen_mod:get_module_opt(LServer, ?MODULE, spam_urls_file)}, + Files = + #{domains => gen_mod:get_module_opt(LServer, ?MODULE, spam_domains_file), + jid => gen_mod:get_module_opt(LServer, ?MODULE, spam_jids_file), + url => gen_mod:get_module_opt(LServer, ?MODULE, spam_urls_file)}, case try_call_by_host(Host, {reload_files, Files}) of - {spam_filter, ok} -> - ok; - {spam_filter, {error, Txt}} -> - {error, binary_to_list(Txt)}; - {error, _R} = Error -> - Error + {spam_filter, ok} -> + ok; + {spam_filter, {error, Txt}} -> + {error, binary_to_list(Txt)}; + {error, _R} = Error -> + Error end. -spec get_blocked_domains(binary()) -> [binary()]. get_blocked_domains(Host) -> case try_call_by_host(Host, get_blocked_domains) of - {blocked_domains, BlockedDomains} -> - maps:keys(maps:filter(fun(_, false) -> false; (_, _) -> true end, BlockedDomains)); - {error, _R} = Error -> - Error + {blocked_domains, BlockedDomains} -> + maps:keys( + maps:filter(fun (_, false) -> + false; + (_, _) -> + true + end, + BlockedDomains)); + {error, _R} = Error -> + Error end. -spec add_blocked_domain(binary(), binary()) -> {ok, string()}. @@ -998,10 +1094,10 @@ add_blocked_domain(<<"global">>, Domain) -> for_all_hosts(fun add_blocked_domain/2, [Domain]); add_blocked_domain(Host, Domain) -> case try_call_by_host(Host, {add_blocked_domain, Domain}) of - {spam_filter, {Status, Txt}} -> - {Status, binary_to_list(Txt)}; - {error, _R} = Error -> - Error + {spam_filter, {Status, Txt}} -> + {Status, binary_to_list(Txt)}; + {error, _R} = Error -> + Error end. -spec remove_blocked_domain(binary(), binary()) -> {ok, string()}. @@ -1009,21 +1105,19 @@ remove_blocked_domain(<<"global">>, Domain) -> for_all_hosts(fun remove_blocked_domain/2, [Domain]); remove_blocked_domain(Host, Domain) -> case try_call_by_host(Host, {remove_blocked_domain, Domain}) of - {spam_filter, {Status, Txt}} -> - {Status, binary_to_list(Txt)}; - {error, _R} = Error -> - Error + {spam_filter, {Status, Txt}} -> + {Status, binary_to_list(Txt)}; + {error, _R} = Error -> + Error end. --spec get_spam_filter_cache(binary()) - -> [{binary(), integer()}] | {error, string()}. +-spec get_spam_filter_cache(binary()) -> [{binary(), integer()}] | {error, string()}. get_spam_filter_cache(Host) -> case try_call_by_host(Host, get_cache) of - {spam_filter, Cache} -> - [{jid:encode(JID), TS + erlang:time_offset(second)} || - {JID, TS} <- Cache]; - {error, _R} = Error -> - Error + {spam_filter, Cache} -> + [{jid:encode(JID), TS + erlang:time_offset(second)} || {JID, TS} <- Cache]; + {error, _R} = Error -> + Error end. -spec expire_spam_filter_cache(binary(), integer()) -> {ok | error, string()}. @@ -1031,27 +1125,31 @@ expire_spam_filter_cache(<<"global">>, Age) -> for_all_hosts(fun expire_spam_filter_cache/2, [Age]); expire_spam_filter_cache(Host, Age) -> case try_call_by_host(Host, {expire_cache, Age}) of - {spam_filter, {Status, Txt}} -> - {Status, binary_to_list(Txt)}; - {error, _R} = Error -> - Error + {spam_filter, {Status, Txt}} -> + {Status, binary_to_list(Txt)}; + {error, _R} = Error -> + Error end. --spec add_to_spam_filter_cache(binary(), binary()) -> [{binary(), integer()}] | {error, string()}. +-spec add_to_spam_filter_cache(binary(), binary()) -> + [{binary(), integer()}] | {error, string()}. add_to_spam_filter_cache(<<"global">>, JID) -> for_all_hosts(fun add_to_spam_filter_cache/2, [JID]); add_to_spam_filter_cache(Host, EncJID) -> try jid:decode(EncJID) of - #jid{} = JID -> - LJID = jid:remove_resource(jid:tolower(JID)), - case try_call_by_host(Host, {add_to_cache, LJID}) of - {spam_filter, {Status, Txt}} -> - {Status, binary_to_list(Txt)}; - {error, _R} = Error -> - Error - end - catch _:{bad_jid, _} -> - {error, "Not a valid JID: " ++ binary_to_list(EncJID)} + #jid{} = JID -> + LJID = + jid:remove_resource( + jid:tolower(JID)), + case try_call_by_host(Host, {add_to_cache, LJID}) of + {spam_filter, {Status, Txt}} -> + {Status, binary_to_list(Txt)}; + {error, _R} = Error -> + Error + end + catch + _:{bad_jid, _} -> + {error, "Not a valid JID: " ++ binary_to_list(EncJID)} end. -spec drop_from_spam_filter_cache(binary(), binary()) -> {ok | error, string()}. @@ -1059,14 +1157,17 @@ drop_from_spam_filter_cache(<<"global">>, JID) -> for_all_hosts(fun drop_from_spam_filter_cache/2, [JID]); drop_from_spam_filter_cache(Host, EncJID) -> try jid:decode(EncJID) of - #jid{} = JID -> - LJID = jid:remove_resource(jid:tolower(JID)), - case try_call_by_host(Host, {drop_from_cache, LJID}) of - {spam_filter, {Status, Txt}} -> - {Status, binary_to_list(Txt)}; - {error, _R} = Error -> - Error - end - catch _:{bad_jid, _} -> - {error, "Not a valid JID: " ++ binary_to_list(EncJID)} + #jid{} = JID -> + LJID = + jid:remove_resource( + jid:tolower(JID)), + case try_call_by_host(Host, {drop_from_cache, LJID}) of + {spam_filter, {Status, Txt}} -> + {Status, binary_to_list(Txt)}; + {error, _R} = Error -> + Error + end + catch + _:{bad_jid, _} -> + {error, "Not a valid JID: " ++ binary_to_list(EncJID)} end. diff --git a/src/mod_antispam_rtbl.erl b/src/mod_antispam_rtbl.erl index d977aa0c6..93b346631 100644 --- a/src/mod_antispam_rtbl.erl +++ b/src/mod_antispam_rtbl.erl @@ -38,11 +38,15 @@ subscribe/3, unsubscribe/3]). +%% @format-begin + subscribe(RTBLHost, RTBLDomainsNode, From) -> FromJID = service_jid(From), - SubIQ = #iq{type = set, to = jid:make(RTBLHost), from = FromJID, - sub_els = [ - #pubsub{subscribe = #ps_subscribe{jid = FromJID, node = RTBLDomainsNode}}]}, + SubIQ = + #iq{type = set, + to = jid:make(RTBLHost), + from = FromJID, + sub_els = [#pubsub{subscribe = #ps_subscribe{jid = FromJID, node = RTBLDomainsNode}}]}, ?DEBUG("Sending subscription request:~n~p", [xmpp:encode(SubIQ)]), ejabberd_router:route_iq(SubIQ, subscribe_result, self()). @@ -51,19 +55,22 @@ unsubscribe(none, _PSNode, _From) -> ok; unsubscribe(RTBLHost, RTBLDomainsNode, From) -> FromJID = jid:make(From), - SubIQ = #iq{type = set, to = jid:make(RTBLHost), from = FromJID, - sub_els = [ - #pubsub{unsubscribe = #ps_unsubscribe{jid = FromJID, node = RTBLDomainsNode}}]}, + SubIQ = + #iq{type = set, + to = jid:make(RTBLHost), + from = FromJID, + sub_els = + [#pubsub{unsubscribe = #ps_unsubscribe{jid = FromJID, node = RTBLDomainsNode}}]}, ejabberd_router:route_iq(SubIQ, unsubscribe_result, self()). -spec request_blocked_domains(binary() | none, binary(), binary()) -> ok. request_blocked_domains(none, _PSNode, _From) -> ok; request_blocked_domains(RTBLHost, RTBLDomainsNode, From) -> - IQ = #iq{type = get, from = jid:make(From), - to = jid:make(RTBLHost), - sub_els = [ - #pubsub{items = #ps_items{node = RTBLDomainsNode}}]}, + IQ = #iq{type = get, + from = jid:make(From), + to = jid:make(RTBLHost), + sub_els = [#pubsub{items = #ps_items{node = RTBLDomainsNode}}]}, ?DEBUG("Requesting RTBL blocked domains from ~s:~n~p", [RTBLHost, xmpp:encode(IQ)]), ejabberd_router:route_iq(IQ, blocked_domains, self()). @@ -72,31 +79,35 @@ parse_blocked_domains(#iq{to = #jid{lserver = LServer}, type = result} = IQ) -> ?DEBUG("parsing iq-result items: ~p", [IQ]), RTBLDomainsNode = gen_mod:get_module_opt(LServer, ?SERVICE_MODULE, rtbl_domains_node), case xmpp:get_subtag(IQ, #pubsub{}) of - #pubsub{items = #ps_items{node = RTBLDomainsNode, items = Items}} -> - ?DEBUG("Got items:~n~p", [Items]), - parse_items(Items); - _ -> - undefined + #pubsub{items = #ps_items{node = RTBLDomainsNode, items = Items}} -> + ?DEBUG("Got items:~n~p", [Items]), + parse_items(Items); + _ -> + undefined end. -spec parse_pubsub_event(stanza()) -> #{binary() => any()}. parse_pubsub_event(#message{to = #jid{lserver = LServer}} = Msg) -> RTBLDomainsNode = gen_mod:get_module_opt(LServer, ?SERVICE_MODULE, rtbl_domains_node), case xmpp:get_subtag(Msg, #ps_event{}) of - #ps_event{items = #ps_items{node = RTBLDomainsNode, items = Items, retract = RetractIds}} -> - maps:merge(retract_items(RetractIds), parse_items(Items)); - Other -> - ?WARNING_MSG("Couldn't extract items: ~p", [Other]), - #{} + #ps_event{items = + #ps_items{node = RTBLDomainsNode, + items = Items, + retract = RetractIds}} -> + maps:merge(retract_items(RetractIds), parse_items(Items)); + Other -> + ?WARNING_MSG("Couldn't extract items: ~p", [Other]), + #{} end. -spec parse_items([ps_item()]) -> #{binary() => any()}. parse_items(Items) -> - lists:foldl( - fun(#ps_item{id = ID}, Acc) -> - %% TODO extract meta/extra instructions - maps:put(ID, true, Acc) - end, #{}, Items). + lists:foldl(fun(#ps_item{id = ID}, Acc) -> + %% TODO extract meta/extra instructions + maps:put(ID, true, Acc) + end, + #{}, + Items). -spec retract_items([binary()]) -> #{binary() => false}. retract_items(Ids) -> @@ -112,20 +123,22 @@ service_jid(Host) -> -spec pubsub_event_handler(stanza()) -> drop | stanza(). pubsub_event_handler(#message{from = FromJid, - to = #jid{lserver = LServer, - lresource = <>}} = Msg) -> + to = + #jid{lserver = LServer, + lresource = <>}} = + Msg) -> ?DEBUG("Got RTBL message:~n~p", [Msg]), From = jid:encode(FromJid), case gen_mod:get_module_opt(LServer, ?SERVICE_MODULE, rtbl_host) of - From -> - ParsedItems = parse_pubsub_event(Msg), - Proc = gen_mod:get_module_proc(LServer, ?SERVICE_MODULE), - gen_server:cast(Proc, {update_blocked_domains, ParsedItems}), - %% FIXME what's the difference between `{drop, ...}` and `{stop, {drop, ...}}`? - drop; - _Other -> - ?INFO_MSG("Got unexpected message from ~s to rtbl resource:~n~p", [From, Msg]), - Msg + From -> + ParsedItems = parse_pubsub_event(Msg), + Proc = gen_mod:get_module_proc(LServer, ?SERVICE_MODULE), + gen_server:cast(Proc, {update_blocked_domains, ParsedItems}), + %% FIXME what's the difference between `{drop, ...}` and `{stop, {drop, ...}}`? + drop; + _Other -> + ?INFO_MSG("Got unexpected message from ~s to rtbl resource:~n~p", [From, Msg]), + Msg end; pubsub_event_handler(Acc) -> ?DEBUG("unexpected something on pubsub_event_handler: ~p", [Acc]), diff --git a/test/antispam_tests.erl b/test/antispam_tests.erl index d7ce196c0..6d94b8750 100644 --- a/test/antispam_tests.erl +++ b/test/antispam_tests.erl @@ -31,6 +31,8 @@ my_muc_jid/1, get_features/2, set_opt/3]). -include("suite.hrl"). +%% @format-begin + %%%=================================================================== %%% API %%%=================================================================== @@ -38,35 +40,51 @@ %%% Single tests %%%=================================================================== single_cases() -> - {antispam_single, [sequence], - [single_test(spam_files), - single_test(blocked_domains), - single_test(jid_cache), - single_test(rtbl_domains)]}. + {antispam_single, + [sequence], + [single_test(spam_files), + single_test(blocked_domains), + single_test(jid_cache), + single_test(rtbl_domains)]}. spam_files(Config) -> Host = ?config(server, Config), To = my_jid(Config), SpamJID = jid:make(<<"spammer_jid">>, <<"localhost">>, <<"spam_client">>), - SpamJIDMsg = #message{from = SpamJID, to = To, type = chat, body = [#text{data = <<"hello world">>}]}, + SpamJIDMsg = + #message{from = SpamJID, + to = To, + type = chat, + body = [#text{data = <<"hello world">>}]}, is_spam(SpamJIDMsg), Spammer = jid:make(<<"spammer">>, <<"localhost">>, <<"spam_client">>), - NoSpamMsg = #message{from = Spammer, to = To, type = chat, body = [#text{data = <<"hello world">>}]}, + NoSpamMsg = + #message{from = Spammer, + to = To, + type = chat, + body = [#text{data = <<"hello world">>}]}, is_not_spam(NoSpamMsg), - SpamMsg = #message{from = Spammer, to = To, type = chat, body = [#text{data = <<"hello world\nhttps://spam.domain.url">>}]}, + SpamMsg = + #message{from = Spammer, + to = To, + type = chat, + body = [#text{data = <<"hello world\nhttps://spam.domain.url">>}]}, is_spam(SpamMsg), %% now check this mischief is in jid_cache is_spam(NoSpamMsg), mod_antispam:drop_from_spam_filter_cache(Host, jid:to_string(Spammer)), is_not_spam(NoSpamMsg), - ?retry(100, 10, - ?match(true, (has_spam_domain(<<"spam_domain.org">>))(Host))), + ?retry(100, 10, ?match(true, (has_spam_domain(<<"spam_domain.org">>))(Host))), SpamDomain = jid:make(<<"spammer">>, <<"spam_domain.org">>, <<"spam_client">>), - SpamDomainMsg = #message{from = SpamDomain, to = To, type = chat, body = [#text{data = <<"hello world">>}]}, + SpamDomainMsg = + #message{from = SpamDomain, + to = To, + type = chat, + body = [#text{data = <<"hello world">>}]}, is_spam(SpamDomainMsg), ?match({ok, _}, mod_antispam:remove_blocked_domain(Host, <<"spam_domain.org">>)), ?match([], mod_antispam:get_blocked_domains(Host)), @@ -78,9 +96,12 @@ blocked_domains(Config) -> ?match([], mod_antispam:get_blocked_domains(Host)), SpamFrom = jid:make(<<"spammer">>, <<"spam.domain">>, <<"spam_client">>), To = my_jid(Config), - Msg = #message{from = SpamFrom, to = To, type = chat, body = [#text{data = <<"hello world">>}]}, + Msg = #message{from = SpamFrom, + to = To, + type = chat, + body = [#text{data = <<"hello world">>}]}, is_not_spam(Msg), - ?match({ok, _}, mod_antispam:add_blocked_domain(<<"global">>, <<"spam.domain">>)), + ?match({ok, _}, mod_antispam:add_blocked_domain(<<"global">>, <<"spam.domain">>)), is_spam(Msg), Vhosts = [H || H <- ejabberd_option:hosts(), gen_mod:is_loaded(H, mod_antispam)], NumVhosts = length(Vhosts), @@ -102,7 +123,10 @@ jid_cache(Config) -> Host = ?config(server, Config), SpamFrom = jid:make(<<"spammer">>, Host, <<"spam_client">>), To = my_jid(Config), - Msg = #message{from = SpamFrom, to = To, type = chat, body = [#text{data = <<"hello world">>}]}, + Msg = #message{from = SpamFrom, + to = To, + type = chat, + body = [#text{data = <<"hello world">>}]}, is_not_spam(Msg), mod_antispam:add_to_spam_filter_cache(Host, jid:to_string(SpamFrom)), is_spam(Msg), @@ -112,25 +136,45 @@ jid_cache(Config) -> rtbl_domains(Config) -> Host = ?config(server, Config), - RTBLHost = jid:to_string(suite:pubsub_jid(Config)), + RTBLHost = + jid:to_string( + suite:pubsub_jid(Config)), RTBLDomainsNode = <<"spam_source_domains">>, OldOpts = gen_mod:get_module_opts(Host, mod_antispam), - NewOpts = maps:merge(OldOpts, #{rtbl_host => RTBLHost, rtbl_domains_node => RTBLDomainsNode}), + NewOpts = + maps:merge(OldOpts, #{rtbl_host => RTBLHost, rtbl_domains_node => RTBLDomainsNode}), Owner = jid:make(?config(user, Config), ?config(server, Config), <<>>), - {result, _} = mod_pubsub:create_node(RTBLHost, ?config(server, Config), RTBLDomainsNode, Owner, <<"flat">>), - {result, _} = mod_pubsub:publish_item(RTBLHost, ?config(server, Config), RTBLDomainsNode, Owner, <<"spam.source.domain">>, - [xmpp:encode(#ps_item{id = <<"spam.source.domain">>, sub_els = []})]), + {result, _} = + mod_pubsub:create_node(RTBLHost, + ?config(server, Config), + RTBLDomainsNode, + Owner, + <<"flat">>), + {result, _} = + mod_pubsub:publish_item(RTBLHost, + ?config(server, Config), + RTBLDomainsNode, + Owner, + <<"spam.source.domain">>, + [xmpp:encode(#ps_item{id = <<"spam.source.domain">>, + sub_els = []})]), mod_antispam:reload(Host, OldOpts, NewOpts), ?match({ok, _}, mod_antispam:remove_blocked_domain(Host, <<"spam_domain.org">>)), - ?retry(100, 10, + ?retry(100, + 10, ?match([<<"spam.source.domain">>], mod_antispam:get_blocked_domains(Host))), - {result, _} = mod_pubsub:publish_item(RTBLHost, ?config(server, Config), RTBLDomainsNode, Owner, <<"spam.source.another">>, - [xmpp:encode(#ps_item{id = <<"spam.source.another">>, sub_els = []})]), - ?retry(100, 10, - ?match(true, (has_spam_domain(<<"spam.source.another">>))(Host))), - {result, _} = mod_pubsub:delete_item(RTBLHost, RTBLDomainsNode, Owner, <<"spam.source.another">>, true), - ?retry(100, 10, - ?match(false, (has_spam_domain(<<"spam.source.another">>))(Host))), + {result, _} = + mod_pubsub:publish_item(RTBLHost, + ?config(server, Config), + RTBLDomainsNode, + Owner, + <<"spam.source.another">>, + [xmpp:encode(#ps_item{id = <<"spam.source.another">>, + sub_els = []})]), + ?retry(100, 10, ?match(true, (has_spam_domain(<<"spam.source.another">>))(Host))), + {result, _} = + mod_pubsub:delete_item(RTBLHost, RTBLDomainsNode, Owner, <<"spam.source.another">>, true), + ?retry(100, 10, ?match(false, (has_spam_domain(<<"spam.source.another">>))(Host))), {result, _} = mod_pubsub:delete_node(RTBLHost, RTBLDomainsNode, Owner), disconnect(Config). @@ -141,9 +185,7 @@ single_test(T) -> list_to_atom("antispam_" ++ atom_to_list(T)). has_spam_domain(Domain) -> - fun(Host) -> - lists:member(Domain, mod_antispam:get_blocked_domains(Host)) - end. + fun(Host) -> lists:member(Domain, mod_antispam:get_blocked_domains(Host)) end. is_not_spam(Msg) -> ?match({Msg, undefined}, mod_antispam:s2s_receive_packet({Msg, undefined})).