2021/07/19

Distributed OTP Applications

OTP application 可以轉換為 distributed application。它的用途是:在由多個節點組成的 erlang cluster 中,同一時間只讓其中一個節點運作該 application。

OTP distributed application 可以設定一個負責運作的主節點;當主節點失效時,其他節點會選出其中一個接手,啟動該 application 繼續運作,這是 failover。當主節點恢復時,這個 application 會重新在主節點啟動,原本接手的節點會停止該 application,這是 takeover。運作的細節可參考 Distributed OTP Applications

以該文章的 8ball 實例,測試三個節點運作 OTP distributed application 的狀況。

產生 app

# Generate the application skeleton with rebar (application id: m8ball)
rebar create-app appid=m8ball

修改 src 裡面的檔案

m8ball.app.src

%% Application resource file for the m8ball application.
{application, m8ball,
 [{vsn, "1.0.0"},
  {description, "Answer vital questions"},
%% The modules entry is left commented out in this walkthrough:
%%  {modules, [m8ball, m8ball_sup, m8ball_server]},
  %% crypto is listed because m8ball_server:init/1 calls
  %% crypto:strong_rand_bytes/1.
  {applications, [stdlib, kernel, crypto]},
%% The registered entry is left commented out as well:
%%  {registered, [m8ball, m8ball_sup, m8ball_server]},
  %% Application callback module and the argument handed to m8ball:start/2
  %% (start/2 only matches [] as its second argument).
  {mod, {m8ball, []}},
  {env, [
    %% Tuple of candidate replies; m8ball_server returns one element of this
    %% tuple, chosen with rand:uniform/1, for every question.
    {answers, {<<"Yes">>, <<"No">>, <<"Doubtful">>,
               <<"I don't like your tone">>, <<"Of course">>,
               <<"Of course not">>, <<"*backs away slowly and runs away*">>}}
  ]}
 ]}.

m8ball.erl

-module(m8ball).
-behaviour(application).
-export([start/2, stop/1]).
-export([ask/1]).

%%%%%%%%%%%%%%%%%
%%% CALLBACKS %%%
%%%%%%%%%%%%%%%%%

%% Application start callback.
%%
%% Two start types are handled, both with an empty argument list:
%%   normal                - a regular start on this node;
%%   {takeover, OtherNode} - this node takes the application over from
%%                           OtherNode.
%% Either way a line is printed and the top supervisor is started; the result
%% of m8ball_sup:start_link/0 is returned unchanged.
%%
%% A {failover, Node} start type is deliberately not matched: per the original
%% note it is only used when the application defines a start_phases key.
start(normal, []) ->
    announce_and_boot("application normal start m8ball~n", []);
start({takeover, PreviousNode}, []) ->
    announce_and_boot("application takeover m8ball from ~p~n", [PreviousNode]).

%% Prints the given start-up message, then starts the top supervisor.
announce_and_boot(Format, Args) ->
    io:format(Format, Args),
    m8ball_sup:start_link().

%% Application stop callback: prints a notice and returns ok (the return
%% value of io:format/2). The state argument is ignored.
stop(_AppState) ->
    io:format("application stop m8ball~n", []).

%%%%%%%%%%%%%%%%%
%%% INTERFACE %%%
%%%%%%%%%%%%%%%%%
%% Public entry point of the application: forwards Question to
%% m8ball_server:ask/1 and returns whatever the server answers.
-spec ask(term()) -> term().
ask(Question) ->
    Reply = m8ball_server:ask(Question),
    Reply.

m8ball_server.erl

-module(m8ball_server).
-behaviour(gen_server).
-export([start_link/0, stop/0, ask/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         code_change/3, terminate/2]).

%%%%%%%%%%%%%%%%%
%%% INTERFACE %%%
%%%%%%%%%%%%%%%%%
%% Starts the server linked to the caller and registers it in the global
%% name registry under this module's name. init/1 receives [].
start_link() ->
    ServerName = {global, ?MODULE},
    InitArg = [],
    gen_server:start_link(ServerName, ?MODULE, InitArg, []).

%% Synchronously sends the stop request to the globally registered server
%% and returns the server's reply.
stop() ->
    Server = {global, ?MODULE},
    gen_server:call(Server, stop).

%% Asks the globally registered server for an answer. The question text is
%% never sent: only the atom question travels to the server.
ask(_Question) ->
    Server = {global, ?MODULE},
    gen_server:call(Server, question).

%%%%%%%%%%%%%%%%%
%%% CALLBACKS %%%
%%%%%%%%%%%%%%%%%
%% gen_server init callback. Seeds this process's rand state (exsplus
%% algorithm) from 12 bytes of crypto:strong_rand_bytes/1, split into three
%% 32-bit unsigned integers, and starts with an empty list as server state.
init([]) ->
    <<Seed1:32, Seed2:32, Seed3:32>> = crypto:strong_rand_bytes(12),
    _ = rand:seed(exsplus, {Seed1, Seed2, Seed3}),
    {ok, []}.

%% gen_server call handler.
%%
%%   question - reads the answers tuple from the m8ball application
%%              environment and replies with one element picked uniformly at
%%              random. Crashes with badmatch if the answers key is not set.
%%   stop     - replies ok and stops the server with reason normal.
%%   other    - replies {error, unknown_call}. Returning {noreply, State}
%%              here without ever replying would leave the caller blocked
%%              until its call timeout expires, so the request is rejected
%%              explicitly instead.
%%
%% The server state is passed through unchanged in every clause.
handle_call(question, _From, State) ->
    {ok, Answers} = application:get_env(m8ball, answers),
    Index = rand:uniform(tuple_size(Answers)),
    {reply, element(Index, Answers), State};
handle_call(stop, _From, State) ->
    {stop, normal, ok, State};
handle_call(_Unknown, _From, State) ->
    {reply, {error, unknown_call}, State}.

%% gen_server cast handler: every cast is ignored and the state is kept
%% as it is.
handle_cast(_Msg, Data) ->
    {noreply, Data}.

%% gen_server info handler: any other message delivered to the process is
%% dropped and the state is kept as it is.
handle_info(_Msg, Data) ->
    {noreply, Data}.

%% gen_server code-change callback: no state conversion is performed; the
%% current state is returned as the new state.
code_change(_FromVsn, Data, _Opts) ->
    {ok, Data}.

%% gen_server terminate callback: performs no clean-up and returns ok for
%% any reason and state.
terminate(_Why, _Data) ->
    ok.

m8ball_sup.erl

-module(m8ball_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).

%% Starts the top supervisor linked to the caller and registers it in the
%% global name registry under this module's name. init/1 receives [].
%% NOTE(review): the global name presumably lets only one instance of this
%% supervisor exist across connected nodes -- confirm whether that is what
%% makes a takeover start fail with already_started.
start_link() ->
    SupName = {global, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).

%% Supervisor init callback.
%%
%% Restart policy: one_for_one with the tuple values 1 and 10.
%% Single child, id m8ball: a permanent worker started through
%% m8ball_server:start_link/0, with 5000 as its shutdown value and
%% m8ball_server as its only callback module.
init([]) ->
    RestartPolicy = {one_for_one, 1, 10},
    StartMFA = {m8ball_server, start_link, []},
    ServerSpec = {m8ball, StartMFA, permanent, 5000, worker, [m8ball_server]},
    {ok, {RestartPolicy, [ServerSpec]}}.

config files

a.config

%% Kernel configuration passed to node a (erl -config a.config).
%%
%% distributed: declares m8ball as a distributed application with the value
%%   3000 and the node list [a@cmbp, {b@cmbp, c@cmbp}].
%%   NOTE(review): presumably 3000 is a timeout in milliseconds and the inner
%%   tuple marks b@cmbp and c@cmbp as equally ranked fallbacks behind a@cmbp
%%   -- confirm against the kernel application documentation.
%% sync_nodes_mandatory: empty, so no node is required.
%% sync_nodes_optional: the two other nodes of the cluster.
%% sync_nodes_timeout: 5000; this matches the 5 s start-up wait for the other
%%   nodes observed in the walkthrough.
[{kernel,
  [{distributed, [{m8ball,
                   3000,
                  [a@cmbp, {b@cmbp, c@cmbp}]}]},
   {sync_nodes_mandatory, []},
   {sync_nodes_optional, [b@cmbp, c@cmbp]},
   {sync_nodes_timeout, 5000}
  ]
 }
].

b.config

%% Kernel configuration passed to node b (erl -config b.config).
%%
%% distributed: declares m8ball as a distributed application with the value
%%   3000 and the node list [a@cmbp, {b@cmbp, c@cmbp}].
%%   NOTE(review): presumably 3000 is a timeout in milliseconds and the inner
%%   tuple marks b@cmbp and c@cmbp as equally ranked fallbacks behind a@cmbp
%%   -- confirm against the kernel application documentation.
%% sync_nodes_mandatory: empty, so no node is required.
%% sync_nodes_optional: the two other nodes of the cluster.
%% sync_nodes_timeout: 5000; this matches the 5 s start-up wait for the other
%%   nodes observed in the walkthrough.
[{kernel,
  [{distributed, [{m8ball,
                   3000,
                  [a@cmbp, {b@cmbp, c@cmbp}]}]},
   {sync_nodes_mandatory, []},
   {sync_nodes_optional, [a@cmbp, c@cmbp]},
   {sync_nodes_timeout, 5000}
  ]
 }
].

c.config

%% Kernel configuration passed to node c (erl -config c.config).
%%
%% distributed: declares m8ball as a distributed application with the value
%%   3000 and the node list [a@cmbp, {b@cmbp, c@cmbp}].
%%   NOTE(review): presumably 3000 is a timeout in milliseconds and the inner
%%   tuple marks b@cmbp and c@cmbp as equally ranked fallbacks behind a@cmbp
%%   -- confirm against the kernel application documentation.
%% sync_nodes_mandatory: empty, so no node is required.
%% sync_nodes_optional: the two other nodes of the cluster.
%% sync_nodes_timeout: 5000; this matches the 5 s start-up wait for the other
%%   nodes observed in the walkthrough.
[{kernel,
  [{distributed, [{m8ball,
                   3000,
                  [a@cmbp, {b@cmbp, c@cmbp}]}]},
   {sync_nodes_mandatory, []},
   {sync_nodes_optional, [a@cmbp, b@cmbp]},
   {sync_nodes_timeout, 5000}
  ]
 }
].

startup script

runa.sh

# Earlier variant that started crypto and m8ball one by one (kept for reference):
# erl -sname a -config a.config -pa ebin -eval 'application:start(crypto), application:start(m8ball)'

# Start node "a" (short name) with a.config, add ebin to the code path and
# start m8ball through application:ensure_all_started/1.
erl -sname a -config a.config -pa ebin -eval 'application:ensure_all_started(m8ball)'

runb.sh

# Earlier variant that started crypto and m8ball one by one (kept for reference):
# erl -sname b -config b.config -pa ebin -eval 'application:start(crypto), application:start(m8ball)'

# Start node "b" (short name) with b.config, add ebin to the code path and
# start m8ball through application:ensure_all_started/1.
erl -sname b -config b.config -pa ebin -eval 'application:ensure_all_started(m8ball)'

runc.sh

# Earlier variant that started crypto and m8ball one by one (kept for reference):
# erl -sname c -config c.config -pa ebin -eval 'application:start(crypto), application:start(m8ball)'

# Start node "c" (short name) with c.config, add ebin to the code path and
# start m8ball through application:ensure_all_started/1.
erl -sname c -config c.config -pa ebin -eval 'application:ensure_all_started(m8ball)'

compile

# Compile the project with rebar (the run scripts load the code via -pa ebin)
rebar compile

測試

先啟動 node a,因為 b, c 還沒有啟動,這時候, a 在啟動時,會等 5s,看看 b, c 是不是有啟動。 5s 後,才會進入 console。

application:which_applications(). 可以查詢目前運作的 applications

./runa.sh

Erlang/OTP 20 [erts-9.3] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:10] [kernel-poll:false]

Eshell V9.3  (abort with ^G)
(a@cmbp)1> application:which_applications().
[{m8ball,"Answer vital questions","1.0.0"},
 {crypto,"CRYPTO","4.2.1"},
 {stdlib,"ERTS  CXC 138 10","3.4.4"},
 {kernel,"ERTS  CXC 138 10","5.4.3"}]

如果啟動 b,一樣會等 5s。但如果同時再啟動 c,就會直接進入 console。

./runb.sh
Erlang/OTP 20 [erts-9.3] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:10] [kernel-poll:false]

Eshell V9.3  (abort with ^G)
(b@cmbp)1> application:which_applications().
[{crypto,"CRYPTO","4.2.1"},
 {stdlib,"ERTS  CXC 138 10","3.4.4"},
 {kernel,"ERTS  CXC 138 10","5.4.3"}]
./runc.sh
Erlang/OTP 20 [erts-9.3] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:10] [kernel-poll:false]

Eshell V9.3  (abort with ^G)
(c@cmbp)1> application:which_applications().
[{crypto,"CRYPTO","4.2.1"},
 {stdlib,"ERTS  CXC 138 10","3.4.4"},
 {kernel,"ERTS  CXC 138 10","5.4.3"}]

透過 application 查詢,可得知 m8ball 運作在 node a

如果把 node a 關掉,等待 5s 後,進行 application 查詢,可發現 m8ball 運作在 node b

(b@cmbp)2> application:which_applications().
[{m8ball,"Answer vital questions","1.0.0"},
 {crypto,"CRYPTO","4.2.1"},
 {stdlib,"ERTS  CXC 138 10","3.4.4"},
 {kernel,"ERTS  CXC 138 10","5.4.3"}]

再把 node b 關掉,等待 5s 後,進行 application 查詢,可發現 m8ball 運作在 node c。

重新啟動 node a,這時候 node c 的 application 會被停止

(c@cmbp)6>
=INFO REPORT==== 26-Mar-2021::15:45:28 ===
    application: m8ball
    exited: stopped
    type: temporary

但是並沒有回到 node a 運作。這邊認為有可能是 erlang 的問題。

(a@cmbp)1>
=INFO REPORT==== 26-Mar-2021::15:45:28 ===
    application: m8ball
    exited: {{already_started,<5269.88.0>},
             {m8ball,start,[{takeover,c@cmbp},[]]}}
    type: temporary

=INFO REPORT==== 26-Mar-2021::15:45:28 ===
    application: crypto
    exited: stopped
    type: temporary

在 takeover 時, b,c 兩個 node 如果少了一個,就會發生問題。

再重新執行一次 a

$ ./runa.sh
Erlang/OTP 20 [erts-9.3] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:10] [kernel-poll:false]

Eshell V9.3  (abort with ^G)
(a@cmbp)1> application:which_applications().
[{m8ball,"Answer vital questions","1.0.0"},
 {crypto,"CRYPTO","4.2.1"},
 {stdlib,"ERTS  CXC 138 10","3.4.4"},
 {kernel,"ERTS  CXC 138 10","5.4.3"}]

Note:如果只做兩個 Node,這時候就沒有發生上面 takeover 的問題。


在任意一個節點,只要 m8ball 有在某一個節點運作,就可以使用 m8ball

(b@cmbp)3> m8ball:ask("Questions?").
<<"*backs away slowly and runs away*">>
(b@cmbp)4> m8ball:ask("Questions?").
<<"No">>
(b@cmbp)5> m8ball:ask("Questions?").
<<"Of course">>

References

Distributed OTP Applications

沒有留言:

張貼留言