diff --git a/.github/workflows/ci-verification.yml b/.github/workflows/ci-verification.yml index 196899a4ad02..c3e513bd9a4f 100644 --- a/.github/workflows/ci-verification.yml +++ b/.github/workflows/ci-verification.yml @@ -41,7 +41,7 @@ jobs: - name: Install TLC dependencies run: | tdnf install -y jre wget - python3 tla/install_deps.py --skip-apt-packages + python3 tla/install_deps.py - run: cd tla && ./tlc.py mc consistency/MCSingleNode.tla - run: cd tla && ./tlc.py mc consistency/MCSingleNodeReads.tla @@ -70,7 +70,7 @@ jobs: - name: Install TLC dependencies run: | sudo apt update - sudo apt install -y default-jre + sudo apt install -y default-jre wget python3 install_deps.py - run: ./tlc_debug.sh --config consistency/MCSingleNodeCommitReachability.cfg mc consistency/MCSingleNodeReads.tla @@ -90,7 +90,7 @@ jobs: - name: Install TLC dependencies run: | sudo apt update - sudo apt install -y default-jre + sudo apt install -y default-jre wget python3 install_deps.py - run: ./tlc.py sim --num 500 --depth 50 consistency/MultiNodeReads.tla @@ -128,7 +128,7 @@ jobs: - name: Install TLC dependencies run: | tdnf install -y jre wget - python3 tla/install_deps.py --skip-apt-packages + python3 tla/install_deps.py - run: cd tla && ./tlc.py mc consensus/MCabs.tla - run: cd tla && ./tlc.py --trace-name 1C2N mc --term-count 2 --request-count 2 --raft-configs 1C2N consensus/MCccfraft.tla @@ -155,7 +155,7 @@ jobs: - name: Install TLC dependencies run: | sudo apt update - sudo apt install -y default-jre + sudo apt install -y default-jre wget python3 install_deps.py - run: ./tlc.py sim consensus/SIMccfraft.tla @@ -193,22 +193,16 @@ jobs: with: fetch-depth: 0 - - name: Install TLC dependencies - run: | - tdnf install -y jre wget - python3 tla/install_deps.py --skip-apt-packages - - name: "Install dependencies" shell: bash run: | set -ex ./scripts/setup-ci.sh - # Parallel - wget https://ftp.gnu.org/gnu/parallel/parallel-latest.tar.bz2 - tar -xjf parallel-latest.tar.bz2 - cd $(ls | grep 'parallel' | grep -v 'tar' | grep -v 'rpm') - ./configure && make && make install + - name: Install TLC dependencies + run: | + tdnf install -y jre wget + python3 tla/install_deps.py --tdnf-extended - name: "Build" run: | diff --git a/.github/workflows/long-verification.yml b/.github/workflows/long-verification.yml index 09bd4126accc..bc8afc2d21bc 100644 --- a/.github/workflows/long-verification.yml +++ b/.github/workflows/long-verification.yml @@ -39,7 +39,7 @@ jobs: - name: Install TLC dependencies run: | tdnf install -y jre wget - python3 tla/install_deps.py --skip-apt-packages + python3 tla/install_deps.py - run: cd tla && ./tlc.py --trace-name 2C2N mc --term-count 2 --request-count 0 --raft-configs 2C2N --disable-check-quorum consensus/MCccfraft.tla @@ -77,7 +77,7 @@ jobs: - name: Install TLC dependencies run: | tdnf install -y jre wget - python3 tla/install_deps.py --skip-apt-packages + python3 tla/install_deps.py - run: cd tla && ./tlc.py --trace-name 3C2N mc --term-count 2 --request-count 0 --raft-configs 3C2N --disable-check-quorum consensus/MCccfraft.tla @@ -102,7 +102,7 @@ jobs: - uses: actions/checkout@v5 - run: | sudo apt update - sudo apt install -y default-jre + sudo apt install -y default-jre wget python3 install_deps.py - run: ./tlc.py sim --max-seconds 3000 --depth 500 consensus/SIMccfraft.tla diff --git a/CHANGELOG.md b/CHANGELOG.md index 7660f6ac1440..b59a2afb0bcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added -- Support for PreVote optimisation. Nodes understand and are able to respond to PreVote messages, but will not become pre-vote candidates themselves. (#7419) +- Support for PreVote optimisation. Nodes understand and are able to respond to PreVote messages, but will not become pre-vote candidates themselves. (#7419, #7445) ### Changed diff --git a/doc/architecture/raft_tla.rst b/doc/architecture/raft_tla.rst index e3f5b11ed30a..fdde71565601 100644 --- a/doc/architecture/raft_tla.rst +++ b/doc/architecture/raft_tla.rst @@ -44,7 +44,7 @@ It is possible to produce fresh traces quickly from the driver by running the `` Calling the trace validation on, for example, the ``append`` scenario can then be done with ``./tlc.py --driver-trace ../build/append.ndjson consensus/Traceccfraft.tla``. -Generating a trace of a scenario and validate it in one go can be done with ``./tlc.py --workers 1 tv --scenario ../tests/raft_scenarios/append consensus/Traceccfraft.tla``. +Generating a trace of a scenario and validating it in one go can be done with ``./tlc.py --workers 1 tv --scenario ../tests/raft_scenarios/append consensus/Traceccfraft.tla``. This runs the raft_driver on the scenario, cleans the trace and then validates it against the TLA+ specification. CCF also provides a command line trace visualizer to aid debugging, for example, the ``append`` scenario can be visualized with ``python ../tests/trace_viz.py ../build/append.ndjson``. diff --git a/src/consensus/aft/raft.h b/src/consensus/aft/raft.h index db8bb15fa5ef..8d92f239da6a 100644 --- a/src/consensus/aft/raft.h +++ b/src/consensus/aft/raft.h @@ -772,6 +772,15 @@ namespace aft break; } + case raft_request_pre_vote: + { + RequestPreVote r = + channels->template recv_authenticated( + from, data, size); + recv_request_pre_vote(from, r); + break; + } + case raft_request_vote: { RequestVote r = channels->template recv_authenticated( @@ -780,6 +789,15 @@ namespace aft break; } + case raft_request_pre_vote_response: + { + RequestPreVoteResponse r = + channels->template recv_authenticated( + from, data, size); + recv_request_pre_vote_response(from, r); + break; + } + case raft_request_vote_response: { RequestVoteResponse r = @@ -1040,16 +1058,6 @@ namespace aft const auto prev_term = get_term_internal(prev_idx); const auto term_of_idx = get_term_internal(end_idx); - RAFT_DEBUG_FMT( - "Send append entries from {} to {}: ({}.{}, {}.{}] ({})", - state->node_id, - to, - prev_term, - prev_idx, - term_of_idx, - end_idx, - state->commit_idx); - #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wc99-designator" AppendEntries ae{ @@ -1062,6 +1070,17 @@ namespace aft }; #pragma clang diagnostic pop + RAFT_DEBUG_FMT( + "Send {} from {} to {}: ({}.{}, {}.{}] ({})", + ae.msg, + state->node_id, + to, + prev_term, + prev_idx, + term_of_idx, + end_idx, + state->commit_idx); + auto& node = all_other_nodes.at(to); #ifdef CCF_RAFT_TRACING @@ -1097,12 +1116,14 @@ namespace aft std::unique_lock guard(state->lock); RAFT_DEBUG_FMT( - "Received append entries: {}.{} to {}.{} (from {} in term {})", + "Recv {} to {} from {}: {}.{} to {}.{} in term {}", + r.msg, + state->node_id, + from, r.prev_term, r.prev_idx, r.term_of_idx, r.idx, - from, r.term); #ifdef CCF_RAFT_TRACING @@ -1136,7 +1157,8 @@ namespace aft { // Reply false, since our term is later than the received term. RAFT_INFO_FMT( - "Recv append entries to {} from {} but our term is later ({} > {})", + "Recv {} to {} from {} but our term is later ({} > {})", + r.msg, state->node_id, from, state->current_view, @@ -1157,8 +1179,9 @@ namespace aft if (prev_term == 0) { RAFT_DEBUG_FMT( - "Recv append entries to {} from {} but our log does not yet " + "Recv {} to {} from {} but our log does not yet " "contain index {}", + r.msg, state->node_id, from, r.prev_idx); @@ -1167,8 +1190,9 @@ namespace aft else { RAFT_DEBUG_FMT( - "Recv append entries to {} from {} but our log at {} has the wrong " + "Recv {} to {} from {} but our log at {} has the wrong " "previous term (ours: {}, theirs: {})", + r.msg, state->node_id, from, r.prev_idx, @@ -1194,8 +1218,9 @@ namespace aft if (r.prev_idx < state->commit_idx) { RAFT_DEBUG_FMT( - "Recv append entries to {} from {} but prev_idx ({}) < commit_idx " + "Recv {} to {} from {} but prev_idx ({}) < commit_idx " "({})", + r.msg, state->node_id, from, r.prev_idx, @@ -1209,7 +1234,8 @@ namespace aft else if (r.prev_idx > state->last_idx) { RAFT_FAIL_FMT( - "Recv append entries to {} from {} but prev_idx ({}) > last_idx ({})", + "Recv {} to {} from {} but prev_idx ({}) > last_idx ({})", + r.msg, state->node_id, from, r.prev_idx, @@ -1218,7 +1244,8 @@ namespace aft } RAFT_DEBUG_FMT( - "Recv append entries to {} from {} for index {} and previous index {}", + "Recv {} to {} from {} for index {} and previous index {}", + r.msg, state->node_id, from, r.idx, @@ -1300,7 +1327,8 @@ namespace aft { // This should only fail if there is malformed data. RAFT_FAIL_FMT( - "Recv append entries to {} from {} but the data is malformed: {}", + "Recv {} to {} from {} but the data is malformed: {}", + r.msg, state->node_id, from, e.what()); @@ -1313,8 +1341,9 @@ namespace aft if (ds == nullptr) { RAFT_FAIL_FMT( - "Recv append entries to {} from {} but the entry could not be " + "Recv {} to {} from {} but the entry could not be " "deserialised", + r.msg, state->node_id, from); send_append_entries_response_nack(from); @@ -1513,19 +1542,20 @@ namespace aft aft::Term response_term, aft::Index response_idx) { - RAFT_DEBUG_FMT( - "Send append entries response from {} to {} for index {}: {}", - state->node_id, - to, - response_idx, - (answer == AppendEntriesResponseType::OK ? "ACK" : "NACK")); - AppendEntriesResponse response{ .term = response_term, .last_log_idx = response_idx, .success = answer, }; + RAFT_DEBUG_FMT( + "Send {} from {} to {} for index {}: {}", + response.msg, + state->node_id, + to, + response_idx, + (answer == AppendEntriesResponseType::OK ? "ACK" : "NACK")); + #ifdef CCF_RAFT_TRACING nlohmann::json j = {}; j["function"] = "send_append_entries_response"; @@ -1572,7 +1602,8 @@ namespace aft if (state->leadership_state != ccf::kv::LeadershipState::Leader) { RAFT_INFO_FMT( - "Recv append entries response to {} from {}: no longer leader", + "Recv {} to {} from {}: no longer leader", + r.msg, state->node_id, from); return; @@ -1585,8 +1616,9 @@ namespace aft { // We are behind, update our state. RAFT_DEBUG_FMT( - "Recv append entries response to {} from {}: more recent term ({} " + "Recv {} to {} from {}: more recent term ({} " "> {})", + r.msg, state->node_id, from, r.term, @@ -1604,7 +1636,8 @@ namespace aft if (r.success == AppendEntriesResponseType::OK) { RAFT_DEBUG_FMT( - "Recv append entries response to {} from {}: stale term ({} != {})", + "Recv {} to {} from {}: stale term ({} != {})", + r.msg, state->node_id, from, r.term, @@ -1622,9 +1655,7 @@ namespace aft if (r.success == AppendEntriesResponseType::OK) { RAFT_DEBUG_FMT( - "Recv append entries response to {} from {}: stale idx", - state->node_id, - from); + "Recv {} to {} from {}: stale idx", r.msg, state->node_id, from); return; } } @@ -1634,9 +1665,7 @@ namespace aft { // Failed due to log inconsistency. Reset sent_idx, and try again soon. RAFT_DEBUG_FMT( - "Recv append entries response to {} from {}: failed", - state->node_id, - from); + "Recv {} to {} from {}: failed", r.msg, state->node_id, from); const auto this_match = find_highest_possible_match({r.term, r.last_log_idx}); node->second.sent_idx = std::max( @@ -1652,29 +1681,48 @@ namespace aft } RAFT_DEBUG_FMT( - "Recv append entries response to {} from {} for index {}: success", + "Recv {} to {} from {} for index {}: success", + r.msg, state->node_id, from, r.last_log_idx); update_commit(); } - void send_request_vote(const ccf::NodeId& to, ElectionType election_type) + void send_request_pre_vote(const ccf::NodeId& to) + { + auto last_committable_idx = last_committable_index(); + CCF_ASSERT(last_committable_idx >= state->commit_idx, "lci < ci"); + + RequestPreVote rpv{ + .term = state->current_view, + .last_committable_idx = last_committable_idx, + .term_of_last_committable_idx = + get_term_internal(last_committable_idx)}; + +#ifdef CCF_RAFT_TRACING + nlohmann::json j = {}; + j["function"] = "send_request_vote"; + j["packet"] = rpv; + j["state"] = *state; + COMMITTABLE_INDICES(j["state"], state); + j["to_node_id"] = to; + RAFT_TRACE_JSON_OUT(j); +#endif + + channels->send_authenticated(to, ccf::NodeMsgType::consensus_msg, rpv); + } + + void send_request_vote(const ccf::NodeId& to) { auto last_committable_idx = last_committable_index(); - RAFT_INFO_FMT( - "Send {}request vote from {} to {} at {}", - election_type == ElectionType::PreVote ? "pre-vote " : "", - state->node_id, - to, - last_committable_idx); CCF_ASSERT(last_committable_idx >= state->commit_idx, "lci < ci"); RequestVote rv{ .term = state->current_view, .last_committable_idx = last_committable_idx, - .term_of_last_committable_idx = get_term_internal(last_committable_idx), - .election_type = election_type}; + .term_of_last_committable_idx = + get_term_internal(last_committable_idx)}; #ifdef CCF_RAFT_TRACING nlohmann::json j = {}; @@ -1689,10 +1737,9 @@ namespace aft channels->send_authenticated(to, ccf::NodeMsgType::consensus_msg, rv); } - void recv_request_vote(const ccf::NodeId& from, RequestVote r) + void recv_request_vote_unsafe( + const ccf::NodeId& from, RequestVote r, ElectionType election_type) { - std::lock_guard guard(state->lock); - // Do not check that from is a known node. It is possible to receive // RequestVotes from nodes that this node doesn't yet know, just as it // receives AppendEntries from those nodes. These should be obeyed just @@ -1700,32 +1747,24 @@ namespace aft // produce a primary in the new term, who will then help this node catch // up. -#ifdef CCF_RAFT_TRACING - nlohmann::json j = {}; - j["function"] = "recv_request_vote"; - j["packet"] = r; - j["state"] = *state; - COMMITTABLE_INDICES(j["state"], state); - j["from_node_id"] = from; - RAFT_TRACE_JSON_OUT(j); -#endif - if (state->current_view > r.term) { // Reply false, since our term is later than the received term. RAFT_DEBUG_FMT( - "Recv request vote to {} from {}: our term is later ({} > {})", + "Recv {} to {} from {}: our term is later ({} > {})", + r.msg, state->node_id, from, state->current_view, r.term); - send_request_vote_response(from, false, r.election_type); + send_request_vote_response(from, false, election_type); return; } if (state->current_view < r.term) { RAFT_DEBUG_FMT( - "Recv request vote to {} from {}: their term is later ({} < {})", + "Recv {} to {} from {}: their term is later ({} < {})", + r.msg, state->node_id, from, state->current_view, @@ -1740,12 +1779,12 @@ namespace aft bool grant_vote = true; - if ( - (r.election_type == ElectionType::RegularVote) && leader_id.has_value()) + if ((election_type == ElectionType::RegularVote) && leader_id.has_value()) { // Reply false, since we already know the leader in the current term. RAFT_DEBUG_FMT( - "Recv request vote to {} from {}: leader {} already known in term {}", + "Recv {} to {} from {}: leader {} already known in term {}", + r.msg, state->node_id, from, leader_id.value(), @@ -1755,11 +1794,12 @@ namespace aft auto voted_for_other = (voted_for.has_value()) && (voted_for.value() != from); - if ((r.election_type == ElectionType::RegularVote) && voted_for_other) + if ((election_type == ElectionType::RegularVote) && voted_for_other) { // Reply false, since we already voted for someone else. RAFT_DEBUG_FMT( - "Recv request vote to {} from {}: already voted for {}", + "Recv {} to {} from {}: already voted for {}", + r.msg, state->node_id, from, voted_for.value()); @@ -1779,8 +1819,9 @@ namespace aft if (!log_up_to_date) { RAFT_DEBUG_FMT( - "Request vote to {} from {}: candidate log {}.{} is not up-to-date " + "Recv {} to {} from {}: candidate log {}.{} is not up-to-date " "with ours {}.{}", + r.msg, state->node_id, from, r.term_of_last_committable_idx, @@ -1790,7 +1831,7 @@ namespace aft grant_vote = false; } - if (grant_vote && r.election_type == ElectionType::RegularVote) + if (grant_vote && election_type == ElectionType::RegularVote) { // If we grant our vote to a candidate, then an election is in progress restart_election_timeout(); @@ -1799,9 +1840,9 @@ namespace aft } RAFT_INFO_FMT( - "Request {}vote to {} from {}: {} vote to candidate at {}.{} with " + "Recv {} to {} from {}: {} vote to candidate at {}.{} with " "local state at {}.{}", - r.election_type == ElectionType::PreVote ? "pre-" : "", + r.msg, state->node_id, from, grant_vote ? "granted" : "denied", @@ -1810,30 +1851,89 @@ namespace aft term_of_last_committable_idx, last_committable_idx); - send_request_vote_response(from, grant_vote, r.election_type); + send_request_vote_response(from, grant_vote, election_type); + } + + void recv_request_vote(const ccf::NodeId& from, RequestVote r) + { + std::lock_guard guard(state->lock); + +#ifdef CCF_RAFT_TRACING + nlohmann::json j = {}; + j["function"] = "recv_request_vote"; + j["packet"] = r; + j["state"] = *state; + COMMITTABLE_INDICES(j["state"], state); + j["from_node_id"] = from; + RAFT_TRACE_JSON_OUT(j); +#endif + + recv_request_vote_unsafe(from, r, ElectionType::RegularVote); + } + + void recv_request_pre_vote(const ccf::NodeId& from, RequestPreVote r) + { + std::lock_guard guard(state->lock); + +#ifdef CCF_RAFT_TRACING + nlohmann::json j = {}; + j["function"] = "recv_request_vote"; + j["packet"] = r; + j["state"] = *state; + COMMITTABLE_INDICES(j["state"], state); + j["from_node_id"] = from; + RAFT_TRACE_JSON_OUT(j); +#endif + + // A pre-vote is a speculative request vote, so we translate it back to a + // RequestVote to avoid duplicating the logic. + RequestVote rv{ + .term = r.term, + .last_committable_idx = r.last_committable_idx, + .term_of_last_committable_idx = r.term_of_last_committable_idx, + }; + recv_request_vote_unsafe(from, rv, ElectionType::PreVote); } void send_request_vote_response( const ccf::NodeId& to, bool answer, ElectionType election_type) { - RAFT_INFO_FMT( - "Send request {}vote response from {} to {}: {}", - election_type == ElectionType::PreVote ? "pre-" : "", - state->node_id, - to, - answer); + if (election_type == ElectionType::RegularVote) + { + RequestVoteResponse response{ + .term = state->current_view, .vote_granted = answer}; - RequestVoteResponse response{ - .term = state->current_view, - .vote_granted = answer, - .election_type = election_type}; + RAFT_INFO_FMT( + "Send {} from {} to {}: {}", + response.msg, + state->node_id, + to, + answer); - channels->send_authenticated( - to, ccf::NodeMsgType::consensus_msg, response); + channels->send_authenticated( + to, ccf::NodeMsgType::consensus_msg, response); + } + else + { + RequestPreVoteResponse response{ + .term = state->current_view, .vote_granted = answer}; + + RAFT_INFO_FMT( + "Send {} from {} to {}: {}", + response.msg, + state->node_id, + to, + answer); + + channels->send_authenticated( + to, ccf::NodeMsgType::consensus_msg, response); + } } void recv_request_vote_response( - const ccf::NodeId& from, RequestVoteResponse r) + const ccf::NodeId& from, + RequestVoteResponse r, + ElectionType election_type) { std::lock_guard guard(state->lock); @@ -1852,7 +1952,8 @@ namespace aft state->leadership_state != ccf::kv::LeadershipState::Candidate) { RAFT_INFO_FMT( - "Recv request vote response to {} from: {}: we aren't a candidate", + "Recv {} to {} from: {}: we aren't a candidate", + r.msg, state->node_id, from); return; @@ -1860,11 +1961,12 @@ namespace aft // Stale message if ( - r.election_type == ElectionType::PreVote && + election_type == ElectionType::PreVote && state->leadership_state == ccf::kv::LeadershipState::Candidate) { RAFT_INFO_FMT( - "Recv pre-vote response to {} from {}: no longer in pre-vote", + "Recv {} to {} from {}: no longer in pre-vote", + r.msg, state->node_id, from); return; @@ -1877,12 +1979,13 @@ namespace aft // while still in PreVoteCandidate state something illegal must have // happened. if ( - r.election_type == ElectionType::RegularVote && + election_type == ElectionType::RegularVote && state->leadership_state == ccf::kv::LeadershipState::PreVoteCandidate) { RAFT_FAIL_FMT( - "Recv vote response to {} from {}: We should not yet sent a request " - "vote", + "Recv {} to {} from {}: We should not yet have sent a request " + "vote, as we are still a PreVoteCandidate yet received a response", + r.msg, state->node_id, from); return; @@ -1893,17 +1996,16 @@ namespace aft if (node == all_other_nodes.end()) { RAFT_INFO_FMT( - "Recv request vote response to {} from {}: unknown node", - state->node_id, - from); + "Recv {} to {} from {}: unknown node", r.msg, state->node_id, from); return; } if (state->current_view < r.term) { RAFT_INFO_FMT( - "Recv request vote response to {} from {}: their term is more recent " + "Recv {} to {} from {}: their term is more recent " "({} < {})", + r.msg, state->node_id, from, state->current_view, @@ -1940,6 +2042,19 @@ namespace aft add_vote_for_me(from); } + void recv_request_vote_response( + const ccf::NodeId& from, RequestVoteResponse r) + { + recv_request_vote_response(from, r, ElectionType::RegularVote); + } + + void recv_request_pre_vote_response( + const ccf::NodeId& from, RequestPreVoteResponse r) + { + RequestVoteResponse rvr{.term = r.term, .vote_granted = r.vote_granted}; + recv_request_vote_response(from, rvr, ElectionType::PreVote); + } + void recv_propose_request_vote( const ccf::NodeId& from, ProposeRequestVote r) { @@ -2020,7 +2135,7 @@ namespace aft for (auto const& node_id : other_nodes_in_active_configs()) { // ccfraft!RequestVote - send_request_vote(node_id, ElectionType::PreVote); + send_request_pre_vote(node_id); } } @@ -2067,7 +2182,7 @@ namespace aft for (auto const& node_id : other_nodes_in_active_configs()) { // ccfraft!RequestVote - send_request_vote(node_id, ElectionType::RegularVote); + send_request_vote(node_id); } } diff --git a/src/consensus/aft/raft_types.h b/src/consensus/aft/raft_types.h index 89f022c2c8b7..803d24ba09c8 100644 --- a/src/consensus/aft/raft_types.h +++ b/src/consensus/aft/raft_types.h @@ -102,6 +102,8 @@ namespace aft raft_request_vote, raft_request_vote_response, raft_propose_request_vote, + raft_request_pre_vote, + raft_request_pre_vote_response, }; DECLARE_JSON_ENUM( RaftMsgType, @@ -114,6 +116,9 @@ namespace aft {RaftMsgType::raft_request_vote, "raft_request_vote"}, {RaftMsgType::raft_request_vote_response, "raft_request_vote_response"}, {RaftMsgType::raft_propose_request_vote, "raft_propose_request_vote"}, + {RaftMsgType::raft_request_pre_vote, "raft_request_pre_vote"}, + {RaftMsgType::raft_request_pre_vote_response, + "raft_request_pre_vote_response"}, }); #pragma pack(push, 1) @@ -183,9 +188,6 @@ namespace aft DECLARE_JSON_REQUIRED_FIELDS( AppendEntriesResponse, term, last_log_idx, success); - DECLARE_JSON_TYPE(RaftHeader) - DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg) - enum ElectionType { PreVote = 0, @@ -195,18 +197,31 @@ namespace aft ElectionType, {{ElectionType::PreVote, "PreVote"}, {ElectionType::RegularVote, "RegularVote"}}); + + DECLARE_JSON_TYPE(RaftHeader) + DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg) struct RequestVote : RaftHeader { Term term; Index last_committable_idx; Term term_of_last_committable_idx; - ElectionType election_type = RegularVote; }; - DECLARE_JSON_TYPE_WITH_BASE_AND_OPTIONAL_FIELDS( - RequestVote, RaftHeader); + DECLARE_JSON_TYPE_WITH_BASE(RequestVote, RaftHeader); DECLARE_JSON_REQUIRED_FIELDS( RequestVote, term, last_committable_idx, term_of_last_committable_idx); - DECLARE_JSON_OPTIONAL_FIELDS(RequestVote, election_type) + + DECLARE_JSON_TYPE(RaftHeader); + DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg); + struct RequestPreVote : RaftHeader + { + Term term; + Index last_committable_idx; + Term term_of_last_committable_idx; + }; + DECLARE_JSON_TYPE_WITH_BASE( + RequestPreVote, RaftHeader); + DECLARE_JSON_REQUIRED_FIELDS( + RequestPreVote, term, last_committable_idx, term_of_last_committable_idx); DECLARE_JSON_TYPE(RaftHeader) DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg) @@ -214,12 +229,21 @@ namespace aft { Term term; bool vote_granted; - ElectionType election_type = RegularVote; }; - DECLARE_JSON_TYPE_WITH_BASE_AND_OPTIONAL_FIELDS( + DECLARE_JSON_TYPE_WITH_BASE( RequestVoteResponse, RaftHeader); DECLARE_JSON_REQUIRED_FIELDS(RequestVoteResponse, term, vote_granted); - DECLARE_JSON_OPTIONAL_FIELDS(RequestVoteResponse, election_type) + + DECLARE_JSON_TYPE(RaftHeader) + DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg) + struct RequestPreVoteResponse : RaftHeader + { + Term term; + bool vote_granted; + }; + DECLARE_JSON_TYPE_WITH_BASE( + RequestPreVoteResponse, RaftHeader); + DECLARE_JSON_REQUIRED_FIELDS(RequestPreVoteResponse, term, vote_granted); DECLARE_JSON_TYPE(RaftHeader) DECLARE_JSON_REQUIRED_FIELDS(RaftHeader, msg) @@ -235,3 +259,59 @@ namespace aft #pragma pack(pop) } + +FMT_BEGIN_NAMESPACE +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const aft::RaftMsgType& msg_type, FormatContext& ctx) const + -> decltype(ctx.out()) + { + switch (msg_type) + { + case (aft::RaftMsgType::raft_append_entries): + { + return fmt::format_to(ctx.out(), "append_entries"); + } + case (aft::RaftMsgType::raft_append_entries_response): + { + return fmt::format_to(ctx.out(), "append_entries_response"); + } + case (aft::RaftMsgType::raft_append_entries_signed_response): + { + return fmt::format_to(ctx.out(), "append_entries_signed_response"); + } + case (aft::RaftMsgType::raft_request_vote): + { + return fmt::format_to(ctx.out(), "request_vote"); + } + case (aft::RaftMsgType::raft_request_vote_response): + { + return fmt::format_to(ctx.out(), "request_vote_response"); + } + case (aft::RaftMsgType::raft_propose_request_vote): + { + return fmt::format_to(ctx.out(), "propose_request_vote"); + } + case (aft::RaftMsgType::raft_request_pre_vote): + { + return fmt::format_to(ctx.out(), "request_pre_vote"); + } + case (aft::RaftMsgType::raft_request_pre_vote_response): + { + return fmt::format_to(ctx.out(), "request_pre_vote_response"); + } + default: + throw std::runtime_error( + fmt::format("Unhandled RaftMsgType: {}", uint64_t(msg_type))); + } + } +}; +FMT_END_NAMESPACE diff --git a/src/consensus/aft/test/driver.h b/src/consensus/aft/test/driver.h index 70f5ff27c0e4..9e1217b0d601 100644 --- a/src/consensus/aft/test/driver.h +++ b/src/consensus/aft/test/driver.h @@ -3,6 +3,7 @@ #pragma once #include "consensus/aft/raft.h" +#include "consensus/aft/raft_types.h" #include "ds/internal_logger.h" #include "logging_stub.h" @@ -390,6 +391,21 @@ class RaftDriver "{}--{}{}: {}", first, (dropped ? "X" : ">>"), second, message); } + void log_msg_details( + ccf::NodeId node_id, + ccf::NodeId tgt_node_id, + aft::RequestPreVote rv, + bool dropped) + { + const auto s = fmt::format( + "{} for term {}, at tx {}.{}", + rv.msg, + rv.term, + rv.term_of_last_committable_idx, + rv.last_committable_idx); + log(node_id, tgt_node_id, s, dropped); + } + void log_msg_details( ccf::NodeId node_id, ccf::NodeId tgt_node_id, @@ -397,13 +413,25 @@ class RaftDriver bool dropped) { const auto s = fmt::format( - "request_vote for term {}, at tx {}.{}", + "{} for term {}, at tx {}.{}", + rv.msg, rv.term, rv.term_of_last_committable_idx, rv.last_committable_idx); log(node_id, tgt_node_id, s, dropped); } + void log_msg_details( + ccf::NodeId node_id, + ccf::NodeId tgt_node_id, + aft::RequestPreVoteResponse rv, + bool dropped) + { + const auto s = fmt::format( + "{} for term {} = {}", rv.msg, rv.term, (rv.vote_granted ? "Y" : "N")); + rlog(node_id, tgt_node_id, s, dropped); + } + void log_msg_details( ccf::NodeId node_id, ccf::NodeId tgt_node_id, @@ -411,9 +439,7 @@ class RaftDriver bool dropped) { const auto s = fmt::format( - "request_vote_response for term {} = {}", - rv.term, - (rv.vote_granted ? "Y" : "N")); + "{} for term {} = {}", rv.msg, rv.term, (rv.vote_granted ? "Y" : "N")); rlog(node_id, tgt_node_id, s, dropped); } @@ -424,7 +450,8 @@ class RaftDriver bool dropped) { const auto s = fmt::format( - "append_entries ({}.{}, {}.{}] (term {}, commit {})", + "{} ({}.{}, {}.{}] (term {}, commit {})", + ae.msg, ae.prev_term, ae.prev_idx, ae.term_of_idx, @@ -455,10 +482,7 @@ class RaftDriver } } const auto s = fmt::format( - "append_entries_response {} for {}.{}", - success, - aer.term, - aer.last_log_idx); + "{} {} for {}.{}", aer.msg, success, aer.term, aer.last_log_idx); rlog(node_id, tgt_node_id, s, dropped); } @@ -468,7 +492,7 @@ class RaftDriver aft::ProposeRequestVote prv, bool dropped) { - const auto s = fmt::format("propose_request_vote for term {}", prv.term); + const auto s = fmt::format("{} for term {}", prv.msg, prv.term); log(node_id, tgt_node_id, s, dropped); } @@ -493,7 +517,21 @@ class RaftDriver log_msg_details(node_id, tgt_node_id, rv, dropped); break; } + case (aft::RaftMsgType::raft_request_pre_vote): + { + auto rpv = *(aft::RequestPreVote*)data; + packet = rpv; + log_msg_details(node_id, tgt_node_id, rpv, dropped); + break; + } case (aft::RaftMsgType::raft_request_vote_response): + { + auto rvr = *(aft::RequestPreVoteResponse*)data; + packet = rvr; + log_msg_details(node_id, tgt_node_id, rvr, dropped); + break; + } + case (aft::RaftMsgType::raft_request_pre_vote_response): { auto rvr = *(aft::RequestVoteResponse*)data; packet = rvr; diff --git a/tla/consensus/Traceccfraft.tla b/tla/consensus/Traceccfraft.tla index 5281b6c324df..a539b7216d98 100644 --- a/tla/consensus/Traceccfraft.tla +++ b/tla/consensus/Traceccfraft.tla @@ -4,7 +4,8 @@ EXTENDS ccfraft, Json, IOUtils, Sequences, MCAliases \* raft_types.h enum RaftMsgType RaftMsgType == "raft_append_entries" :> AppendEntriesRequest @@ "raft_append_entries_response" :> AppendEntriesResponse @@ - "raft_request_vote" :> RequestVoteRequest @@ "raft_request_vote_response" :> RequestVoteResponse @@ + "raft_request_vote" :> RequestVoteRequest @@ "raft_request_pre_vote" :> RequestVoteRequest @@ + "raft_request_vote_response" :> RequestVoteResponse @@ "raft_request_pre_vote_response" :> RequestVoteResponse @@ "raft_propose_request_vote" :> ProposeVoteRequest ToLeadershipState == @@ -56,9 +57,9 @@ IsRequestVoteRequest(msg, dst, src, logline) == /\ IsHeader(msg, dst, src, logline, RequestVoteRequest) /\ msg.lastCommittableIndex = logline.msg.packet.last_committable_idx /\ msg.lastCommittableTerm = logline.msg.packet.term_of_last_committable_idx - /\ IF "election_type" \in DOMAIN logline.msg.packet - THEN msg.isPreVote = (logline.msg.packet.election_type = "PreVote") - ELSE msg.isPreVote = FALSE + /\ IF logline.msg.packet.msg = "raft_request_vote" + THEN msg.isPreVote = FALSE + ELSE msg.isPreVote = TRUE IsRequestVoteResponse(msg, dst, src, logline) == /\ IsHeader(msg, dst, src, logline, RequestVoteResponse) diff --git a/tla/consensus/ccfraft.tla b/tla/consensus/ccfraft.tla index a71cf72e1e56..18ed7b0a58bb 100644 --- a/tla/consensus/ccfraft.tla +++ b/tla/consensus/ccfraft.tla @@ -1255,6 +1255,12 @@ DropIgnoredMessage(i,j,m) == \* the next configurations learns that its retirement has been committed. \/ /\ membershipState[i] = RetiredCommitted /\ m.type /= AppendEntriesRequest + \* raft.h::recv_append_entries + \* We drop append entries which start before the commit index + \* This is safe as sentIndex will still be incremented allowing subsequent AppendEntries + \* to start later and hence it won't livelock + \/ /\ m.type = AppendEntriesRequest + /\ m.prevLogIndex < commitIndex[i] /\ Discard(m) /\ UNCHANGED <> diff --git a/tla/install_deps.py b/tla/install_deps.py index 126bcdd1f188..493420e89636 100644 --- a/tla/install_deps.py +++ b/tla/install_deps.py @@ -71,7 +71,8 @@ def _parse_args() -> argparse.Namespace: ) parser.add_argument( - "--skip-apt-packages", action="store_false", default=True, dest="apt_packages" + "--tdnf-extended", + action="store_true", ) return parser.parse_args() @@ -89,6 +90,21 @@ def install_tlc(): def install_deps(args: argparse.Namespace): + if args.tdnf_extended: + with open("/etc/yum.repos.d/tdnf.repo", "w", encoding="utf-8") as tdnf_repo: + tdnf_repo.write( + """[azurelinux-official-extended] + name=Azure Linux Official Extended $releasever $basearch + baseurl=https://packages.microsoft.com/azurelinux/$releasever/prod/extended/$basearch + gpgkey=file:///etc/pki/rpm-gpg/MICROSOFT-RPM-GPG-KEY + gpgcheck=1 + repo_gpgcheck=1 + enabled=1 + skip_if_unavailable=True + sslverify=1""" + ) + subprocess.check_call(["tdnf", "install", "-y", "parallel"]) + # Setup tools directory tools_dir = os.path.join(TLA_DIR, "tools") @@ -112,17 +128,6 @@ def create_tools_dir(): dest=tools_dir, ) - if args.apt_packages: - subprocess.Popen( - "sudo apt-get install -y --no-install-recommends".split() - + [ - "wget", - "graphviz", - "htop", - "texlive-latex-recommended", - ] - ).wait() - fetch_latest( url="https://nightly.tlapl.us/dist/tla2tools.jar", dest=TLA_DIR,