Browse Source

Fixed synchronizer issues which caused large re-orgs

Peers without a recent block are removed at the start of the sync process, however, due to the time lag involved in fetching block summaries and comparing the list of peers, some of these could subsequently drop back to a non-recent block and still be chosen as the next peer to sync with. The end result being that nodes could unnecessarily orphan as many as 20 blocks due to syncing with a peer that doesn't have a recent block (but has a couple of high weight blocks after the common block).

This commit adds some additional filtering to avoid this situation.

1) Peers without a recent block are removed as candidates in comparePeers(), allowing for alternate peers to be chosen.
2) After comparePeers() completes, the list is filtered a second time to make sure that all are still recent.
3) Finally, the peer's state is checked one last time in syncToPeerChain(), just before any orphaning takes place.

Whilst just one of the above would probably have been sufficient, the consequences of this bug are so severe that it makes sense to be very thorough.

The only exception to the above is when the node is in "recovery mode", in which case peers without recent blocks are allowed to be included. Items 1 and 3 above do not apply in recovery mode. Item 2 does apply, since the entire comparePeers() functionality is already skipped in a recovery situation due to our chain being out of date.
name-fixes
CalDescent 3 years ago
parent
commit
13eff43b87
  1. 29
      src/main/java/org/qortal/controller/Synchronizer.java

29
src/main/java/org/qortal/controller/Synchronizer.java

@ -95,7 +95,7 @@ public class Synchronizer extends Thread {
private static Synchronizer instance;
public enum SynchronizationResult {
OK, NOTHING_TO_DO, GENESIS_ONLY, NO_COMMON_BLOCK, TOO_DIVERGENT, NO_REPLY, INFERIOR_CHAIN, INVALID_DATA, NO_BLOCKCHAIN_LOCK, REPOSITORY_ISSUE, SHUTTING_DOWN;
OK, NOTHING_TO_DO, GENESIS_ONLY, NO_COMMON_BLOCK, TOO_DIVERGENT, NO_REPLY, INFERIOR_CHAIN, INVALID_DATA, NO_BLOCKCHAIN_LOCK, REPOSITORY_ISSUE, SHUTTING_DOWN, CHAIN_TIP_TOO_OLD;
}
public static class NewChainTipEvent implements Event {
@ -246,6 +246,12 @@ public class Synchronizer extends Thread {
// We may have added more inferior chain tips when comparing peers, so remove any peers that are currently on those chains
peers.removeIf(Controller.hasInferiorChainTip);
// Remove any peers that are no longer on a recent block since the last check
// Except for times when we're in recovery mode, in which case we need to keep them
if (!recoveryMode) {
peers.removeIf(Controller.hasNoRecentBlock);
}
final int peersRemoved = peersBeforeComparison - peers.size();
if (peersRemoved > 0 && peers.size() > 0)
LOGGER.debug(String.format("Ignoring %d peers on inferior chains. Peers remaining: %d", peersRemoved, peers.size()));
@ -563,7 +569,7 @@ public class Synchronizer extends Thread {
// If our latest block is very old, it's best that we don't try and determine the best peers to sync to.
// This is because it can involve very large chain comparisons, which is too intensive.
// In reality, most forking problems occur near the chain tips, so we will reserve this functionality for those situations.
final Long minLatestBlockTimestamp = Controller.getMinimumLatestBlockTimestamp();
Long minLatestBlockTimestamp = Controller.getMinimumLatestBlockTimestamp();
if (minLatestBlockTimestamp == null)
return peers;
@ -719,6 +725,7 @@ public class Synchronizer extends Thread {
LOGGER.debug(String.format("Listing peers with common block %.8s...", Base58.encode(commonBlockSummary.getSignature())));
for (Peer peer : peersSharingCommonBlock) {
final int peerHeight = peer.getChainTipData().getLastHeight();
final Long peerLastBlockTimestamp = peer.getChainTipData().getLastBlockTimestamp();
final int peerAdditionalBlocksAfterCommonBlock = peerHeight - commonBlockSummary.getHeight();
final CommonBlockData peerCommonBlockData = peer.getCommonBlockData();
@ -729,6 +736,14 @@ public class Synchronizer extends Thread {
continue;
}
// If peer is our of date (since our last check), we should exclude it from this round
minLatestBlockTimestamp = Controller.getMinimumLatestBlockTimestamp();
if (peerLastBlockTimestamp == null || peerLastBlockTimestamp < minLatestBlockTimestamp) {
LOGGER.debug(String.format("Peer %s is out of date - removing it from this round", peer));
peers.remove(peer);
continue;
}
final List<BlockSummaryData> peerBlockSummariesAfterCommonBlock = peerCommonBlockData.getBlockSummariesAfterCommonBlock();
populateBlockSummariesMinterLevels(repository, peerBlockSummariesAfterCommonBlock);
@ -1291,6 +1306,16 @@ public class Synchronizer extends Thread {
return SynchronizationResult.INVALID_DATA;
}
// Final check to make sure the peer isn't out of date (except for when we're in recovery mode)
if (!recoveryMode && peer.getChainTipData() != null) {
final Long minLatestBlockTimestamp = Controller.getMinimumLatestBlockTimestamp();
final Long peerLastBlockTimestamp = peer.getChainTipData().getLastBlockTimestamp();
if (peerLastBlockTimestamp == null || peerLastBlockTimestamp < minLatestBlockTimestamp) {
LOGGER.info(String.format("Peer %s is out of date, so abandoning sync attempt", peer));
return SynchronizationResult.CHAIN_TIP_TOO_OLD;
}
}
byte[] nextPeerSignature = peerBlockSignatures.get(0);
int nextHeight = height + 1;

Loading…
Cancel
Save