From d85a3d17c82e7435a1f2a7d71bbb88181fbf628b Mon Sep 17 00:00:00 2001 From: catbref Date: Mon, 28 Sep 2020 14:22:18 +0100 Subject: [PATCH] Fix for HSQLDB deadlock during CHECKPOINT. Symptoms are: * db/blockchain.log is pretty much exactly 50MB - the checkpoint-triggering size. * Loads of threads are stuck waiting for HSQLDB's CountUpDownLatch$Sync.await() * Synchronizer, or some other thread, possibly orphaning blocks. The cause seems to be method A, which has a repository session, calls EventBus.INSTANCE.notify() and one of the event listeners then obtains their own repository session to do repository 'work'. In the meantime, the HSQLDB log has reached 50MB, triggering auto-checkpoint. HSQLDB attempts to CHECKPOINT, but waits for existing transactions to complete, and also blocks starting new transactions. Thus, one of the event listeners is blocked when they try to obtain a new repository session, but HSQLDB never performs CHECKPOINT because the event notifier (method A) still has an unfinished transaction - hence deadlock. --- src/main/java/org/qortal/block/BlockChain.java | 1 + .../java/org/qortal/controller/BlockMinter.java | 4 +++- .../java/org/qortal/controller/Controller.java | 6 ++++++ .../java/org/qortal/controller/Synchronizer.java | 1 + src/main/java/org/qortal/event/EventBus.java | 15 +++++++++++++++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/qortal/block/BlockChain.java b/src/main/java/org/qortal/block/BlockChain.java index 8f6ddab7..95ecc41b 100644 --- a/src/main/java/org/qortal/block/BlockChain.java +++ b/src/main/java/org/qortal/block/BlockChain.java @@ -567,6 +567,7 @@ public class BlockChain { --height; orphanBlockData = repository.getBlockRepository().fromHeight(height); + repository.discardChanges(); // clear transaction status to prevent deadlocks Controller.getInstance().onNewBlock(orphanBlockData); } diff --git a/src/main/java/org/qortal/controller/BlockMinter.java b/src/main/java/org/qortal/controller/BlockMinter.java index aa80246d..46a29cf9 100644 --- a/src/main/java/org/qortal/controller/BlockMinter.java +++ b/src/main/java/org/qortal/controller/BlockMinter.java @@ -306,8 +306,10 @@ public class BlockMinter extends Thread { } if (newBlockMinted) { - BlockData newBlockData = newBlock.getBlockData(); // Notify Controller and broadcast our new chain to network + BlockData newBlockData = newBlock.getBlockData(); + + repository.discardChanges(); // clear transaction status to prevent deadlocks Controller.getInstance().onNewBlock(newBlockData); Network network = Network.getInstance(); diff --git a/src/main/java/org/qortal/controller/Controller.java b/src/main/java/org/qortal/controller/Controller.java index 2683fb2d..406fda79 100644 --- a/src/main/java/org/qortal/controller/Controller.java +++ b/src/main/java/org/qortal/controller/Controller.java @@ -835,6 +835,12 @@ public class Controller extends Thread { } } + /** + * Callback for when we've received a new block. + *

+ * See WARNING for {@link EventBus#notify(Event)} + * to prevent deadlocks. + */ public void onNewBlock(BlockData latestBlockData) { // Protective copy BlockData blockDataCopy = new BlockData(latestBlockData); diff --git a/src/main/java/org/qortal/controller/Synchronizer.java b/src/main/java/org/qortal/controller/Synchronizer.java index 8dca5b05..5af2030d 100644 --- a/src/main/java/org/qortal/controller/Synchronizer.java +++ b/src/main/java/org/qortal/controller/Synchronizer.java @@ -410,6 +410,7 @@ public class Synchronizer { --ourHeight; orphanBlockData = repository.getBlockRepository().fromHeight(ourHeight); + repository.discardChanges(); // clear transaction status to prevent deadlocks Controller.getInstance().onNewBlock(orphanBlockData); } diff --git a/src/main/java/org/qortal/event/EventBus.java b/src/main/java/org/qortal/event/EventBus.java index e0014a20..63c80143 100644 --- a/src/main/java/org/qortal/event/EventBus.java +++ b/src/main/java/org/qortal/event/EventBus.java @@ -20,6 +20,21 @@ public enum EventBus { } } + /** + * WARNING: before calling this method, + * make sure repository holds no locks, e.g. by calling + * repository.discardChanges(). + *

+ * This is because event listeners might open a new + * repository session which will deadlock HSQLDB + * if it tries to CHECKPOINT. + *

+ * The HSQLDB deadlock occurs because the caller's + * repository session blocks the CHECKPOINT until + * their transaction is closed, yet event listeners + * new sessions are blocked until CHECKPOINT is + * completed, hence deadlock. + */ public void notify(Event event) { List clonedListeners;