diff --git a/pom.xml b/pom.xml index 801b57d4..8cd05317 100644 --- a/pom.xml +++ b/pom.xml @@ -21,6 +21,8 @@ 1.2.2 28.1-jre 2.5.1 + 1.2.1 + 70.1 1.1 2.29.1 9.4.29.v20200521 @@ -568,7 +570,18 @@ net.codebox homoglyph - 1.2.0 + ${homoglyph.version} + + + + com.ibm.icu + icu4j + ${icu4j.version} + + + com.ibm.icu + icu4j-charset + ${icu4j.version} diff --git a/src/main/java/org/qortal/data/transaction/UpdateNameTransactionData.java b/src/main/java/org/qortal/data/transaction/UpdateNameTransactionData.java index b43361db..1fda16a5 100644 --- a/src/main/java/org/qortal/data/transaction/UpdateNameTransactionData.java +++ b/src/main/java/org/qortal/data/transaction/UpdateNameTransactionData.java @@ -48,6 +48,7 @@ public class UpdateNameTransactionData extends TransactionData { public void afterUnmarshal(Unmarshaller u, Object parent) { this.creatorPublicKey = this.ownerPublicKey; + this.reducedNewName = this.newName != null ? Unicode.sanitize(this.newName) : null; } /** From repository */ @@ -62,7 +63,7 @@ public class UpdateNameTransactionData extends TransactionData { this.nameReference = nameReference; } - /** From network/API */ + /** From network */ public UpdateNameTransactionData(BaseTransactionData baseTransactionData, String name, String newName, String newData) { this(baseTransactionData, name, newName, newData, Unicode.sanitize(newName), null); } diff --git a/src/main/java/org/qortal/transaction/UpdateNameTransaction.java b/src/main/java/org/qortal/transaction/UpdateNameTransaction.java index c9eedbae..8d578c85 100644 --- a/src/main/java/org/qortal/transaction/UpdateNameTransaction.java +++ b/src/main/java/org/qortal/transaction/UpdateNameTransaction.java @@ -118,10 +118,13 @@ public class UpdateNameTransaction extends Transaction { if (!owner.getAddress().equals(nameData.getOwner())) return ValidationResult.INVALID_NAME_OWNER; - // Check new name isn't already taken, unless it is the same name (this allows for case-adjusting renames) - NameData newNameData = 
this.repository.getNameRepository().fromReducedName(this.updateNameTransactionData.getReducedNewName()); - if (newNameData != null && !newNameData.getName().equals(nameData.getName())) - return ValidationResult.NAME_ALREADY_REGISTERED; + // Additional checks if transaction intends to change name + if (!this.updateNameTransactionData.getNewName().isEmpty()) { + // Check new name isn't already taken, unless it is the same name (this allows for case-adjusting renames) + NameData newNameData = this.repository.getNameRepository().fromReducedName(this.updateNameTransactionData.getReducedNewName()); + if (newNameData != null && !newNameData.getName().equals(nameData.getName())) + return ValidationResult.NAME_ALREADY_REGISTERED; + } return ValidationResult.OK; } diff --git a/src/main/java/org/qortal/utils/Unicode.java b/src/main/java/org/qortal/utils/Unicode.java index b73f3a32..c9484d82 100644 --- a/src/main/java/org/qortal/utils/Unicode.java +++ b/src/main/java/org/qortal/utils/Unicode.java @@ -18,6 +18,9 @@ import java.util.TreeMap; import com.google.common.base.CharMatcher; +import com.ibm.icu.text.CaseMap; +import com.ibm.icu.text.Normalizer2; +import com.ibm.icu.text.UnicodeSet; import net.codebox.homoglyph.HomoglyphBuilder; public abstract class Unicode { @@ -31,6 +34,8 @@ public abstract class Unicode { public static final String ZERO_WIDTH_NO_BREAK_SPACE = "\ufeff"; public static final CharMatcher ZERO_WIDTH_CHAR_MATCHER = CharMatcher.anyOf(ZERO_WIDTH_SPACE + ZERO_WIDTH_NON_JOINER + ZERO_WIDTH_JOINER + WORD_JOINER + ZERO_WIDTH_NO_BREAK_SPACE); + private static final UnicodeSet removableUniset = new UnicodeSet("[[:Mark:][:Other:]]").freeze(); + private static int[] homoglyphCodePoints; private static int[] reducedCodePoints; @@ -59,7 +64,7 @@ public abstract class Unicode { public static String normalize(String input) { String output; - // Normalize + // Normalize using NFKC to recompose in canonical form output = Normalizer.normalize(input, Form.NFKC); // Remove 
zero-width code-points, used for rendering @@ -91,8 +96,8 @@ public abstract class Unicode { public static String sanitize(String input) { String output; - // Normalize - output = Normalizer.normalize(input, Form.NFKD); + // Normalize using NFKD to decompose into individual combining code points + output = Normalizer2.getNFKDInstance().normalize(input); // Remove zero-width code-points, used for rendering output = removeZeroWidth(output); @@ -100,11 +105,11 @@ public abstract class Unicode { // Normalize whitespace output = CharMatcher.whitespace().trimAndCollapseFrom(output, ' '); - // Remove accents, combining marks - output = output.replaceAll("[\\p{M}\\p{C}]", ""); + // Remove accents, combining marks and 'Other' code points (control, format, surrogate, private-use, unassigned) - see https://www.unicode.org/reports/tr44/#GC_Values_Table + output = removableUniset.stripFrom(output, true); // Convert to lowercase - output = output.toLowerCase(Locale.ROOT); + output = CaseMap.toLower().apply(Locale.ROOT, output); // Reduce homoglyphs output = reduceHomoglyphs(output); diff --git a/src/test/java/org/qortal/test/UnicodeTests.java b/src/test/java/org/qortal/test/UnicodeTests.java index 2e0f7968..f84b13d5 100644 --- a/src/test/java/org/qortal/test/UnicodeTests.java +++ b/src/test/java/org/qortal/test/UnicodeTests.java @@ -35,4 +35,41 @@ public class UnicodeTests { assertEquals("strings should match", Unicode.sanitize(input1), Unicode.sanitize(input2)); } + @Test + public void testEmojis() { + /* + * Emojis shouldn't reduce down to empty strings. 
+ * + * 🥳 Face with Party Horn and Party Hat Emoji U+1F973 + */ + String emojis = "\uD83E\uDD73"; + + assertFalse(Unicode.sanitize(emojis).isBlank()); + } + + @Test + public void testSanitize() { + /* + * Check various code points that should be stripped out when sanitizing / reducing + */ + String enclosingCombiningMark = "\u1100\u1161\u20DD"; // \u20DD is an enclosing combining mark and should be removed + String spacingMark = "\u0A39\u0A3f"; // \u0A3f is a spacing combining mark and should be removed + String nonspacingMark = "c\u0302"; // \u0302 is a non-spacing combining mark and should be removed + + assertNotEquals(enclosingCombiningMark, Unicode.sanitize(enclosingCombiningMark)); + assertNotEquals(spacingMark, Unicode.sanitize(spacingMark)); + assertNotEquals(nonspacingMark, Unicode.sanitize(nonspacingMark)); + + String control = "\u001B\u009E"; // \u001B and \u009E are control codes + String format = "\u202A\u2062"; // \u202A and \u2062 are zero-width formatting codes + String surrogate = "\uD800\uDFFF"; // surrogates + String privateUse = "\uE1E0"; // \uE000 - \uF8FF is private use area + String unassigned = "\uFAFA"; // \uFAFA is currently unassigned + + assertTrue(Unicode.sanitize(control).isBlank()); + assertTrue(Unicode.sanitize(format).isBlank()); + assertTrue(Unicode.sanitize(surrogate).isBlank()); + assertTrue(Unicode.sanitize(privateUse).isBlank()); + assertTrue(Unicode.sanitize(unassigned).isBlank()); + } } diff --git a/src/test/java/org/qortal/test/naming/IntegrityTests.java b/src/test/java/org/qortal/test/naming/IntegrityTests.java index 7a058deb..75612ae9 100644 --- a/src/test/java/org/qortal/test/naming/IntegrityTests.java +++ b/src/test/java/org/qortal/test/naming/IntegrityTests.java @@ -1,20 +1,26 @@ package org.qortal.test.naming; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.qortal.account.PrivateKeyAccount; import org.qortal.controller.repository.NamesDatabaseIntegrityCheck; +import 
org.qortal.data.naming.NameData; import org.qortal.data.transaction.*; import org.qortal.repository.DataException; import org.qortal.repository.Repository; +import org.qortal.repository.RepositoryFactory; import org.qortal.repository.RepositoryManager; +import org.qortal.repository.hsqldb.HSQLDBRepositoryFactory; +import org.qortal.settings.Settings; import org.qortal.test.common.Common; import org.qortal.test.common.TransactionUtils; import org.qortal.test.common.transaction.TestTransaction; import org.qortal.transaction.RegisterNameTransaction; import org.qortal.transaction.Transaction; -import org.qortal.utils.NTP; +import org.qortal.utils.Unicode; +import java.io.File; import java.util.List; import static org.junit.Assert.*; @@ -50,34 +56,6 @@ public class IntegrityTests extends Common { } } - @Test - public void testBlankReducedName() throws DataException { - try (final Repository repository = RepositoryManager.getRepository()) { - // Register-name - PrivateKeyAccount alice = Common.getTestAccount(repository, "alice"); - String name = "\uD83E\uDD73"; // Translates to a reducedName of "" - String data = "\uD83E\uDD73"; - - RegisterNameTransactionData transactionData = new RegisterNameTransactionData(TestTransaction.generateBase(alice), name, data); - transactionData.setFee(new RegisterNameTransaction(null, null).getUnitFee(transactionData.getTimestamp())); - TransactionUtils.signAndMint(repository, transactionData, alice); - - // Ensure the name exists and the data is correct - assertEquals(data, repository.getNameRepository().fromName(name).getData()); - - // Ensure the reducedName is blank - assertEquals("", repository.getNameRepository().fromName(name).getReducedName()); - - // Run the database integrity check for this name - NamesDatabaseIntegrityCheck integrityCheck = new NamesDatabaseIntegrityCheck(); - assertEquals(1, integrityCheck.rebuildName(name, repository)); - - // Ensure the name still exists and the data is still correct - assertEquals(data, 
repository.getNameRepository().fromName(name).getData()); - assertEquals("", repository.getNameRepository().fromName(name).getReducedName()); - } - } - @Test public void testUpdateWithBlankNewName() throws DataException { try (final Repository repository = RepositoryManager.getRepository()) { @@ -448,4 +426,46 @@ public class IntegrityTests extends Common { } } + @Ignore("Checks 'live' repository") + @Test + public void testRepository() throws DataException { + Settings.fileInstance("settings.json"); // use 'live' settings + + String repositoryUrlTemplate = "jdbc:hsqldb:file:%s" + File.separator + "blockchain;create=false;hsqldb.full_log_replay=true"; + String connectionUrl = String.format(repositoryUrlTemplate, Settings.getInstance().getRepositoryPath()); + RepositoryFactory repositoryFactory = new HSQLDBRepositoryFactory(connectionUrl); + RepositoryManager.setRepositoryFactory(repositoryFactory); + + try (final Repository repository = RepositoryManager.getRepository()) { + List<NameData> names = repository.getNameRepository().getAllNames(); + + for (NameData nameData : names) { + String reReduced = Unicode.sanitize(nameData.getName()); + + if (reReduced.isBlank()) { + System.err.println(String.format("Name '%s' reduced to blank", + nameData.getName() + )); + } + + if (!nameData.getReducedName().equals(reReduced)) { + System.out.println(String.format("Name '%s' reduced form was '%s' but is now '%s'", + nameData.getName(), + nameData.getReducedName(), + reReduced + )); + + // ...but does another name already have this reduced form? + names.stream() + .filter(tmpNameData -> tmpNameData.getReducedName().equals(reReduced)) + .forEach(tmpNameData -> + System.err.println(String.format("Name '%s' new reduced form also matches name '%s'", + nameData.getName(), + tmpNameData.getName() + )) + ); + } + } + } + } }