From bd1b63191462ff387e27c1f7d6d268a5c5439a4f Mon Sep 17 00:00:00 2001 From: CalDescent Date: Sun, 2 Jan 2022 15:22:53 +0000 Subject: [PATCH] Use <base> in HTMLParser, rather than attempting to swap out every relative link This delegates the task to the browser rather than doing it in java. It should also catch a few remaining types of links that we had missed - e.g. ones that originate from within js files. --- src/main/java/org/qortal/api/HTMLParser.java | 107 ++---------------- .../arbitrary/ArbitraryDataRenderer.java | 2 +- 2 files changed, 11 insertions(+), 98 deletions(-) diff --git a/src/main/java/org/qortal/api/HTMLParser.java b/src/main/java/org/qortal/api/HTMLParser.java index 38a6e951..51e0854e 100644 --- a/src/main/java/org/qortal/api/HTMLParser.java +++ b/src/main/java/org/qortal/api/HTMLParser.java @@ -4,15 +4,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - public class HTMLParser { private static final Logger LOGGER = LogManager.getLogger(HTMLParser.class); @@ -26,97 +19,17 @@ public class HTMLParser { this.data = data; } - /** - * Find relative links and prefix them with the resource ID, using Jsoup - * @param path - * @return The data with links replaced - */ - public void replaceRelativeLinks(String path) { - if (HTMLParser.isHtmlFile(path)) { - String fileContents = new String(data); - Document document = Jsoup.parse(fileContents); - - Elements href = document.select("[href]"); - for (Element element : href) { - String elementHtml = element.attr("href"); - if (this.shouldReplaceLink(elementHtml)) { - String slash = (elementHtml.startsWith("/") ? 
"" : "/"); - element.attr("href", this.linkPrefix + slash + element.attr("href")); - } - } - Elements src = document.select("[src]"); - for (Element element : src) { - String elementHtml = element.attr("src"); - if (this.shouldReplaceLink(elementHtml)) { - String slash = (elementHtml.startsWith("/") ? "" : "/"); - element.attr("src", this.linkPrefix + slash + element.attr("src")); - } - } - Elements srcset = document.select("[srcset]"); - for (Element element : srcset) { - String elementHtml = element.attr("srcset").trim(); - if (this.shouldReplaceLink(elementHtml)) { - String[] parts = element.attr("srcset").split(","); - ArrayList<String> newParts = new ArrayList<>(); - for (String part : parts) { - part = part.trim(); - String slash = (elementHtml.startsWith("/") ? "" : "/"); - String newPart = this.linkPrefix + slash + part; - newParts.add(newPart); - } - String newString = String.join(",", newParts); - element.attr("srcset", newString); - } - } - Elements style = document.select("[style]"); - for (Element element : style) { - String elementHtml = element.attr("style"); - if (elementHtml.contains("url(")) { - String[] parts = elementHtml.split("url\\("); - String[] parts2 = parts[1].split("\\)"); - String link = parts2[0]; - if (link != null) { - link = this.removeQuotes(link); - if (this.shouldReplaceLink(link)) { - String slash = (link.startsWith("/") ? 
"" : "/"); - String modifiedLink = "url('" + this.linkPrefix + slash + link + "')"; - element.attr("style", parts[0] + modifiedLink + parts2[1]); - } - } - } - } - String html = document.html(); - html = this.replaceAmpersands(html); - this.data = html.getBytes(); + public void setDocumentBaseUrl() { + String fileContents = new String(data); + Document document = Jsoup.parse(fileContents); + String baseUrl = this.linkPrefix + "/"; + Elements head = document.getElementsByTag("head"); + if (!head.isEmpty()) { + String baseElement = String.format("<base href=\"%s\">", baseUrl); + head.get(0).prepend(baseElement); } - } - - private String replaceAmpersands(String html) { - return html.replace("&amp;", "&"); - } - - private boolean shouldReplaceLink(String elementHtml) { - List<String> prefixes = new ArrayList<>(); - prefixes.add("http"); // Don't modify absolute links - prefixes.add("//"); // Don't modify absolute links - prefixes.add("javascript:"); // Don't modify javascript - prefixes.add("../"); // Don't modify valid relative links - for (String prefix : prefixes) { - if (elementHtml.startsWith(prefix)) { - return false; - } - } - return true; - } - - private String removeQuotes(String elementHtml) { - if (elementHtml.startsWith("\"") || elementHtml.startsWith("\'")) { - elementHtml = elementHtml.substring(1); - } - if (elementHtml.endsWith("\"") || elementHtml.endsWith("\'")) { - elementHtml = elementHtml.substring(0, elementHtml.length() - 1); - } - return elementHtml; + String html = document.html(); + this.data = html.getBytes(); } public static boolean isHtmlFile(String path) { diff --git a/src/main/java/org/qortal/arbitrary/ArbitraryDataRenderer.java b/src/main/java/org/qortal/arbitrary/ArbitraryDataRenderer.java index 7db3b233..67b4c42b 100644 --- a/src/main/java/org/qortal/arbitrary/ArbitraryDataRenderer.java +++ b/src/main/java/org/qortal/arbitrary/ArbitraryDataRenderer.java @@ -119,7 +119,7 @@ public class ArbitraryDataRenderer { // HTML file - needs to be parsed byte[] data = 
Files.readAllBytes(Paths.get(filePath)); // TODO: limit file size that can be read into memory HTMLParser htmlParser = new HTMLParser(resourceId, inPath, prefix, usePrefix, data); - htmlParser.replaceRelativeLinks(filename); + htmlParser.setDocumentBaseUrl(); response.setContentType(context.getMimeType(filename)); response.setContentLength(htmlParser.getData().length); response.getOutputStream().write(htmlParser.getData());