forked from Qortal/qortal
Moved HTML parsing to new class.
This commit is contained in:
parent
f2feb12708
commit
e64a3978e6
115
src/main/java/org/qortal/api/HTMLParser.java
Normal file
115
src/main/java/org/qortal/api/HTMLParser.java
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
package org.qortal.api;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class HTMLParser {
|
||||||
|
|
||||||
|
private String linkPrefix;
|
||||||
|
|
||||||
|
public HTMLParser(String resourceId, boolean usePrefix) {
|
||||||
|
this.linkPrefix = usePrefix ? "/site/" + resourceId : "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find relative links and prefix them with the resource ID, using Jsoup
|
||||||
|
* @param path
|
||||||
|
* @param data
|
||||||
|
* @return The data with links replaced
|
||||||
|
*/
|
||||||
|
public byte[] replaceRelativeLinks(String path, byte[] data) {
|
||||||
|
if (HTMLParser.isHtmlFile(path)) {
|
||||||
|
String fileContents = new String(data);
|
||||||
|
Document document = Jsoup.parse(fileContents);
|
||||||
|
|
||||||
|
Elements href = document.select("[href]");
|
||||||
|
for (Element element : href) {
|
||||||
|
String elementHtml = element.attr("href");
|
||||||
|
if (this.shouldReplaceLink(elementHtml)) {
|
||||||
|
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||||
|
element.attr("href", this.linkPrefix + slash + element.attr("href"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Elements src = document.select("[src]");
|
||||||
|
for (Element element : src) {
|
||||||
|
String elementHtml = element.attr("src");
|
||||||
|
if (this.shouldReplaceLink(elementHtml)) {
|
||||||
|
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||||
|
element.attr("src", this.linkPrefix + slash + element.attr("src"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Elements srcset = document.select("[srcset]");
|
||||||
|
for (Element element : srcset) {
|
||||||
|
String elementHtml = element.attr("srcset").trim();
|
||||||
|
if (this.shouldReplaceLink(elementHtml)) {
|
||||||
|
String[] parts = element.attr("srcset").split(",");
|
||||||
|
ArrayList<String> newParts = new ArrayList<>();
|
||||||
|
for (String part : parts) {
|
||||||
|
part = part.trim();
|
||||||
|
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||||
|
String newPart = this.linkPrefix + slash + part;
|
||||||
|
newParts.add(newPart);
|
||||||
|
}
|
||||||
|
String newString = String.join(",", newParts);
|
||||||
|
element.attr("srcset", newString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Elements style = document.select("[style]");
|
||||||
|
for (Element element : style) {
|
||||||
|
String elementHtml = element.attr("style");
|
||||||
|
if (elementHtml.contains("url(")) {
|
||||||
|
String[] parts = elementHtml.split("url\\(");
|
||||||
|
String[] parts2 = parts[1].split("\\)");
|
||||||
|
String link = parts2[0];
|
||||||
|
if (link != null) {
|
||||||
|
link = this.removeQuotes(link);
|
||||||
|
if (this.shouldReplaceLink(link)) {
|
||||||
|
String slash = (link.startsWith("/") ? "" : "/");
|
||||||
|
String modifiedLink = "url('" + this.linkPrefix + slash + link + "')";
|
||||||
|
element.attr("style", parts[0] + modifiedLink + parts2[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return document.html().getBytes();
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean shouldReplaceLink(String elementHtml) {
|
||||||
|
List<String> prefixes = new ArrayList<>();
|
||||||
|
prefixes.add("http"); // Don't modify absolute links
|
||||||
|
prefixes.add("//"); // Don't modify absolute links
|
||||||
|
prefixes.add("javascript:"); // Don't modify javascript
|
||||||
|
prefixes.add("../"); // Don't modify valid relative links
|
||||||
|
for (String prefix : prefixes) {
|
||||||
|
if (elementHtml.startsWith(prefix)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String removeQuotes(String elementHtml) {
|
||||||
|
if (elementHtml.startsWith("\"") || elementHtml.startsWith("\'")) {
|
||||||
|
elementHtml = elementHtml.substring(1);
|
||||||
|
}
|
||||||
|
if (elementHtml.endsWith("\"") || elementHtml.endsWith("\'")) {
|
||||||
|
elementHtml = elementHtml.substring(0, elementHtml.length() - 1);
|
||||||
|
}
|
||||||
|
return elementHtml;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isHtmlFile(String path) {
|
||||||
|
if (path.endsWith(".html") || path.endsWith(".htm")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
@ -27,6 +27,7 @@ import org.jsoup.nodes.Element;
|
|||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
import org.qortal.api.ApiError;
|
import org.qortal.api.ApiError;
|
||||||
import org.qortal.api.ApiExceptionFactory;
|
import org.qortal.api.ApiExceptionFactory;
|
||||||
|
import org.qortal.api.HTMLParser;
|
||||||
import org.qortal.api.Security;
|
import org.qortal.api.Security;
|
||||||
import org.qortal.block.BlockChain;
|
import org.qortal.block.BlockChain;
|
||||||
import org.qortal.crypto.Crypto;
|
import org.qortal.crypto.Crypto;
|
||||||
@ -245,13 +246,13 @@ public class WebsiteResource {
|
|||||||
@GET
|
@GET
|
||||||
@Path("{resource}")
|
@Path("{resource}")
|
||||||
public HttpServletResponse getResourceIndex(@PathParam("resource") String resourceId) {
|
public HttpServletResponse getResourceIndex(@PathParam("resource") String resourceId) {
|
||||||
return this.get(resourceId, "/");
|
return this.get(resourceId, "/", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("{resource}/{path:.*}")
|
@Path("{resource}/{path:.*}")
|
||||||
public HttpServletResponse getResourcePath(@PathParam("resource") String resourceId, @PathParam("path") String inPath) {
|
public HttpServletResponse getResourcePath(@PathParam("resource") String resourceId, @PathParam("path") String inPath) {
|
||||||
return this.get(resourceId, inPath);
|
return this.get(resourceId, inPath, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private HttpServletResponse get(String resourceId, String inPath) {
|
private HttpServletResponse get(String resourceId, String inPath) {
|
||||||
@ -314,10 +315,11 @@ public class WebsiteResource {
|
|||||||
String filename = this.getFilename(unzippedPath, inPath);
|
String filename = this.getFilename(unzippedPath, inPath);
|
||||||
String filePath = unzippedPath + File.separator + filename;
|
String filePath = unzippedPath + File.separator + filename;
|
||||||
|
|
||||||
if (this.isHtmlFile(filename)) {
|
if (HTMLParser.isHtmlFile(filename)) {
|
||||||
// HTML file - needs to be parsed
|
// HTML file - needs to be parsed
|
||||||
byte[] data = Files.readAllBytes(Paths.get(filePath)); // TODO: limit file size that can be read into memory
|
byte[] data = Files.readAllBytes(Paths.get(filePath)); // TODO: limit file size that can be read into memory
|
||||||
data = this.replaceRelativeLinks(filename, data, resourceId);
|
HTMLParser htmlParser = new HTMLParser(resourceId, usePrefix);
|
||||||
|
data = htmlParser.replaceRelativeLinks(filename, data);
|
||||||
response.setContentType(context.getMimeType(filename));
|
response.setContentType(context.getMimeType(filename));
|
||||||
response.setContentLength(data.length);
|
response.setContentLength(data.length);
|
||||||
response.getOutputStream().write(data);
|
response.getOutputStream().write(data);
|
||||||
@ -384,96 +386,6 @@ public class WebsiteResource {
|
|||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Find relative links and prefix them with the resource ID, using Jsoup
|
|
||||||
* @param path
|
|
||||||
* @param data
|
|
||||||
* @param resourceId
|
|
||||||
* @return The data with links replaced
|
|
||||||
*/
|
|
||||||
private byte[] replaceRelativeLinks(String path, byte[] data, String resourceId) {
|
|
||||||
if (this.isHtmlFile(path)) {
|
|
||||||
String fileContents = new String(data);
|
|
||||||
Document document = Jsoup.parse(fileContents);
|
|
||||||
|
|
||||||
Elements href = document.select("[href]");
|
|
||||||
for (Element element : href) {
|
|
||||||
String elementHtml = element.attr("href");
|
|
||||||
if (this.shouldReplaceLink(elementHtml)) {
|
|
||||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
|
||||||
element.attr("href", "/site/" +resourceId + slash + element.attr("href"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Elements src = document.select("[src]");
|
|
||||||
for (Element element : src) {
|
|
||||||
String elementHtml = element.attr("src");
|
|
||||||
if (this.shouldReplaceLink(elementHtml)) {
|
|
||||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
|
||||||
element.attr("src", "/site/" +resourceId + slash + element.attr("src"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Elements srcset = document.select("[srcset]");
|
|
||||||
for (Element element : srcset) {
|
|
||||||
String elementHtml = element.attr("srcset").trim();
|
|
||||||
if (this.shouldReplaceLink(elementHtml)) {
|
|
||||||
String[] parts = element.attr("srcset").split(",");
|
|
||||||
ArrayList<String> newParts = new ArrayList<>();
|
|
||||||
for (String part : parts) {
|
|
||||||
part = part.trim();
|
|
||||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
|
||||||
String newPart = "/site/" +resourceId + slash + part;
|
|
||||||
newParts.add(newPart);
|
|
||||||
}
|
|
||||||
String newString = String.join(",", newParts);
|
|
||||||
element.attr("srcset", newString);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Elements style = document.select("[style]");
|
|
||||||
for (Element element : style) {
|
|
||||||
String elementHtml = element.attr("style");
|
|
||||||
if (elementHtml.contains("url(")) {
|
|
||||||
String[] parts = elementHtml.split("url\\(");
|
|
||||||
String[] parts2 = parts[1].split("\\)");
|
|
||||||
String link = parts2[0];
|
|
||||||
if (link != null) {
|
|
||||||
link = this.removeQuotes(link);
|
|
||||||
if (this.shouldReplaceLink(link)) {
|
|
||||||
String slash = (link.startsWith("/") ? "" : "/");
|
|
||||||
String modifiedLink = "url('" + "/site/" + resourceId + slash + link + "')";
|
|
||||||
element.attr("style", parts[0] + modifiedLink + parts2[1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return document.html().getBytes();
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean shouldReplaceLink(String elementHtml) {
|
|
||||||
List<String> prefixes = new ArrayList<>();
|
|
||||||
prefixes.add("http"); // Don't modify absolute links
|
|
||||||
prefixes.add("//"); // Don't modify absolute links
|
|
||||||
prefixes.add("javascript:"); // Don't modify javascript
|
|
||||||
prefixes.add("../"); // Don't modify valid relative links
|
|
||||||
for (String prefix : prefixes) {
|
|
||||||
if (elementHtml.startsWith(prefix)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String removeQuotes(String elementHtml) {
|
|
||||||
if (elementHtml.startsWith("\"") || elementHtml.startsWith("\'")) {
|
|
||||||
elementHtml = elementHtml.substring(1);
|
|
||||||
}
|
|
||||||
if (elementHtml.endsWith("\"") || elementHtml.endsWith("\'")) {
|
|
||||||
elementHtml = elementHtml.substring(0, elementHtml.length() - 1);
|
|
||||||
}
|
|
||||||
return elementHtml;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> indexFiles() {
|
private List<String> indexFiles() {
|
||||||
List<String> indexFiles = new ArrayList<>();
|
List<String> indexFiles = new ArrayList<>();
|
||||||
indexFiles.add("index.html");
|
indexFiles.add("index.html");
|
||||||
@ -485,11 +397,4 @@ public class WebsiteResource {
|
|||||||
return indexFiles;
|
return indexFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isHtmlFile(String path) {
|
|
||||||
if (path.endsWith(".html") || path.endsWith(".htm")) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user