BufferPool = new SoftPool<>(() -> new byte[BufferSize]);
+
+ // Buffer state: bytes in byteBuf between bufPos (inclusive) and bufLength (exclusive) are buffered but not yet handed out.
+ private byte @Nullable [] byteBuf; // the byte buffer; recycled via SoftPool. Created in fill if required
+ private int bufPos; // index in byteBuf of the next byte to hand out
+ private int bufLength; // end (exclusive) of the valid bytes currently held in byteBuf
+ private int bufMark = -1; // bufPos captured by mark(); -1 when there is no valid mark
+ private boolean inReadFully = false; // true when the underlying inputstream has been read fully
+
+ /**
+  Wraps the supplied input stream. No buffer is allocated here; it is borrowed from the pool on the first fill.
+  @param in the underlying stream to read from
+  */
+ SimpleBufferedInput(InputStream in) {
+ super(in);
+ }
+
+    /**
+     Reads one byte, refilling the internal buffer first if everything buffered has already been handed out.
+     @return the next byte as an unsigned value (0 - 255), or -1 if the refill produced no further bytes
+     @throws IOException if refilling from the underlying stream fails
+     */
+    @Override
+    public int read() throws IOException {
+        boolean exhausted = bufPos >= bufLength;
+        if (exhausted) {
+            fill();
+            exhausted = bufPos >= bufLength; // still nothing after a refill: we are at the end
+        }
+        if (exhausted) {
+            return -1;
+        }
+        final byte[] buf = getBuf();
+        return buf[bufPos++] & 0xff; // mask so the byte is returned unsigned
+    }
+
+    /**
+     Reads up to {@code desiredLen} bytes into {@code dest}, starting at {@code offset}. Buffered bytes are served
+     first. When the buffer is empty, no mark is set, and the underlying stream is not yet exhausted, the read goes
+     straight to the underlying stream without touching the internal buffer.
+     @param dest the destination array; must not be null
+     @param offset index in dest at which to start writing
+     @param desiredLen the maximum number of bytes to read
+     @return the number of bytes read; 0 if desiredLen is 0; -1 if no more bytes are available
+     @throws IndexOutOfBoundsException if offset or desiredLen are negative, or desiredLen exceeds the room left in dest
+     @throws IOException if reading from the underlying stream fails
+     */
+    @Override
+    public int read(byte[] dest, int offset, int desiredLen) throws IOException {
+        Validate.notNull(dest);
+        final boolean badRange = offset < 0 || desiredLen < 0 || desiredLen > dest.length - offset;
+        if (badRange) {
+            throw new IndexOutOfBoundsException();
+        }
+        if (desiredLen == 0) {
+            return 0;
+        }
+
+        if (bufLength - bufPos <= 0) { // nothing buffered to serve from
+            final boolean canBypass = !inReadFully && bufMark < 0;
+            if (canBypass) {
+                // no mark to honour, so skip the local buffer and read directly into the caller's array
+                final int direct = in.read(dest, offset, desiredLen);
+                closeIfDone(direct);
+                return direct;
+            }
+            fill();
+        }
+
+        final int buffered = bufLength - bufPos;
+        final int count = Math.min(buffered, desiredLen);
+        if (count <= 0) {
+            return -1;
+        }
+
+        System.arraycopy(getBuf(), bufPos, dest, offset, count);
+        bufPos += count;
+        return count;
+    }
+
+ /**
+  Refills the buffer from the underlying stream. Does nothing once the underlying stream has been read fully.
+  Borrows the buffer from the pool on first use. If a mark is set, the bytes from the mark onwards are kept (moved to
+  the front of the buffer when it is full); a mark sitting at index 0 of a full buffer is invalidated instead.
+  @throws IOException if reading from the underlying stream fails
+  */
+ private void fill() throws IOException {
+ if (inReadFully) return;
+ if (byteBuf == null) { // get one on first demand
+ byteBuf = BufferPool.borrow();
+ }
+
+ if (bufMark < 0) { // no mark, can lose buffer (assumes we've read to bufLen)
+ bufPos = 0;
+ } else if (bufPos >= BufferSize) { // no room left in buffer
+ if (bufMark > 0) { // can throw away early part of the buffer
+ int size = bufPos - bufMark;
+ System.arraycopy(byteBuf, bufMark, byteBuf, 0, size);
+ bufPos = size;
+ bufMark = 0;
+ } else { // invalidate mark
+ bufMark = -1;
+ bufPos = 0;
+ }
+ }
+ // anything before bufPos is retained; newly read bytes are appended from bufPos onwards
+ bufLength = bufPos;
+ int read = in.read(byteBuf, bufPos, byteBuf.length - bufPos);
+ if (read > 0) {
+ bufLength = read + bufPos;
+ while (byteBuf.length - bufLength > 0) { // read in more if we have space, without blocking
+ if (in.available() < 1) break;
+ read = in.read(byteBuf, bufLength, byteBuf.length - bufLength);
+ if (read <= 0) break;
+ bufLength += read;
+ }
+ }
+ // read holds the result of the last in.read call; -1 marks the underlying stream as done and closes it
+ closeIfDone(read);
+ }
+
+    /**
+     Records end-of-input and closes the underlying stream when the supplied read result is -1; otherwise does nothing.
+     @param read the value returned by the most recent read of the underlying stream
+     @throws IOException if closing the underlying stream fails
+     */
+    private void closeIfDone(int read) throws IOException {
+        if (read != -1) {
+            return;
+        }
+        inReadFully = true;
+        super.close(); // release the underlying stream right away rather than waiting for close()
+    }
+
+    /**
+     Gets the internal byte buffer, validating that it is not null (it is null before the first fill and after close).
+     @return the current byte buffer
+     */
+    byte[] getBuf() {
+        final byte[] buf = byteBuf;
+        Validate.notNull(buf);
+        return buf;
+    }
+
+ /**
+ Check if the underlying InputStream has been read fully. There may still be content in this buffer to be consumed.
+ @return true if the underlying inputstream has been read fully.
+ */
+ boolean baseReadFully() {
+ return inReadFully;
+ }
+
+    /**
+     Estimates how many bytes can be read without blocking. If any bytes are buffered, only the buffered count is
+     reported (the underlying stream's own estimate is not added); callers mostly use this as a would-block test.
+     @return the buffered byte count if positive; otherwise 0 once the underlying stream is exhausted, else the
+     underlying stream's estimate
+     @throws IOException if the underlying stream's available() fails
+     */
+    @Override
+    public int available() throws IOException {
+        final int buffered = bufLength - bufPos;
+        if (byteBuf != null && buffered > 0) {
+            return buffered;
+        }
+        if (inReadFully) {
+            return 0;
+        }
+        return in.available();
+    }
+
+    /**
+     Marks the current read position so that a later reset() can return to it.
+     @param readlimit the requested read-ahead limit; must not exceed the buffer size
+     @throws IllegalArgumentException if readlimit is greater than the buffer size
+     */
+    @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod") // explicitly not synced
+    @Override
+    public void mark(int readlimit) {
+        final boolean beyondBuffer = readlimit > BufferSize;
+        if (beyondBuffer) {
+            throw new IllegalArgumentException("Read-ahead limit is greater than buffer size");
+        }
+        bufMark = bufPos;
+    }
+
+    /**
+     Rewinds the read position to the last mark. The mark itself is left in place.
+     @throws IOException if no valid mark is set
+     */
+    @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod") // explicitly not synced
+    @Override
+    public void reset() throws IOException {
+        final int marked = bufMark;
+        if (marked < 0) {
+            throw new IOException("Resetting to invalid mark");
+        }
+        bufPos = marked;
+    }
+
+    /**
+     Closes the underlying stream and returns the byte buffer (if one was borrowed) to the pool. The buffer is released
+     even if closing the underlying stream throws, so a failing close does not strand a pooled buffer. Safe to call
+     more than once: the buffer is only released the first time.
+     @throws IOException if closing the underlying stream fails
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            super.close();
+        } finally {
+            if (byteBuf != null) { // null when already closed, or never allocated
+                BufferPool.release(byteBuf); // return the buffer to the pool
+                byteBuf = null; // NPE further attempts to read
+            }
+        }
+    }
+}
diff --git a/src/main/java/org/jsoup/internal/SoftPool.java b/src/main/java/org/jsoup/internal/SoftPool.java
new file mode 100644
index 0000000000..1125b7d1a0
--- /dev/null
+++ b/src/main/java/org/jsoup/internal/SoftPool.java
@@ -0,0 +1,66 @@
+package org.jsoup.internal;
+
+import java.lang.ref.SoftReference;
+import java.util.Stack;
+import java.util.function.Supplier;
+
+/**
+ A SoftPool is a ThreadLocal that holds a SoftReference to a pool of initializable objects. This allows us to reuse
+ expensive objects (buffers, etc.) between invocations (the ThreadLocal), but also for those objects to be reaped if
+ they are no longer in use.
+ <p>Like a ThreadLocal, should be stored in a static field.</p>
+ @param <T> the type of object to pool.
+ @since 1.18.2
+ */
+public class SoftPool<T> {
+    // per-thread stack of idle objects, held softly so the whole stack can be reclaimed under memory pressure
+    final ThreadLocal<SoftReference<Stack<T>>> threadLocalStack;
+    private final Supplier<T> initializer;
+    /**
+     How many total uses of the creating object might be instantiated on the same thread at once. More than this and
+     those objects aren't recycled. Doesn't need to be too conservative, as they can still be GCed as SoftRefs.
+     */
+    static final int MaxIdle = 12;
+
+    /**
+     Create a new SoftPool.
+     @param initializer a supplier that creates a new object when one is needed.
+     */
+    public SoftPool(Supplier<T> initializer) {
+        this.initializer = initializer;
+        this.threadLocalStack = ThreadLocal.withInitial(() -> new SoftReference<>(new Stack<>()));
+    }
+
+    /**
+     Borrow an object from the pool, creating a new one if the pool is empty. Make sure to release it back to the pool
+     when done, so that it can be reused.
+     @return an object from the pool, as defined by the initializer.
+     */
+    public T borrow() {
+        Stack<T> stack = getStack();
+        if (!stack.isEmpty()) {
+            return stack.pop();
+        }
+        return initializer.get();
+    }
+
+    /**
+     Release an object back to the pool. If the pool is full, the object is not retained. If you don't want to reuse a
+     borrowed object (for e.g. a StringBuilder that grew too large), just don't release it.
+     @param value the object to release back to the pool.
+     */
+    public void release(T value) {
+        Stack<T> stack = getStack();
+        if (stack.size() < MaxIdle) {
+            stack.push(value);
+        }
+    }
+
+    /**
+     Get the current thread's stack of idle objects, creating and installing a fresh one if the soft reference to the
+     previous stack has been cleared.
+     @return the idle-object stack for the calling thread; never null
+     */
+    Stack<T> getStack() {
+        Stack<T> stack = threadLocalStack.get().get();
+        if (stack == null) { // the soft reference was cleared; start over with an empty stack
+            stack = new Stack<>();
+            threadLocalStack.set(new SoftReference<>(stack));
+        }
+        return stack;
+    }
+}
diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java
index cb2ebec97d..9d6b0154c3 100644
--- a/src/main/java/org/jsoup/internal/StringUtil.java
+++ b/src/main/java/org/jsoup/internal/StringUtil.java
@@ -8,7 +8,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
-import java.util.Stack;
import java.util.regex.Pattern;
import java.util.stream.Collector;
import java.util.stream.Collectors;
@@ -337,7 +336,10 @@ private static String stripControlChars(final String input) {
return controlChars.matcher(input).replaceAll("");
}
- private static final ThreadLocal> threadLocalBuilders = ThreadLocal.withInitial(Stack::new);
+ private static final int InitBuilderSize = 1024;
+ private static final int MaxBuilderSize = 8 * 1024;
+ private static final SoftPool BuilderPool = new SoftPool<>(
+ () -> new StringBuilder(InitBuilderSize));
/**
* Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
@@ -347,10 +349,7 @@ private static String stripControlChars(final String input) {
* @return an empty StringBuilder
*/
public static StringBuilder borrowBuilder() {
- Stack builders = threadLocalBuilders.get();
- return builders.empty() ?
- new StringBuilder(MaxCachedBuilderSize) :
- builders.pop();
+ return BuilderPool.borrow();
}
/**
@@ -363,17 +362,12 @@ public static String releaseBuilder(StringBuilder sb) {
Validate.notNull(sb);
String string = sb.toString();
- if (sb.length() > MaxCachedBuilderSize)
- sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big
- else
+ // if it hasn't grown too big, reset it and return it to the pool:
+ if (sb.length() <= MaxBuilderSize) {
sb.delete(0, sb.length()); // make sure it's emptied on release
-
- Stack builders = threadLocalBuilders.get();
- builders.push(sb);
-
- while (builders.size() > MaxIdleBuilders) {
- builders.pop();
+ BuilderPool.release(sb);
}
+
return string;
}
@@ -394,6 +388,4 @@ public static String releaseBuilder(StringBuilder sb) {
StringJoiner::complete);
}
- private static final int MaxCachedBuilderSize = 8 * 1024;
- private static final int MaxIdleBuilders = 8;
}
diff --git a/src/main/java/org/jsoup/parser/CharacterReader.java b/src/main/java/org/jsoup/parser/CharacterReader.java
index 9710c414a9..1e015b0c06 100644
--- a/src/main/java/org/jsoup/parser/CharacterReader.java
+++ b/src/main/java/org/jsoup/parser/CharacterReader.java
@@ -2,6 +2,7 @@
import org.jsoup.UncheckedIOException;
import org.jsoup.helper.Validate;
+import org.jsoup.internal.SoftPool;
import org.jspecify.annotations.Nullable;
import java.io.IOException;
@@ -17,38 +18,43 @@
*/
public final class CharacterReader {
static final char EOF = (char) -1;
- private static final int maxStringCacheLen = 12;
- static final int maxBufferLen = 1024 * 32; // visible for testing
- static final int readAheadLimit = (int) (maxBufferLen * 0.75); // visible for testing
- private static final int minReadAheadLen = 1024; // the minimum mark length supported. No HTML entities can be larger than this.
-
- private char[] charBuf;
- private Reader reader;
- private int bufLength;
- private int bufSplitPoint;
- private int bufPos;
- private int readerPos;
- private int bufMark = -1;
- private static final int stringCacheSize = 512;
- private String[] stringCache = new String[stringCacheSize]; // holds reused strings in this doc, to lessen garbage
+ private static final int MaxStringCacheLen = 12;
+ private static final int StringCacheSize = 512;
+ private String[] stringCache; // holds reused strings in this doc, to lessen garbage
+ private static final SoftPool StringPool = new SoftPool<>(() -> new String[StringCacheSize]); // reuse cache between iterations
+
+ static final int BufferSize = 1024 * 2; // visible for testing
+ static final int RefillPoint = BufferSize / 2; // when bufPos characters read, refill; visible for testing
+ private static final int RewindLimit = 1024; // the maximum we can rewind. No HTML entities can be larger than this.
+
+ private Reader reader; // underlying Reader, will be backed by a buffered+controlled input stream, or StringReader
+ private char[] charBuf; // character buffer we consume from; filled from Reader
+ private int bufPos; // position in charBuf that's been consumed to
+ private int bufLength; // the num of characters actually buffered in charBuf, <= charBuf.length
+ private int fillPoint = 0; // how far into the charBuf we read before re-filling. 0.5 of charBuf.length after bufferUp
+ private int consumed; // how many characters total have been consumed from this CharacterReader (less the current bufPos)
+ private int bufMark = -1; // if not -1, the marked rewind position
+ private boolean readFully; // if the underlying stream has been completely read, no value in further buffering
+
+ private static final SoftPool BufferPool = new SoftPool<>(() -> new char[BufferSize]); // recycled char buffer
@Nullable private ArrayList newlinePositions = null; // optionally track the pos() position of newlines - scans during bufferUp()
private int lineNumberOffset = 1; // line numbers start at 1; += newlinePosition[indexof(pos)]
public CharacterReader(Reader input, int sz) {
- Validate.notNull(input);
- Validate.isTrue(input.markSupported(), "The supplied Reader must support mark(), but does not.");
- reader = input;
- charBuf = new char[Math.min(sz, maxBufferLen)];
- bufferUp();
+ this(input); // sz is no longer used
}
public CharacterReader(Reader input) {
- this(input, maxBufferLen);
+ Validate.notNull(input);
+ reader = input;
+ charBuf = BufferPool.borrow();
+ stringCache = StringPool.borrow();
+ bufferUp();
}
public CharacterReader(String input) {
- this(new StringReader(input), input.length());
+ this(new StringReader(input));
}
public void close() {
@@ -59,61 +65,79 @@ public void close() {
} catch (IOException ignored) {
} finally {
reader = null;
+ Arrays.fill(charBuf, (char) 0); // before release, clear the buffer. Not required, but acts as a safety net, and makes debug view clearer
+ BufferPool.release(charBuf);
charBuf = null;
+ StringPool.release(stringCache); // conversely, we don't clear the string cache, so we can reuse the contents
stringCache = null;
}
}
- private boolean readFully; // if the underlying stream has been completely read, no value in further buffering
private void bufferUp() {
- if (readFully || bufPos < bufSplitPoint)
+ if (readFully || bufPos < fillPoint || bufMark != -1)
return;
-
- final int pos;
- final int offset;
- if (bufMark != -1) {
- pos = bufMark;
- offset = bufPos - bufMark;
- } else {
- pos = bufPos;
- offset = 0;
- }
-
- try {
- final long skipped = reader.skip(pos);
- reader.mark(maxBufferLen);
- int read = 0;
- while (read <= minReadAheadLen) {
- int thisRead = reader.read(charBuf, read, charBuf.length - read);
- if (thisRead == -1)
+ doBufferUp(); // structured so bufferUp may become an intrinsic candidate
+ }
+
+ private void doBufferUp() {
+ /*
+ The flow:
+ - if read fully, or if bufPos < fillPoint, or if marked - do not fill.
+ - update readerPos (total amount consumed from this CharacterReader) += bufPos
+ - shift charBuf contents such that bufPos = 0; set next read offset (bufLength) -= shift amount
+ - loop read the Reader until we fill charBuf. bufLength += read.
+ - readFully = true when read = -1
+ */
+ consumed += bufPos;
+ bufLength -= bufPos;
+ if (bufLength > 0)
+ System.arraycopy(charBuf, bufPos, charBuf, 0, bufLength);
+ bufPos = 0;
+ while (bufLength < BufferSize) {
+ try {
+ int read = reader.read(charBuf, bufLength, charBuf.length - bufLength);
+ if (read == -1) {
readFully = true;
- if (thisRead <= 0)
break;
- read += thisRead;
- }
- reader.reset();
- if (read > 0) {
- Validate.isTrue(skipped == pos); // Previously asserted that there is room in buf to skip, so this will be a WTF
- bufLength = read;
- readerPos += pos;
- bufPos = offset;
- if (bufMark != -1)
- bufMark = 0;
- bufSplitPoint = Math.min(bufLength, readAheadLimit);
+ }
+ bufLength += read;
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
}
- } catch (IOException e) {
- throw new UncheckedIOException(e);
}
+ fillPoint = Math.min(bufLength, RefillPoint);
+
scanBufferForNewlines(); // if enabled, we index newline positions for line number tracking
lastIcSeq = null; // cache for last containsIgnoreCase(seq)
}
+    /**
+     Marks the current buffer position so that rewindToMark() can return to it. If fewer than RewindLimit characters
+     remain buffered, the fill point is zeroed first so that the bufferUp() call is not skipped by the fill-point check.
+     */
+    void mark() {
+        final int lookahead = bufLength - bufPos;
+        if (lookahead < RewindLimit) {
+            fillPoint = 0; // ask for a refill so there is enough look ahead capacity
+        }
+
+        bufferUp();
+        bufMark = bufPos;
+    }
+
+ /** Clears the mark by setting it to -1, the value bufferUp() and rewindToMark() treat as "no mark". */
+ void unmark() {
+ bufMark = -1;
+ }
+
+    /**
+     Moves the read position back to the marked position and then clears the mark.
+     @throws UncheckedIOException (wrapping an IOException) if no mark is set
+     */
+    void rewindToMark() {
+        final int marked = bufMark;
+        if (marked == -1) {
+            throw new UncheckedIOException(new IOException("Mark invalid"));
+        }
+
+        bufPos = marked;
+        unmark();
+    }
+
/**
* Gets the position currently read to in the content. Starts at 0.
* @return current position
*/
public int pos() {
- return readerPos + bufPos;
+ return consumed + bufPos;
}
/** Tests if the buffer has been fully read. */
@@ -131,7 +155,7 @@ boolean readFully() {
*/
public void trackNewlines(boolean track) {
if (track && newlinePositions == null) {
- newlinePositions = new ArrayList<>(maxBufferLen / 80); // rough guess of likely count
+ newlinePositions = new ArrayList<>(BufferSize / 80); // rough guess of likely count
scanBufferForNewlines(); // first pass when enabled; subsequently called during bufferUp
}
else if (!track)
@@ -216,7 +240,7 @@ private void scanBufferForNewlines() {
if (newlinePositions.size() > 0) {
// work out the line number that we have read up to (as we have likely scanned past this point)
- int index = lineNumIndex(readerPos);
+ int index = lineNumIndex(consumed);
if (index == -1) index = 0; // first line
int linePos = newlinePositions.get(index);
lineNumberOffset += index; // the num lines we've read up to
@@ -226,7 +250,7 @@ private void scanBufferForNewlines() {
for (int i = bufPos; i < bufLength; i++) {
if (charBuf[i] == '\n')
- newlinePositions.add(1 + readerPos + i);
+ newlinePositions.add(1 + consumed + i);
}
}
@@ -276,27 +300,6 @@ public void advance() {
bufPos++;
}
- void mark() {
- // make sure there is enough look ahead capacity
- if (bufLength - bufPos < minReadAheadLen)
- bufSplitPoint = 0;
-
- bufferUp();
- bufMark = bufPos;
- }
-
- void unmark() {
- bufMark = -1;
- }
-
- void rewindToMark() {
- if (bufMark == -1)
- throw new UncheckedIOException(new IOException("Mark invalid"));
-
- bufPos = bufMark;
- unmark();
- }
-
/**
* Returns the number of characters between the current position and the next instance of the input char
* @param c scan target
@@ -716,20 +719,20 @@ public String toString() {
* some more duplicates.
*/
private static String cacheString(final char[] charBuf, final String[] stringCache, final int start, final int count) {
- // limit (no cache):
- if (count > maxStringCacheLen)
+ if (count > MaxStringCacheLen) // don't cache strings that are too big
return new String(charBuf, start, count);
if (count < 1)
return "";
// calculate hash:
int hash = 0;
- for (int i = 0; i < count; i++) {
- hash = 31 * hash + charBuf[start + i];
+ int end = count + start;
+ for (int i = start; i < end; i++) {
+ hash = 31 * hash + charBuf[i];
}
// get from cache
- final int index = hash & stringCacheSize - 1;
+ final int index = hash & StringCacheSize - 1;
String cached = stringCache[index];
if (cached != null && rangeEquals(charBuf, start, count, cached)) // positive hit
diff --git a/src/test/java/org/jsoup/helper/DataUtilTest.java b/src/test/java/org/jsoup/helper/DataUtilTest.java
index a588c98e0e..a583ddc0af 100644
--- a/src/test/java/org/jsoup/helper/DataUtilTest.java
+++ b/src/test/java/org/jsoup/helper/DataUtilTest.java
@@ -2,6 +2,7 @@
import org.jsoup.Jsoup;
import org.jsoup.integration.ParseTest;
+import org.jsoup.internal.ControllableInputStream;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.junit.jupiter.api.Test;
@@ -37,12 +38,12 @@ public void testQuotedCharset() {
assertEquals("UTF-8", DataUtil.getCharsetFromContentType("text/html; charset='UTF-8'"));
}
- private InputStream stream(String data) {
- return new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
+ private ControllableInputStream stream(String data) {
+ return ControllableInputStream.wrap(new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)), 0);
}
- private InputStream stream(String data, String charset) {
- return new ByteArrayInputStream(data.getBytes(Charset.forName(charset)));
+ private ControllableInputStream stream(String data, String charset) {
+ return ControllableInputStream.wrap(new ByteArrayInputStream(data.getBytes(Charset.forName(charset))), 0);
}
@Test
@@ -143,7 +144,8 @@ public void parseSequenceInputStream() throws IOException {
stream(firstPart),
stream(secondPart)
);
- Document doc = DataUtil.parseInputStream(sequenceStream, null, "", Parser.htmlParser());
+ ControllableInputStream stream = ControllableInputStream.wrap(sequenceStream, 0);
+ Document doc = DataUtil.parseInputStream(stream, null, "", Parser.htmlParser());
assertEquals(fileContent, doc.outerHtml());
}
@@ -331,7 +333,7 @@ void handlesUnlimitedRead() throws IOException {
VaryingReadInputStream stream = new VaryingReadInputStream(ParseTest.inputStreamFrom(input));
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(stream, 0);
- String read = new String(byteBuffer.array());
+ String read = new String(byteBuffer.array(), 0, byteBuffer.limit(), StandardCharsets.UTF_8);
assertEquals(input, read);
}
diff --git a/src/test/java/org/jsoup/integration/ConnectIT.java b/src/test/java/org/jsoup/integration/ConnectIT.java
index 92c10c1b40..672fed2911 100644
--- a/src/test/java/org/jsoup/integration/ConnectIT.java
+++ b/src/test/java/org/jsoup/integration/ConnectIT.java
@@ -54,6 +54,7 @@ public void canInterruptBodyStringRead() throws InterruptedException {
@Test
public void canInterruptDocumentRead() throws InterruptedException {
// todo - implement in interruptable channels, so it's immediate
+ long start = System.currentTimeMillis();
final String[] body = new String[1];
Thread runner = new Thread(() -> {
try {
@@ -68,12 +69,15 @@ public void canInterruptDocumentRead() throws InterruptedException {
});
runner.start();
- Thread.sleep(1000 * 3);
+ Thread.sleep(3 * 1000);
runner.interrupt();
assertTrue(runner.isInterrupted());
runner.join();
- assertEquals(0, body[0].length()); // doesn't read a failed doc
+ long end = System.currentTimeMillis();
+ // check we are between 3 and connect timeout seconds (should be just over 3; but allow some slack for slow CI runners)
+ assertTrue(end - start > 3 * 1000);
+ assertTrue(end - start < 10 * 1000);
}
@Test public void canInterruptThenJoinASpawnedThread() throws InterruptedException {
@@ -184,6 +188,8 @@ public void infiniteReadSupported() throws IOException {
assertTrue(caught);
}
+ private static final int LargeHtmlSize = 280735;
+
@Test
public void remainingAfterFirstRead() throws IOException {
int bufferSize = 5 * 1024;
@@ -214,33 +220,34 @@ public void remainingAfterFirstRead() throws IOException {
// bodyStream is not capped to body size - only for jsoup consumed stream
assertTrue(fullArray.length > capSize);
- assertEquals(280735, fullArray.length);
- String fullText = new String(fullArray, StandardCharsets.UTF_8);
+ assertEquals(LargeHtmlSize, fullRead.limit());
+ String fullText = new String(fullRead.array(), 0, fullRead.limit(), StandardCharsets.UTF_8);
assertTrue(fullText.startsWith(firstText));
+ assertEquals(LargeHtmlSize, fullText.length());
}
}
@Test
public void noLimitAfterFirstRead() throws IOException {
- int bufferSize = 5 * 1024;
+ int firstMaxRead = 5 * 1024;
String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
try (BufferedInputStream stream = Jsoup.connect(url).execute().bodyStream()) {
// simulates parse which does a limited read first
- stream.mark(bufferSize);
- ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize);
+ stream.mark(firstMaxRead);
+ ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, firstMaxRead);
byte[] array = firstBytes.array();
String firstText = new String(array, StandardCharsets.UTF_8);
assertTrue(firstText.startsWith("Large"));
- assertEquals(bufferSize, array.length);
+ assertEquals(firstMaxRead, array.length);
// reset and read fully
stream.reset();
ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0);
- byte[] fullArray = fullRead.array();
- assertEquals(280735, fullArray.length);
- String fullText = new String(fullArray, StandardCharsets.UTF_8);
+ assertEquals(LargeHtmlSize, fullRead.limit());
+ String fullText = new String(fullRead.array(), 0, fullRead.limit(), StandardCharsets.UTF_8);
assertTrue(fullText.startsWith(firstText));
+ assertEquals(LargeHtmlSize, fullText.length());
}
}
@@ -255,8 +262,7 @@ public void noLimitAfterFirstRead() throws IOException {
.bodyStream()) {
ByteBuffer cappedRead = DataUtil.readToByteBuffer(stream, 0);
- byte[] cappedArray = cappedRead.array();
- assertEquals(cap, cappedArray.length);
+ assertEquals(cap, cappedRead.limit());
}
}
}
diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java
index 688713dc0a..4c1f7d378a 100644
--- a/src/test/java/org/jsoup/integration/ConnectTest.java
+++ b/src/test/java/org/jsoup/integration/ConnectTest.java
@@ -33,6 +33,7 @@
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -603,13 +604,22 @@ public void testBinaryContentTypeThrowsException() throws IOException {
@Test
public void canFetchBinaryAsBytes() throws IOException {
- Connection.Response res = Jsoup.connect(FileServlet.urlTo("/htmltests/thumb.jpg"))
+ String path = "/htmltests/thumb.jpg";
+ int actualSize = 1052;
+
+ Connection.Response res = Jsoup.connect(FileServlet.urlTo(path))
.data(FileServlet.ContentTypeParam, "image/jpeg")
.ignoreContentType(true)
.execute();
- byte[] bytes = res.bodyAsBytes();
- assertEquals(1052, bytes.length);
+ byte[] resBytes = res.bodyAsBytes();
+ assertEquals(actualSize, resBytes.length);
+
+ // compare the content of the file and the bytes:
+ Path filePath = ParseTest.getPath(path);
+ byte[] fileBytes = Files.readAllBytes(filePath);
+ assertEquals(actualSize, fileBytes.length);
+ assertArrayEquals(fileBytes, resBytes);
}
@Test
@@ -996,8 +1006,14 @@ void progressListener(String path) throws IOException {
// should expect to see events relative to how large the buffer is.
int expected = LargeDocFileLen / 8192;
- assertTrue(numProgress.get() > expected * 0.75);
- assertTrue(numProgress.get() < expected * 1.25);
+
+ int num = numProgress.get();
+ // debug log if not in those ranges:
+ if (num < expected * 0.75 || num > expected * 1.25) {
+ System.err.println("Expected: " + expected + ", got: " + num);
+ }
+ assertTrue(num > expected * 0.75);
+ assertTrue(num < expected * 1.25);
// check the document works
assertEquals(LargeDocTextLen, document.text().length());
diff --git a/src/test/java/org/jsoup/integration/ParseTest.java b/src/test/java/org/jsoup/integration/ParseTest.java
index d84c103497..d963d1d621 100644
--- a/src/test/java/org/jsoup/integration/ParseTest.java
+++ b/src/test/java/org/jsoup/integration/ParseTest.java
@@ -153,7 +153,8 @@ public static String getFileAsString(File file) throws IOException {
if (file.getName().endsWith(".gz")) {
InputStream stream = new GZIPInputStream(new FileInputStream(file));
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(stream, 0);
- bytes = byteBuffer.array();
+ bytes = new byte[byteBuffer.limit()];
+ System.arraycopy(byteBuffer.array(), 0, bytes, 0, byteBuffer.limit());
} else {
bytes = Files.readAllBytes(file.toPath());
}
diff --git a/src/test/java/org/jsoup/integration/servlets/EchoServlet.java b/src/test/java/org/jsoup/integration/servlets/EchoServlet.java
index 6ff31a2a0d..76ef7ff5d0 100644
--- a/src/test/java/org/jsoup/integration/servlets/EchoServlet.java
+++ b/src/test/java/org/jsoup/integration/servlets/EchoServlet.java
@@ -90,7 +90,7 @@ protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOEx
// post body
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(req.getInputStream(), 0);
- String postData = new String(byteBuffer.array(), StandardCharsets.UTF_8);
+ String postData = new String(byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit(), StandardCharsets.UTF_8);
if (!StringUtil.isBlank(postData)) {
write(w, "Post Data", postData);
}
diff --git a/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java b/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java
index 26b2fef9f5..4158e1e454 100644
--- a/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java
+++ b/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java
@@ -27,8 +27,8 @@ protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOEx
StringBuilder sb = new StringBuilder();
sb.append("Something");
- while (sb.length() <= CharacterReaderTest.maxBufferLen) {
- sb.append("A suitable amount of data. \n");
+ while (sb.length() <= 32 * 1024) {
+ sb.append("A suitable amount of data.
\n");
}
sb.append("Finale.
");
String data = sb.toString();
diff --git a/src/test/java/org/jsoup/internal/SoftPoolTest.java b/src/test/java/org/jsoup/internal/SoftPoolTest.java
new file mode 100644
index 0000000000..bca2c199a4
--- /dev/null
+++ b/src/test/java/org/jsoup/internal/SoftPoolTest.java
@@ -0,0 +1,139 @@
+package org.jsoup.internal;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ Tests for SoftPool: per-thread reuse, recovery after the soft reference is cleared, borrowing from an empty pool,
+ and the MaxIdle cap on retained objects.
+ */
+public class SoftPoolTest {
+
+    private static final int BufSize = 12;
+    private static final int NumThreads = 5;
+    private static final int NumObjects = 3;
+
+    /** Buffers released on a thread are reused on that thread, and are never handed to another thread. */
+    @Test
+    public void testSoftLocalPool() throws InterruptedException {
+        SoftPool<char[]> softLocalPool = new SoftPool<>(() -> new char[BufSize]);
+
+        ExecutorService executorService = Executors.newFixedThreadPool(NumThreads);
+        CountDownLatch latch = new CountDownLatch(NumThreads);
+
+        @SuppressWarnings("unchecked") // generic array creation; each slot is only ever assigned a Set<char[]>
+        Set<char[]>[] threadLocalBuffers = new Set[NumThreads];
+        for (int i = 0; i < NumThreads; i++) {
+            threadLocalBuffers[i] = new HashSet<>();
+        }
+
+        // assertion failures thrown inside executor tasks would otherwise be lost; collect them for the main thread
+        List<Throwable> failures = new ArrayList<>();
+        AtomicInteger threadCount = new AtomicInteger();
+
+        Runnable task = () -> {
+            try {
+                int threadIndex = threadCount.getAndIncrement();
+                Set<char[]> localBuffers = new HashSet<>();
+                // First borrow
+                for (int i = 0; i < NumObjects; i++) {
+                    char[] buffer = softLocalPool.borrow();
+                    assertEquals(BufSize, buffer.length);
+                    localBuffers.add(buffer);
+                }
+
+                // Release buffers back to the pool
+                for (char[] buffer : localBuffers) {
+                    softLocalPool.release(buffer);
+                }
+
+                // Borrow again and ensure buffers are reused
+                for (int i = 0; i < NumObjects; i++) {
+                    char[] buffer = softLocalPool.borrow();
+                    assertTrue(localBuffers.contains(buffer), "Buffer was not reused in the same thread");
+                    threadLocalBuffers[threadIndex].add(buffer);
+                }
+            } catch (Throwable t) {
+                synchronized (failures) {
+                    failures.add(t);
+                }
+            } finally {
+                latch.countDown();
+            }
+        };
+
+        // Run the tasks
+        for (int i = 0; i < NumThreads; i++) {
+            executorService.submit(task);
+        }
+
+        // Wait for all threads to complete
+        latch.await();
+        executorService.shutdown();
+
+        // Surface anything that went wrong on the worker threads
+        assertTrue(failures.isEmpty(), "Worker thread failed: " + failures);
+
+        // Ensure no buffers are shared between threads
+        Set<char[]> uniqueBuffers = new HashSet<>();
+        for (Set<char[]> bufferSet : threadLocalBuffers) {
+            for (char[] buffer : bufferSet) {
+                assertTrue(uniqueBuffers.add(buffer), "Buffer was shared between threads");
+            }
+        }
+    }
+
+    /** After the soft reference is cleared, the pool recovers and hands out a newly created object. */
+    @Test
+    public void testSoftReferenceBehavior() {
+        SoftPool<char[]> softLocalPool = new SoftPool<>(() -> new char[BufSize]);
+
+        // Borrow and release an object
+        char[] buffer = softLocalPool.borrow();
+        assertEquals(BufSize, buffer.length);
+        softLocalPool.release(buffer);
+
+        // Fake a GC
+        softLocalPool.threadLocalStack.get().clear();
+
+        // Ensure the object is garbage collected
+        assertNull(softLocalPool.threadLocalStack.get().get());
+
+        char[] second = softLocalPool.borrow();
+        // should be a different instance, but same size
+        assertNotSame(buffer, second);
+        assertEquals(BufSize, second.length);
+    }
+
+    /** Borrowing from a pool that has nothing idle creates a new object via the initializer. */
+    @Test
+    public void testBorrowFromEmptyPool() {
+        SoftPool<char[]> softLocalPool = new SoftPool<>(() -> new char[BufSize]);
+
+        // Borrow from an empty pool
+        char[] buffer = softLocalPool.borrow();
+        assertNotNull(buffer, "Borrowed null from an empty pool");
+        assertEquals(BufSize, buffer.length);
+    }
+
+    /** Releasing more than MaxIdle objects does not grow the pool beyond MaxIdle. */
+    @Test
+    public void testReleaseMoreThanMaxIdle() {
+        SoftPool<char[]> softLocalPool = new SoftPool<>(() -> new char[BufSize]);
+
+        // Borrow more than MaxIdle objects
+        List<char[]> borrowedBuffers = new ArrayList<>();
+        for (int i = 0; i < SoftPool.MaxIdle + 5; i++) {
+            char[] buffer = softLocalPool.borrow();
+            borrowedBuffers.add(buffer);
+        }
+
+        // Release all borrowed objects back to the pool
+        for (char[] buffer : borrowedBuffers) {
+            softLocalPool.release(buffer);
+        }
+
+        // Ensure the pool size does not exceed MaxIdle
+        Stack<char[]> stack = softLocalPool.getStack();
+        assertTrue(stack.size() <= SoftPool.MaxIdle, "Pool size exceeded MaxIdle limit");
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/org/jsoup/parser/CharacterReaderTest.java b/src/test/java/org/jsoup/parser/CharacterReaderTest.java
index 7071bfe51d..8f0ee30ca2 100644
--- a/src/test/java/org/jsoup/parser/CharacterReaderTest.java
+++ b/src/test/java/org/jsoup/parser/CharacterReaderTest.java
@@ -1,6 +1,7 @@
package org.jsoup.parser;
import org.jsoup.integration.ParseTest;
+import org.jsoup.internal.StringUtil;
import org.junit.jupiter.api.Test;
import java.io.BufferedReader;
@@ -16,7 +17,7 @@
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class CharacterReaderTest {
- public final static int maxBufferLen = CharacterReader.maxBufferLen;
+ public final static int maxBufferLen = CharacterReader.BufferSize;
@Test public void consume() {
CharacterReader r = new CharacterReader("one");
@@ -359,24 +360,23 @@ public void consumeToNonexistentEndWhenAtAnd() {
@Test
public void notEmptyAtBufferSplitPoint() {
- CharacterReader r = new CharacterReader(new StringReader("How about now"), 3);
- assertEquals("How", r.consumeTo(' '));
- assertFalse(r.isEmpty(), "Should not be empty");
-
- assertEquals(' ', r.consume());
- assertFalse(r.isEmpty());
- assertEquals(4, r.pos());
- assertEquals('a', r.consume());
- assertEquals(5, r.pos());
- assertEquals('b', r.consume());
- assertEquals('o', r.consume());
- assertEquals('u', r.consume());
- assertEquals('t', r.consume());
- assertEquals(' ', r.consume());
- assertEquals('n', r.consume());
- assertEquals('o', r.consume());
- assertEquals('w', r.consume());
+ int len = CharacterReader.BufferSize * 12; // input is sized at twelve times the reader's BufferSize
+ StringBuilder builder = StringUtil.borrowBuilder();
+ while (builder.length() <= len) builder.append('!');
+ CharacterReader r = new CharacterReader(builder.toString());
+ StringUtil.releaseBuilder(builder);
+
+ // consume the first len chars one at a time; pos() must advance by one and isEmpty() must stay false around every consume
+ for (int pos = 0; pos < len; pos ++) {
+ assertEquals(pos, r.pos());
+ assertFalse(r.isEmpty());
+ assertEquals('!', r.consume());
+ assertEquals(pos + 1, r.pos());
+ assertFalse(r.isEmpty());
+ }
+ assertEquals('!', r.consume()); // the fill loop ran while length <= len, so exactly one char remains
assertTrue(r.isEmpty());
+ assertEquals(CharacterReader.EOF, r.consume());
}
@Test public void bufferUp() {
@@ -437,10 +437,10 @@ public void notEmptyAtBufferSplitPoint() {
// get over the buffer
while (!noTrack.matches("[foo]"))
noTrack.consumeTo("[foo]");
- assertEquals(32778, noTrack.pos());
+ assertEquals(2090, noTrack.pos());
assertEquals(1, noTrack.lineNumber());
assertEquals(noTrack.pos()+1, noTrack.columnNumber());
- assertEquals("1:32779", noTrack.posLineCol());
+ assertEquals("1:2091", noTrack.posLineCol());
// and the line numbers: "\n\n\n"
assertEquals(0, track.pos());
@@ -468,12 +468,12 @@ public void notEmptyAtBufferSplitPoint() {
// get over the buffer
while (!track.matches("[foo]"))
track.consumeTo("[foo]");
- assertEquals(32778, track.pos());
+ assertEquals(2090, track.pos());
assertEquals(4, track.lineNumber());
- assertEquals(32761, track.columnNumber());
- assertEquals("4:32761", track.posLineCol());
+ assertEquals(2073, track.columnNumber());
+ assertEquals("4:2073", track.posLineCol());
track.consumeTo('\n');
- assertEquals("4:32766", track.posLineCol());
+ assertEquals("4:2078", track.posLineCol());
track.consumeTo("[bar]");
assertEquals(5, track.lineNumber());
@@ -491,9 +491,11 @@ public void notEmptyAtBufferSplitPoint() {
reader.trackNewlines(true);
assertEquals("1:1", reader.posLineCol());
+ StringBuilder seen = new StringBuilder();
while (!reader.isEmpty())
- reader.consume();
- assertEquals(131096, reader.pos());
+ seen.append(reader.consume());
+ assertEquals(content, seen.toString());
+ assertEquals(content.length(), reader.pos());
assertEquals(reader.pos() + 1, reader.columnNumber());
assertEquals(1, reader.lineNumber());
}
diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java
index f911fe161b..7fa7a67a59 100644
--- a/src/test/java/org/jsoup/parser/HtmlParserTest.java
+++ b/src/test/java/org/jsoup/parser/HtmlParserTest.java
@@ -368,7 +368,7 @@ private static Stream dupeAttributeData() {
@Test public void handlesCdataAcrossBuffer() {
StringBuilder sb = new StringBuilder();
- while (sb.length() <= CharacterReader.maxBufferLen) {
+ while (sb.length() <= CharacterReader.BufferSize) {
sb.append("A suitable amount of CData.\n");
}
String cdata = sb.toString();
diff --git a/src/test/java/org/jsoup/parser/TokeniserStateTest.java b/src/test/java/org/jsoup/parser/TokeniserStateTest.java
index d34c38fcea..6d9b5f7a77 100644
--- a/src/test/java/org/jsoup/parser/TokeniserStateTest.java
+++ b/src/test/java/org/jsoup/parser/TokeniserStateTest.java
@@ -208,21 +208,21 @@ public void testPublicAndSystemIdentifiersWithWhitespace() {
@Test
public void testUnconsumeAtBufferBoundary() {
String triggeringSnippet = "One" + tag + ">";
@@ -60,10 +60,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void handleSuperLargeAttributeName() {
- StringBuilder sb = new StringBuilder(maxBufferLen);
+ StringBuilder sb = new StringBuilder(BufferSize);
do {
sb.append("LargAttributeName");
- } while (sb.length() < maxBufferLen);
+ } while (sb.length() < BufferSize);
String attrName = sb.toString();
String html = "One
";
@@ -79,10 +79,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void handleLargeText() {
- StringBuilder sb = new StringBuilder(maxBufferLen);
+ StringBuilder sb = new StringBuilder(BufferSize);
do {
sb.append("A Large Amount of Text");
- } while (sb.length() < maxBufferLen);
+ } while (sb.length() < BufferSize);
String text = sb.toString();
String html = "" + text + "
";
@@ -96,10 +96,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void handleLargeComment() {
- StringBuilder sb = new StringBuilder(maxBufferLen);
+ StringBuilder sb = new StringBuilder(BufferSize);
do {
sb.append("Quite a comment ");
- } while (sb.length() < maxBufferLen);
+ } while (sb.length() < BufferSize);
String comment = sb.toString();
String html = "";
@@ -114,10 +114,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void handleLargeCdata() {
- StringBuilder sb = new StringBuilder(maxBufferLen);
+ StringBuilder sb = new StringBuilder(BufferSize);
do {
sb.append("Quite a lot of CDATA <><><><>");
- } while (sb.length() < maxBufferLen);
+ } while (sb.length() < BufferSize);
String cdata = sb.toString();
String html = "";
@@ -133,10 +133,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void handleLargeTitle() {
- StringBuilder sb = new StringBuilder(maxBufferLen);
+ StringBuilder sb = new StringBuilder(BufferSize);
do {
sb.append("Quite a long title");
- } while (sb.length() < maxBufferLen);
+ } while (sb.length() < BufferSize);
String title = sb.toString();
String html = "" + title + "";
@@ -178,10 +178,10 @@ public void bufferUpInAttributeVal() {
}
@Test public void canParseVeryLongBogusComment() {
- StringBuilder commentData = new StringBuilder(maxBufferLen);
+ StringBuilder commentData = new StringBuilder(BufferSize);
do {
commentData.append("blah blah blah blah ");
- } while (commentData.length() < maxBufferLen);
+ } while (commentData.length() < BufferSize);
String expectedCommentData = commentData.toString();
String testMarkup = "";
Parser parser = new Parser(new HtmlTreeBuilder());
@@ -196,7 +196,7 @@ public void bufferUpInAttributeVal() {
@Test public void canParseCdataEndingAtEdgeOfBuffer() {
String cdataStart = "";
- int bufLen = maxBufferLen - cdataStart.length() - 1; // also breaks with -2, but not with -3 or 0
+ int bufLen = BufferSize - cdataStart.length() - 1; // also breaks with -2, but not with -3 or 0
char[] cdataContentsArray = new char[bufLen];
Arrays.fill(cdataContentsArray, 'x');
String cdataContents = new String(cdataContentsArray);