From e96e9410bde06962c211fa6f21c3d91263a90f86 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Fri, 29 Apr 2022 22:50:33 +0000 Subject: [PATCH] ICU-22005 Fix int32 overflow in FormattedStringBuilder See #2070 --- .../i18n/formatted_string_builder.cpp | 55 +++++++++++++------ .../formatted_string_builder_test.cpp | 41 ++++++++++++++ 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/i18n/formatted_string_builder.cpp b/i18n/formatted_string_builder.cpp index 734078644b8..628fbea8711 100644 --- a/i18n/formatted_string_builder.cpp +++ b/i18n/formatted_string_builder.cpp @@ -6,6 +6,7 @@ #if !UCONFIG_NO_FORMATTING #include "formatted_string_builder.h" +#include "putilimp.h" #include "unicode/ustring.h" #include "unicode/utf16.h" #include "unicode/unum.h" // for UNumberFormatFields literals @@ -197,6 +198,9 @@ FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const Unicod int32_t thisLength = endThis - startThis; int32_t otherLength = endOther - startOther; int32_t count = otherLength - thisLength; + if (U_FAILURE(status)) { + return count; + } int32_t position; if (count > 0) { // Overall, chars need to be added. @@ -221,6 +225,9 @@ int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErr int32_t FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { + if (U_FAILURE(status)) { + return 0; + } if (this == &other) { status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -255,12 +262,18 @@ int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, U U_ASSERT(index >= 0); U_ASSERT(index <= fLength); U_ASSERT(count >= 0); + U_ASSERT(fZero >= 0); + U_ASSERT(fLength >= 0); + U_ASSERT(getCapacity() - fZero >= fLength); + if (U_FAILURE(status)) { + return count; + } if (index == 0 && fZero - count >= 0) { // Append to start fZero -= count; fLength += count; return fZero; - } else if (index == fLength && fZero + fLength + count < getCapacity()) { + } else if (index == fLength && count <= getCapacity() - fZero - fLength) { // Append to end fLength += count; return fZero + fLength - count; @@ -275,18 +288,26 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co int32_t oldZero = fZero; char16_t *oldChars = getCharPtr(); Field *oldFields = getFieldPtr(); - if (fLength + count > oldCapacity) { - if ((fLength + count) > INT32_MAX / 2) { - // If we continue, then newCapacity will overflow int32_t in the next line. + int32_t newLength; + if (uprv_add32_overflow(fLength, count, &newLength)) { + status = U_INPUT_TOO_LONG_ERROR; + return -1; + } + int32_t newZero; + if (newLength > oldCapacity) { + if (newLength > INT32_MAX / 2) { + // We do not support more than 1G char16_t in this code because + // dealing with >2G *bytes* can cause subtle bugs. status = U_INPUT_TOO_LONG_ERROR; return -1; } - int32_t newCapacity = (fLength + count) * 2; - int32_t newZero = newCapacity / 2 - (fLength + count) / 2; + // Keep newCapacity also to at most 1G char16_t. + int32_t newCapacity = newLength * 2; + newZero = (newCapacity - newLength) / 2; // C++ note: malloc appears in two places: here and in the assignment operator. - auto newChars = static_cast (uprv_malloc(sizeof(char16_t) * newCapacity)); - auto newFields = static_cast(uprv_malloc(sizeof(Field) * newCapacity)); + auto newChars = static_cast (uprv_malloc(sizeof(char16_t) * static_cast(newCapacity))); + auto newFields = static_cast(uprv_malloc(sizeof(Field) * static_cast(newCapacity))); if (newChars == nullptr || newFields == nullptr) { uprv_free(newChars); uprv_free(newFields); @@ -315,10 +336,8 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co fChars.heap.capacity = newCapacity; fFields.heap.ptr = newFields; fFields.heap.capacity = newCapacity; - fZero = newZero; - fLength += count; } else { - int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; + newZero = (oldCapacity - newLength) / 2; // C++ note: memmove is required because src and dest may overlap. // First copy the entire string to the location of the prefix, and then move the suffix @@ -331,18 +350,20 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co uprv_memmove2(oldFields + newZero + index + count, oldFields + newZero + index, sizeof(Field) * (fLength - index)); - - fZero = newZero; - fLength += count; } - U_ASSERT((fZero + index) >= 0); + fZero = newZero; + fLength = newLength; return fZero + index; } int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { - // TODO: Reset the heap here? (If the string after removal can fit on stack?) + U_ASSERT(0 <= index); + U_ASSERT(index <= fLength); + U_ASSERT(count <= (fLength - index)); + U_ASSERT(index <= getCapacity() - fZero); + int32_t position = index + fZero; - U_ASSERT(position >= 0); + // TODO: Reset the heap here? (If the string after removal can fit on stack?) uprv_memmove2(getCharPtr() + position, getCharPtr() + position + count, sizeof(char16_t) * (fLength - index - count)); diff --git a/test/intltest/formatted_string_builder_test.cpp b/test/intltest/formatted_string_builder_test.cpp index 45721a320ac..57294e24856 100644 --- a/test/intltest/formatted_string_builder_test.cpp +++ b/test/intltest/formatted_string_builder_test.cpp @@ -22,6 +22,7 @@ class FormattedStringBuilderTest : public IntlTest { void testFields(); void testUnlimitedCapacity(); void testCodePoints(); + void testInsertOverflow(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0) override; @@ -50,6 +51,7 @@ void FormattedStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const TESTCASE_AUTO(testFields); TESTCASE_AUTO(testUnlimitedCapacity); TESTCASE_AUTO(testCodePoints); + TESTCASE_AUTO(testInsertOverflow); TESTCASE_AUTO_END; } @@ -308,6 +310,45 @@ void FormattedStringBuilderTest::testCodePoints() { assertEquals("Code point count is 2", 2, nsb.codePointCount()); } +void FormattedStringBuilderTest::testInsertOverflow() { + if (quick) return; + // Setup the test fixture in sb, sb2, ustr. + UErrorCode status = U_ZERO_ERROR; + FormattedStringBuilder sb; + int32_t data_length = INT32_MAX / 2; + UnicodeString ustr(data_length, u'a', data_length); + sb.append(ustr, kUndefinedField, status); + assertSuccess("Setup the first FormattedStringBuilder", status); + + FormattedStringBuilder sb2; + sb2.append(ustr, kUndefinedField, status); + sb2.insert(0, ustr, 0, data_length / 2, kUndefinedField, status); + sb2.writeTerminator(status); + assertSuccess("Setup the second FormattedStringBuilder", status); + + ustr = sb2.toUnicodeString(); + // Complete setting up the test fixture in sb, sb2 and ustr. + + // Test splice() of the second UnicodeString + sb.splice(0, 1, ustr, 1, ustr.length(), + kUndefinedField, status); + assertEquals( + "splice() long text should not crash but return U_INPUT_TOO_LONG_ERROR", + U_INPUT_TOO_LONG_ERROR, status); + + // Test sb.insert() of the first FormattedStringBuilder with the second one. + sb.insert(0, sb2, status); + assertEquals( + "insert() long FormattedStringBuilder should not crash but return " + "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status); + + // Test sb.insert() of the first FormattedStringBuilder with UnicodeString. + sb.insert(0, ustr, 0, ustr.length(), kUndefinedField, status); + assertEquals( + "insert() long UnicodeString should not crash but return " + "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status); +} + void FormattedStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b) { // TODO: Why won't this compile without the IntlTest:: qualifier? IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());