|
| 1 | +/* |
| 2 | + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. |
| 3 | + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | + * |
| 5 | + * This code is free software; you can redistribute it and/or modify it |
| 6 | + * under the terms of the GNU General Public License version 2 only, as |
| 7 | + * published by the Free Software Foundation. |
| 8 | + * |
| 9 | + * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | + * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | + * accompanied this code). |
| 14 | + * |
| 15 | + * You should have received a copy of the GNU General Public License version |
| 16 | + * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | + * |
| 19 | + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | + * or visit www.oracle.com if you need additional information or have any |
| 21 | + * questions. |
| 22 | + */ |
| 23 | + |
| 24 | +/* |
| 25 | + * @test |
| 26 | + * @bug 8358066 |
| 27 | + * @summary Test for bug in Convert.utfNumChars() |
| 28 | + * @modules jdk.compiler/com.sun.tools.javac.util |
| 29 | + * @run main TestUtfNumChars |
| 30 | + */ |
| 31 | + |
| 32 | +import com.sun.tools.javac.util.Convert; |
| 33 | + |
| 34 | +import java.util.function.IntPredicate; |
| 35 | +import java.util.stream.IntStream; |
| 36 | + |
| 37 | +public class TestUtfNumChars { |
| 38 | + |
| 39 | + public static void main(String[] args) { |
| 40 | + |
| 41 | + // This is the string "ab«cd≤ef🟢gh" |
| 42 | + String s = "ab\u00ABcd\u2264ef\ud83d\udd34gh"; |
| 43 | + |
| 44 | + // This is its modified UTF-8 encoding |
| 45 | + byte[] utf8 = Convert.string2utf(s); // UTF-8: 61 62 c2 ab 63 64 e2 89 a4 65 66 ed a0 bd ed b4 b4 67 68 |
| 46 | + // Bytes: 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 |
| 47 | + // Chars: 00 01 02 .. 03 04 05 .. .. 06 07 08 .. .. 09 .. .. 10 11 |
| 48 | + |
| 49 | + // These are the offsets in "utf8" marking the boundaries of encoded Java charcters |
| 50 | + int[] offsets = new int[] { |
| 51 | + 0, 1, 2, 4, 5, 6, 9, 10, 11, 14, 17, 18 |
| 52 | + }; |
| 53 | + IntPredicate boundary = off -> off == utf8.length || IntStream.of(offsets).anyMatch(off2 -> off2 == off); |
| 54 | + |
| 55 | + // Check Convert.utfNumChars() on every subsequence |
| 56 | + for (int i = 0; i < offsets.length; i++) { |
| 57 | + int i_off = offsets[i]; |
| 58 | + if (!boundary.test(i_off)) |
| 59 | + continue; |
| 60 | + for (int j = i; j < offsets.length; j++) { |
| 61 | + int j_off = offsets[j]; |
| 62 | + if (!boundary.test(j_off)) |
| 63 | + continue; |
| 64 | + int nchars = Convert.utfNumChars(utf8, i_off, j_off - i_off); |
| 65 | + if (nchars != j - i) |
| 66 | + throw new AssertionError(String.format("nchars %d != %d for [%d, %d)", nchars, j - i, i_off, j_off)); |
| 67 | + } |
| 68 | + } |
| 69 | + } |
| 70 | +} |
0 commit comments