From d4a6811efff8aecbbdd0e8e7cfbb57014231f201 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 21 Jan 2023 13:50:36 +0100 Subject: [PATCH] Fix incorrect check in cs_8859_5 in map_from_unicode() The condition `code == 0x0450 || code == 0x045D` is always false because of an incorrect range check on code. According to the BMP coverage in the encoding spec for ISO-8859-5 (https://encoding.spec.whatwg.org/iso-8859-5-bmp.html) the range of valid characters is 0x0401 - 0x045F (except for 0x040D, 0x0450, 0x045D). The current check has an upper bound of 0x044F instead of 0x045F. Fix this by changing the upper bound. --- ext/standard/html.c | 2 +- .../strings/html_entity_decode_iso8859-5.phpt | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ext/standard/html.c b/ext/standard/html.c index b93ce95df1900..14ccd71a2368c 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -477,7 +477,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u *res = 0xF0; /* numero sign */ } else if (code == 0xA7) { *res = 0xFD; /* section sign */ - } else if (code >= 0x0401 && code <= 0x044F) { + } else if (code >= 0x0401 && code <= 0x045F) { if (code == 0x040D || code == 0x0450 || code == 0x045D) return FAILURE; *res = code - 0x360; diff --git a/ext/standard/tests/strings/html_entity_decode_iso8859-5.phpt b/ext/standard/tests/strings/html_entity_decode_iso8859-5.phpt index 46e6dc4dfe3c8..0616827c54853 100644 --- a/ext/standard/tests/strings/html_entity_decode_iso8859-5.phpt +++ b/ext/standard/tests/strings/html_entity_decode_iso8859-5.phpt @@ -358,47 +358,47 @@ CYRILLIC SMALL LETTER YA: я => ef NUMERO SIGN: № => f0 ð => ð -CYRILLIC SMALL LETTER IO: ё => 2623783435313b +CYRILLIC SMALL LETTER IO: ё => f1 ñ => ñ -CYRILLIC SMALL LETTER DJE: ђ => 2623783435323b +CYRILLIC SMALL LETTER DJE: ђ => f2 ò => ò -CYRILLIC SMALL LETTER GJE: ѓ => 2623783435333b +CYRILLIC SMALL LETTER 
GJE: ѓ => f3 ó => ó -CYRILLIC SMALL LETTER UKRAINIAN IE: є => 2623783435343b +CYRILLIC SMALL LETTER UKRAINIAN IE: є => f4 ô => ô -CYRILLIC SMALL LETTER DZE: ѕ => 2623783435353b +CYRILLIC SMALL LETTER DZE: ѕ => f5 õ => õ -CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I: і => 2623783435363b +CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I: і => f6 ö => ö -CYRILLIC SMALL LETTER YI: ї => 2623783435373b +CYRILLIC SMALL LETTER YI: ї => f7 ÷ => ÷ -CYRILLIC SMALL LETTER JE: ј => 2623783435383b +CYRILLIC SMALL LETTER JE: ј => f8 ø => ø -CYRILLIC SMALL LETTER LJE: љ => 2623783435393b +CYRILLIC SMALL LETTER LJE: љ => f9 ù => ù -CYRILLIC SMALL LETTER NJE: њ => 2623783435413b +CYRILLIC SMALL LETTER NJE: њ => fa ú => ú -CYRILLIC SMALL LETTER TSHE: ћ => 2623783435423b +CYRILLIC SMALL LETTER TSHE: ћ => fb û => û -CYRILLIC SMALL LETTER KJE: ќ => 2623783435433b +CYRILLIC SMALL LETTER KJE: ќ => fc ü => ü SECTION SIGN: § => fd ý => ý -CYRILLIC SMALL LETTER SHORT U: ў => 2623783435453b +CYRILLIC SMALL LETTER SHORT U: ў => fe þ => þ -CYRILLIC SMALL LETTER DZHE: џ => 2623783435463b +CYRILLIC SMALL LETTER DZHE: џ => ff ÿ => ÿ