You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
5.2 KiB
218 lines
5.2 KiB
https://bugs.gentoo.org/917618 |
|
https://bugs.documentfoundation.org/show_bug.cgi?id=158108 |
|
|
|
From bcd5d851ebe91fc22edd3ea92be4a674bd13acba Mon Sep 17 00:00:00 2001 |
|
From: Alfred Wingate <parona@protonmail.com> |
|
Date: Mon, 20 Nov 2023 14:47:28 +0200 |
|
Subject: [PATCH] Remove use of the now-removed LBCMNoChain option |
|
|
|
* This change removes the use of the !!LBCMNoChain option and explicitly |

prevents chaining (with a leading '^') in each rule where the option would have applied. |
|
|
|
https://github.com/unicode-org/icu/commit/84e47620692be90950d090f2f4722494b020ad96 |
|
https://github.com/unicode-org/icu/commit/9d9256f3b792100cda697c7bcf52bacfbc3bca87 |
|
|
|
Signed-off-by: Alfred Wingate <parona@protonmail.com> |
|
--- a/i18npool/source/breakiterator/data/line.txt |
|
+++ b/i18npool/source/breakiterator/data/line.txt |
|
@@ -14,7 +14,6 @@ |
|
# |
|
|
|
!!chain; |
|
-!!LBCMNoChain; |
|
|
|
|
|
!!lookAheadHardBreak; |
|
@@ -206,13 +205,13 @@ $CR $LF {100}; |
|
# |
|
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks. |
|
$CAN_CM $CM* $LB4Breaks {100}; |
|
-$CM+ $LB4Breaks {100}; |
|
+^$CM+ $LB4Breaks {100}; |
|
|
|
# LB 7 x SP |
|
# x ZW |
|
$LB4NonBreaks [$SP $ZW]; |
|
$CAN_CM $CM* [$SP $ZW]; |
|
-$CM+ [$SP $ZW]; |
|
+^$CM+ [$SP $ZW]; |
|
|
|
# |
|
# LB 8 Break after zero width space |
|
@@ -226,14 +225,14 @@ $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]]; |
|
# See definition of $CAN_CM. |
|
|
|
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules. |
|
-$CM+; |
|
+^$CM+; |
|
|
|
# |
|
# LB 11 Do not break before or after WORD JOINER & related characters. |
|
# |
|
$CAN_CM $CM* $WJcm; |
|
$LB8NonBreaks $WJcm; |
|
-$CM+ $WJcm; |
|
+^$CM+ $WJcm; |
|
|
|
$WJcm [^$CAN_CM]; |
|
$WJcm $CAN_CM $CM*; |
|
@@ -243,7 +242,7 @@ $WJcm $CAN_CM $CM*; |
|
# |
|
# (!SP) x GL |
|
[$LB8NonBreaks-$SP] $CM* $GLcm; |
|
-$CM+ $GLcm; |
|
+^$CM+ $GLcm; |
|
|
|
# GL x |
|
$GLcm ($LB8Breaks | $SP); |
|
@@ -260,19 +259,19 @@ $GLcm [$LB8NonBreaks-$SP] $CM*; # Don't let a combining mark go onto $CR, $B |
|
# |
|
$LB8NonBreaks $CL; |
|
$CAN_CM $CM* $CL; |
|
-$CM+ $CL; # by rule 10, stand-alone CM behaves as AL |
|
+^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL |
|
|
|
$LB8NonBreaks $EX; |
|
$CAN_CM $CM* $EX; |
|
-$CM+ $EX; # by rule 10, stand-alone CM behaves as AL |
|
+^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL |
|
|
|
$LB8NonBreaks $IS; |
|
$CAN_CM $CM* $IS; |
|
-$CM+ $IS; # by rule 10, stand-alone CM behaves as AL |
|
+^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL |
|
|
|
$LB8NonBreaks $SY; |
|
$CAN_CM $CM* $SY; |
|
-$CM+ $SY; # by rule 10, stand-alone CM behaves as AL |
|
+^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL |
|
|
|
|
|
# |
|
@@ -302,7 +301,7 @@ $LB18Breaks = [$LB8Breaks $SP]; |
|
# LB 19 |
|
# x QU |
|
$LB18NonBreaks $CM* $QUcm; |
|
-$CM+ $QUcm; |
|
+^$CM+ $QUcm; |
|
|
|
# QU x |
|
$QUcm .?; |
|
@@ -331,7 +330,7 @@ $HLcm ($HYcm | $BAcm) [^$CB]?; |
|
|
|
# LB 22 |
|
($ALcm | $HLcm) $INcm; |
|
-$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL |
|
+^$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL |
|
$IDcm $INcm; |
|
$INcm $INcm; |
|
$NUcm $INcm; |
|
@@ -341,7 +340,7 @@ $NUcm $INcm; |
|
$IDcm $POcm; |
|
$ALcm $NUcm; # includes $LB19 |
|
$HLcm $NUcm; |
|
-$CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL |
|
+^$CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL |
|
$NUcm $ALcm; |
|
$NUcm $HLcm; |
|
|
|
@@ -373,7 +372,7 @@ $PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm); |
|
# LB 28 Do not break between alphabetics |
|
# |
|
($ALcm | $HLcm) ($ALcm | $HLcm); |
|
-$CM+ ($ALcm | $HLcm); # The $CM+ is from rule 10, an unattached CM is treated as AL |
|
+^$CM+ ($ALcm | $HLcm); # The $CM+ is from rule 10, an unattached CM is treated as AL |
|
|
|
# LB 29 |
|
$IScm ($ALcm | $NUcm); |
|
@@ -383,7 +382,7 @@ $IScm ($ALcm | $NUcm); |
|
# and opening or closing punctuation |
|
# |
|
($ALcm | $HLcm | $NUcm) $OPcm; |
|
-$CM+ $OPcm; |
|
+^$CM+ $OPcm; |
|
$CLcm ($ALcm | $HLcm | $NUcm); |
|
|
|
# |
|
@@ -393,32 +392,32 @@ $CLcm ($ALcm | $HLcm | $NUcm); |
|
|
|
!!reverse; |
|
|
|
-$CM+ $ALPlus; |
|
-$CM+ $BA; |
|
-$CM+ $BB; |
|
-$CM+ $B2; |
|
-$CM+ $CL; |
|
-$CM+ $EX; |
|
-$CM+ $GL; |
|
-$CM+ $HL; |
|
-$CM+ $HY; |
|
-$CM+ $H2; |
|
-$CM+ $H3; |
|
-$CM+ $ID; |
|
-$CM+ $IN; |
|
-$CM+ $IS; |
|
-$CM+ $JL; |
|
-$CM+ $JV; |
|
-$CM+ $JT; |
|
-$CM+ $NS; |
|
-$CM+ $NU; |
|
-$CM+ $OP; |
|
-$CM+ $PO; |
|
-$CM+ $PR; |
|
-$CM+ $QU; |
|
-$CM+ $SY; |
|
-$CM+ $WJ; |
|
-$CM+; |
|
+^$CM+ $ALPlus; |
|
+^$CM+ $BA; |
|
+^$CM+ $BB; |
|
+^$CM+ $B2; |
|
+^$CM+ $CL; |
|
+^$CM+ $EX; |
|
+^$CM+ $GL; |
|
+^$CM+ $HL; |
|
+^$CM+ $HY; |
|
+^$CM+ $H2; |
|
+^$CM+ $H3; |
|
+^$CM+ $ID; |
|
+^$CM+ $IN; |
|
+^$CM+ $IS; |
|
+^$CM+ $JL; |
|
+^$CM+ $JV; |
|
+^$CM+ $JT; |
|
+^$CM+ $NS; |
|
+^$CM+ $NU; |
|
+^$CM+ $OP; |
|
+^$CM+ $PO; |
|
+^$CM+ $PR; |
|
+^$CM+ $QU; |
|
+^$CM+ $SY; |
|
+^$CM+ $WJ; |
|
+^$CM+; |
|
|
|
|
|
# |
|
@@ -468,7 +467,7 @@ $LF $CR; |
|
# X $CM needs to behave like X, where X is not $SP or controls. |
|
# $CM not covered by the above needs to behave like $AL |
|
# Stick together any combining sequences that don't match other rules. |
|
-$CM+ $CAN_CM; |
|
+^$CM+ $CAN_CM; |
|
|
|
|
|
# LB 11 |
|
@@ -606,8 +605,8 @@ $CM* ($ALPlus | $HL | $NU) $CM* ($CL | $SY)+ [^$SP]; |
|
!!safe_reverse; |
|
|
|
# LB 7 |
|
-$CM+ [^$CM $BK $CR $LF $NL $ZW $SP]; |
|
-$CM+ $SP / .; |
|
+^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP]; |
|
+^$CM+ $SP / .; |
|
|
|
# LB 9 |
|
$SP+ $CM* $OP; |
|
-- |
|
2.42.1 |
|
|
|
|