You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

218 lines
5.2 KiB

https://bugs.gentoo.org/917618
https://bugs.documentfoundation.org/show_bug.cgi?id=158108
From bcd5d851ebe91fc22edd3ea92be4a674bd13acba Mon Sep 17 00:00:00 2001
From: Alfred Wingate <parona@protonmail.com>
Date: Mon, 20 Nov 2023 14:47:28 +0200
Subject: [PATCH] Remove use of the now removed LBCMNoChain option
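* ICU no longer accepts the !!LBCMNoChain rule option. This change drops
the option from line.txt and instead prefixes every bare $CM+ rule with
'^', explicitly preventing chaining into those rules in the places where
the option would have applied.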
https://github.com/unicode-org/icu/commit/84e47620692be90950d090f2f4722494b020ad96
https://github.com/unicode-org/icu/commit/9d9256f3b792100cda697c7bcf52bacfbc3bca87
Signed-off-by: Alfred Wingate <parona@protonmail.com>
--- a/i18npool/source/breakiterator/data/line.txt
+++ b/i18npool/source/breakiterator/data/line.txt
@@ -14,7 +14,6 @@
#
!!chain;
-!!LBCMNoChain;
!!lookAheadHardBreak;
@@ -206,13 +205,13 @@ $CR $LF {100};
#
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks.
$CAN_CM $CM* $LB4Breaks {100};
-$CM+ $LB4Breaks {100};
+^$CM+ $LB4Breaks {100};
# LB 7 x SP
# x ZW
$LB4NonBreaks [$SP $ZW];
$CAN_CM $CM* [$SP $ZW];
-$CM+ [$SP $ZW];
+^$CM+ [$SP $ZW];
#
# LB 8 Break after zero width space
@@ -226,14 +225,14 @@ $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# See definition of $CAN_CM.
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules.
-$CM+;
+^$CM+;
#
# LB 11 Do not break before or after WORD JOINER & related characters.
#
$CAN_CM $CM* $WJcm;
$LB8NonBreaks $WJcm;
-$CM+ $WJcm;
+^$CM+ $WJcm;
$WJcm [^$CAN_CM];
$WJcm $CAN_CM $CM*;
@@ -243,7 +242,7 @@ $WJcm $CAN_CM $CM*;
#
# (!SP) x GL
[$LB8NonBreaks-$SP] $CM* $GLcm;
-$CM+ $GLcm;
+^$CM+ $GLcm;
# GL x
$GLcm ($LB8Breaks | $SP);
@@ -260,19 +259,19 @@ $GLcm [$LB8NonBreaks-$SP] $CM*; # Don't let a combining mark go onto $CR, $B
#
$LB8NonBreaks $CL;
$CAN_CM $CM* $CL;
-$CM+ $CL; # by rule 10, stand-alone CM behaves as AL
+^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $EX;
$CAN_CM $CM* $EX;
-$CM+ $EX; # by rule 10, stand-alone CM behaves as AL
+^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $IS;
$CAN_CM $CM* $IS;
-$CM+ $IS; # by rule 10, stand-alone CM behaves as AL
+^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $SY;
$CAN_CM $CM* $SY;
-$CM+ $SY; # by rule 10, stand-alone CM behaves as AL
+^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL
#
@@ -302,7 +301,7 @@ $LB18Breaks = [$LB8Breaks $SP];
# LB 19
# x QU
$LB18NonBreaks $CM* $QUcm;
-$CM+ $QUcm;
+^$CM+ $QUcm;
# QU x
$QUcm .?;
@@ -331,7 +330,7 @@ $HLcm ($HYcm | $BAcm) [^$CB]?;
# LB 22
($ALcm | $HLcm) $INcm;
-$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
+^$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
$IDcm $INcm;
$INcm $INcm;
$NUcm $INcm;
@@ -341,7 +340,7 @@ $NUcm $INcm;
$IDcm $POcm;
$ALcm $NUcm; # includes $LB19
$HLcm $NUcm;
-$CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL
+^$CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL
$NUcm $ALcm;
$NUcm $HLcm;
@@ -373,7 +372,7 @@ $PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);
# LB 28 Do not break between alphabetics
#
($ALcm | $HLcm) ($ALcm | $HLcm);
-$CM+ ($ALcm | $HLcm); # The $CM+ is from rule 10, an unattached CM is treated as AL
+^$CM+ ($ALcm | $HLcm); # The $CM+ is from rule 10, an unattached CM is treated as AL
# LB 29
$IScm ($ALcm | $NUcm);
@@ -383,7 +382,7 @@ $IScm ($ALcm | $NUcm);
# and opening or closing punctuation
#
($ALcm | $HLcm | $NUcm) $OPcm;
-$CM+ $OPcm;
+^$CM+ $OPcm;
$CLcm ($ALcm | $HLcm | $NUcm);
#
@@ -393,32 +392,32 @@ $CLcm ($ALcm | $HLcm | $NUcm);
!!reverse;
-$CM+ $ALPlus;
-$CM+ $BA;
-$CM+ $BB;
-$CM+ $B2;
-$CM+ $CL;
-$CM+ $EX;
-$CM+ $GL;
-$CM+ $HL;
-$CM+ $HY;
-$CM+ $H2;
-$CM+ $H3;
-$CM+ $ID;
-$CM+ $IN;
-$CM+ $IS;
-$CM+ $JL;
-$CM+ $JV;
-$CM+ $JT;
-$CM+ $NS;
-$CM+ $NU;
-$CM+ $OP;
-$CM+ $PO;
-$CM+ $PR;
-$CM+ $QU;
-$CM+ $SY;
-$CM+ $WJ;
-$CM+;
+^$CM+ $ALPlus;
+^$CM+ $BA;
+^$CM+ $BB;
+^$CM+ $B2;
+^$CM+ $CL;
+^$CM+ $EX;
+^$CM+ $GL;
+^$CM+ $HL;
+^$CM+ $HY;
+^$CM+ $H2;
+^$CM+ $H3;
+^$CM+ $ID;
+^$CM+ $IN;
+^$CM+ $IS;
+^$CM+ $JL;
+^$CM+ $JV;
+^$CM+ $JT;
+^$CM+ $NS;
+^$CM+ $NU;
+^$CM+ $OP;
+^$CM+ $PO;
+^$CM+ $PR;
+^$CM+ $QU;
+^$CM+ $SY;
+^$CM+ $WJ;
+^$CM+;
#
@@ -468,7 +467,7 @@ $LF $CR;
# X $CM needs to behave like X, where X is not $SP or controls.
# $CM not covered by the above needs to behave like $AL
# Stick together any combining sequences that don't match other rules.
-$CM+ $CAN_CM;
+^$CM+ $CAN_CM;
# LB 11
@@ -606,8 +605,8 @@ $CM* ($ALPlus | $HL | $NU) $CM* ($CL | $SY)+ [^$SP];
!!safe_reverse;
# LB 7
-$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-$CM+ $SP / .;
+^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
+^$CM+ $SP / .;
# LB 9
$SP+ $CM* $OP;
--
2.42.1