From 88e1d083f508c947434e3a4cd83b9306009e95d2 Mon Sep 17 00:00:00 2001 From: Chris Keller <67823070+chr15k@users.noreply.github.com> Date: Sat, 11 Jan 2025 23:10:00 +0000 Subject: [PATCH] fix: update some preg_replace calls in MarkdownRemover and add tests (#57) --- src/TextProcessor/MarkdownRemover.php | 20 ++++++++------------ tests/TextProcessor/MarkdownRemoverTest.php | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/TextProcessor/MarkdownRemover.php b/src/TextProcessor/MarkdownRemover.php index 0d850e5..caef268 100644 --- a/src/TextProcessor/MarkdownRemover.php +++ b/src/TextProcessor/MarkdownRemover.php @@ -45,19 +45,15 @@ public function process(TextInterface $text): TextInterface // Remove blockquotes $output = \PhpSpellcheck\preg_replace('/^\s{0,3}>\s?/', '', $output); // Remove reference-style links? + $output = \PhpSpellcheck\preg_replace('/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/', '', $output); - /** - * Remove atx-style headers. - * - *@TODO find a way to merge the two regex below - * remove ## Heading ## - */ - $output = \PhpSpellcheck\preg_replace('/^#{1,6}\s+(.*)(\s+#{1,6})$/m', '$1', $output); - // remove ## Heading - $output = \PhpSpellcheck\preg_replace('/^#{1,6}\s+(.*)$/m', '$1', $output); - // Remove emphasis (repeat the line to remove double emphasis) - $output = \PhpSpellcheck\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output); - $output = \PhpSpellcheck\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output); + // Remove ## Heading + $output = \PhpSpellcheck\preg_replace('/^#{1,6}\s+(.*?)(?:\s+#{1,6})?$/m', '$1', $output); + // Remove all layers of emphasis + while (\PhpSpellcheck\preg_match('/([\*_]{1,3})(\S.*?\S{0,1})\1/', $output)) { + $output = \PhpSpellcheck\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output); + } + // Remove list items $output = \PhpSpellcheck\preg_replace('/^([^\S\r\n]*)\*\s/m', '$1', $output); // Remove code blocks diff --git a/tests/TextProcessor/MarkdownRemoverTest.php b/tests/TextProcessor/MarkdownRemoverTest.php index cac50fa..4b22775 100644 --- a/tests/TextProcessor/MarkdownRemoverTest.php +++ b/tests/TextProcessor/MarkdownRemoverTest.php @@ -113,6 +113,22 @@ public function testShouldRemoveDoubleEmphasis(): void $this->assertSame($expected, (new MarkdownRemover())->process(t($string))->getContent()); } + public function testShouldRemoveTripleEmphasis(): void + { + $string = 'This text is ***really important***.'; + $expected = 'This text is really important.'; + + $this->assertSame($expected, (new MarkdownRemover())->process(t($string))->getContent()); + } + + public function testShouldRemoveLongerEmphasis(): void + { + $string = 'This text is ******really important******.'; + $expected = 'This text is really important.'; + + $this->assertSame($expected, (new MarkdownRemover())->process(t($string))->getContent()); + } + public function testShouldRemoveHorizontalRules(): void { $string = "Some text on a line\n\n---\n\nA line below";