Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ maddy uses [semver versioning](https://semver.org/).

## Upcoming

* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Only create emphasis tags at word boundaries, i.e. `not only_internal_underscores`.
* ...

## version 1.5.0 2025-04-21
Expand Down
4 changes: 3 additions & 1 deletion include/maddy/emphasizedparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ class EmphasizedParser : public LineParser
*/
void Parse(std::string& line) override
{
// Modified from previous version, with help from
// https://stackoverflow.com/questions/61346949/regex-for-markdown-emphasis
Copy link
Owner

@progsource progsource Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest putting this comment into your commit message instead and removing it from here, because as a comment it could become outdated at some point.

static std::regex re(
R"((?!.*`.*|.*<code>.*)_(?!.*`.*|.*<\/code>.*)([^_]*)_(?!.*`.*|.*<\/code>.*))"
R"((?!.*`.*|.*<code>.*)\b_(?![\s])(?!.*`.*|.*<\/code>.*)(.*?[^\s])_\b(?!.*`.*|.*<\/code>.*))"
);
static std::string replacement = "<em>$1</em>";

Expand Down
17 changes: 17 additions & 0 deletions include/maddy/strongparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,23 @@ class StrongParser : public LineParser
*/
void Parse(std::string& line) override
{
// This version of the regex is changed exactly the same way
// that the regex for the emphasized parser was changed, and
// it then passes all the 'disabled' tests in the 'strong parser'
// test, but then it fails general parsing. For some reason,
// "__text__" translates to "<i></i>text<i></i>" even though there
// are no word boundaries at the correct places. It's weird!
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The strong parser is usually handled before the emphasized one: https://github.com/progsource/maddy/blob/master/include/maddy/parser.h#L195
This is so that double _ or * can more easily be identified as strong. Therefore, I guess that is why you only get italic tags if you do not first run it through the strong parser.


// static std::vector<std::regex> res{
// std::regex{
// R"((?!.*`.*|.*<code>.*)\b\*\*(?![\s])(?!.*`.*|.*<\/code>.*)"
// "(.*?[^\s])\*\*\b(?!.*`.*|.*<\/code>.*))"
// },
// std::regex{
// R"((?!.*`.*|.*<code>.*)\b__(?![\s])(?!.*`.*|.*<\/code>.*)"
// "(.*?[^\s])__\b(?!.*`.*|.*<\/code>.*))"
// }
// };
static std::vector<std::regex> res{
std::regex{
R"((?!.*`.*|.*<code>.*)\*\*(?!.*`.*|.*<\/code>.*)([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))"
Expand Down
113 changes: 113 additions & 0 deletions tests/maddy/test_maddy_emphasizedparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,89 @@ TEST(MADDY_EMPHASIZEDPARSER, ItReplacesMarkdownWithEmphasizedHTML)
ASSERT_EQ(expected, text);
}

TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresAtStringEdges)
{
  // An underscore pair that wraps the entire line must still become <em>.
  const std::string want = "<em>some text</em>";
  std::string line = "_some text_";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores)
{
  // Underscores embedded in "text_bla_text" stay literal, while the
  // stand-alone "_it_" later in the same line is emphasized.
  const std::string want = "some text_bla_text testing <em>it</em> out";
  std::string line = "some text_bla_text testing _it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItOnlyReplacesUnderscoresAtWordBreaks)
{
  // In "_text_bla_" only the outer underscores delimit the emphasis; the
  // one between "text" and "bla" is kept verbatim.
  const std::string want = "some <em>text_bla</em> testing <em>it</em> out";
  std::string line = "some _text_bla_ testing _it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresWithMultipleWords)
{
  // Emphasis may span several space-separated words.
  const std::string want = "some <em>text testing it</em> out";
  std::string line = "some _text testing it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItAllowsDoubleUnderscores)
{
  // With a doubled opening underscore, one underscore is kept as literal
  // text inside the emphasis. The author was unsure whether this is
  // standard: it mirrors how GitHub's markdown renderer behaves, whereas
  // other implementations they had seen expect *no* match here.
  const std::string want = "some <em>_text testing it</em> out";
  std::string line = "some __text testing it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks)
{
  // Text between <code> and </code> must pass through the parser untouched,
  // so the input doubles as the expected output.
  const std::string unchanged =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code> at all";
  std::string line = unchanged;

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(unchanged, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceUnderscoresInURLs)
{
  // Underscores that are part of a link target must not be turned into
  // emphasis tags; the line is expected to come back unmodified.
  const std::string unchanged =
    "[Link Title](http://example.com/what_you_didn't_know)";
  std::string line = unchanged;

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(unchanged, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode)
{
std::string text = "some text `*bla*` `/**text*/` testing _it_ out";
Expand All @@ -32,3 +115,33 @@ TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode)

ASSERT_EQ(expected, text);
}

TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideCodeBlocks)
{
  // The <code> section is left alone, but emphasis that follows the closing
  // </code> on the same line is still converted.
  const std::string want =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code>"
    " but outside <em>should</em>.";
  std::string line =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code>"
    " but outside _should_.";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideTickBlocks)
{
  // Same as the <code> case, but with backtick-delimited inline code: the
  // ticked span is preserved and the trailing "_should_" is emphasized.
  const std::string want =
    "Stuff inside `blocks _shouldn't be emphasized_ `"
    " but outside <em>should</em>.";
  std::string line =
    "Stuff inside `blocks _shouldn't be emphasized_ `"
    " but outside _should_.";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}
115 changes: 115 additions & 0 deletions tests/maddy/test_maddy_strongparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,118 @@ TEST(MADDY_STRONGPARSER, ItDoesNotParseInsideInlineCode)
ASSERT_EQ(test.expected, test.text);
}
}

TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresAtStringEdges)
{
  // A double-underscore pair that wraps the entire line becomes <strong>.
  const std::string want = "<strong>some text</strong>";
  std::string line = "__some text__";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores)
{
  // Disabled via the DISABLED_ prefix on the suite name.
  // Double underscores embedded in "text__bla__text" should stay literal,
  // while the stand-alone "__it__" is made strong.
  const std::string want =
    "some text__bla__text testing <strong>it</strong> out";
  std::string line = "some text__bla__text testing __it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItOnlyReplacesUnderscoresAtWordBreaks)
{
  // Disabled via the DISABLED_ prefix on the suite name.
  // In "__text__bla__" only the outer pairs should delimit the strong span;
  // the pair between "text" and "bla" is expected to remain verbatim.
  const std::string want =
    "some <strong>text__bla</strong> testing <strong>it</strong> out";
  std::string line = "some __text__bla__ testing __it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresWithMultipleWords)
{
  // A strong span may cover several space-separated words.
  const std::string want = "some <strong>text testing it</strong> out";
  std::string line = "some __text testing it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItAllowsTripleUnderscores)
{
// I'm not sure if this is standard or not, but this is how the github
// markdown parser behaves. Other things I've seen want it to *not*
// match.

std::string text = "some ___text testing it__ out";
std::string expected = "some <strong>_text testing it</strong> out";
Copy link
Owner

@progsource progsource Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the CommonMark Spec, there is this example:

markdown	"___foo__\n"
html	"<p>_<strong>foo</strong></p>\n"
example	456

Also GitHub spec: https://github.github.com/gfm/#example-465

So I guess, the _ in the expected string has to come before the strong tag.

auto strongParser = std::make_shared<maddy::StrongParser>();

strongParser->Parse(text);

ASSERT_EQ(expected, text);
}

TEST(MADDY_STRONGPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks)
{
  // Text between <code> and </code> must pass through the parser untouched,
  // so the input doubles as the expected output.
  const std::string unchanged =
    "Stuff inside <code> blocks __shouldn't be strong__ </code> at all";
  std::string line = unchanged;

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(unchanged, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceUnderscoresInURLs)
{
  // Disabled via the DISABLED_ prefix on the suite name.
  // Double underscores inside a link target should not become strong tags;
  // the line is expected to come back unmodified.
  const std::string unchanged =
    "[Link Title](http://example.com/what__you__didn't__know)";
  std::string line = unchanged;

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(unchanged, line);
}

TEST(MADDY_STRONGPARSER, ItParsesOutsideCodeBlocks)
{
  // The <code> section is left alone, but a strong span that follows the
  // closing </code> on the same line is still converted.
  const std::string want =
    "Stuff inside <code> blocks __shouldn't be strong__ </code>"
    " but outside <strong>should</strong>.";
  std::string line =
    "Stuff inside <code> blocks __shouldn't be strong__ </code>"
    " but outside __should__.";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}

TEST(MADDY_STRONGPARSER, ItParsesOutsideTickBlocks)
{
  // Same as the <code> case, but with backtick-delimited inline code: the
  // ticked span is preserved and the trailing "__should__" is made strong.
  const std::string want =
    "Stuff inside `blocks __shouldn't be strong__ `"
    " but outside <strong>should</strong>.";
  std::string line =
    "Stuff inside `blocks __shouldn't be strong__ `"
    " but outside __should__.";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(want, line);
}