From a4304f0ed24728e7635e5a2d000d7523334eac63 Mon Sep 17 00:00:00 2001 From: Jeffrey Goldberg Date: Mon, 27 Nov 2023 14:25:09 -0600 Subject: [PATCH] stricter mastodon hostname validation --- rendercv/data_model.py | 47 ++++++++++++++++++++++++++++++++-------- tests/test_data_model.py | 12 ++++++++++ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/rendercv/data_model.py b/rendercv/data_model.py index 8412cc3..6891a4c 100644 --- a/rendercv/data_model.py +++ b/rendercv/data_model.py @@ -1056,6 +1056,31 @@ class Connection(BaseModel): ] value: str + @staticmethod + def is_valid_fqdn(hostname: str) -> bool: + """Is hostname a valid fully qualified domain name.""" + + # cribbed from + # https://stackoverflow.com/a/33214423/1304076 + # because I couldn't find a useful method in dnspython. + if hostname[-1] == ".": + # strip exactly one dot from the right, if present + hostname = hostname[:-1] + if len(hostname) > 253: + return False + + labels = hostname.split(".") + + # the TLD must not be all-numeric + if re.match(r"[0-9]+$", labels[-1]): + return False + + # labels cannot begin with a hyphen + # labels must have at least one character + # labels may not have more than 63 characters + allowed = re.compile(r"(?!-)[a-z0-9-]{1,63}(? Optional[HttpUrl]: """returns profile url from a mastodon user address. @@ -1074,29 +1099,28 @@ class Connection(BaseModel): Exceptions: ValueError if the address is malformed. + Note that well-formed addresses should never yield + syntactically invalid URLs. """ # The closest thing to a formal spec of Mastodon usernames # where these regular expressions from a (reference?) 
# implementation - # - # https://github.com/mastodon/mastodon/blob/852123867768e23410af5bd07ac0327bead0d9b2/app/models/account.rb#L68 + # + # https://github.com/mastodon/mastodon/blob/f1657e6d6275384c199956e8872115fdcec600b0/app/models/account.rb#L68 # # USERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i - # SERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i + # MENTION_RE = %r{(?[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?) # username part @ # separator - (?P[a-z0-9]+([a-z0-9.-]+[a-z0-9]+)?) # domain part + (?P[a-z0-9]+([a-z0-9.-]+)?) # domain part \s*$ # ignore trailing whitespace """, re.VERBOSE | re.IGNORECASE) @@ -1106,6 +1130,11 @@ class Connection(BaseModel): uname = m.group("uname") domain = m.group("domain") + # the domain part of pattern allows some things that are not + # valid names. So we run a stricter check + if not Connection.is_valid_fqdn(domain): + raise ValueError("Invalid hostname in mastodon address") + url = HttpUrl(f'https://{domain}/@{uname}') return url diff --git a/tests/test_data_model.py b/tests/test_data_model.py index 7bab82c..87b0800 100644 --- a/tests/test_data_model.py +++ b/tests/test_data_model.py @@ -889,6 +889,18 @@ class TestDataModel(unittest.TestCase): with self.assertRaises(ValueError): data_model.Connection.MastodonUname2Url(mastodon_name) + mastodon_name = 'user@bad.numeric.tld.123' + with self.subTest("All digit TLD"): + with self.assertRaises(ValueError): + data_model.Connection.MastodonUname2Url(mastodon_name) + + mastodon_name = 'a_tooter@example.exchange.' + expected = HttpUrl("https://example.exchange./@a_tooter") + result = data_model.Connection.MastodonUname2Url(mastodon_name) + with self.subTest("With FQDN root '.'"): + self.assertEqual(result, expected) + + if __name__ == '__main__':