Disallow _ in domain names

This commit is contained in:
Jeffrey Goldberg 2023-11-27 02:37:09 -06:00
parent bb65cade3a
commit f0d3e705d8
2 changed files with 43 additions and 11 deletions

View File

@ -1057,8 +1057,24 @@ class Connection(BaseModel):
value: str
@staticmethod
def MastodonUname2Url(id: str) -> Optional[HttpUrl]:
"""From a Mastodon id "user@domain.example" returns profile url."""
def MastodonUname2Url(address: str) -> Optional[HttpUrl]:
"""Return the profile URL for a Mastodon user address.
Args:
address (str): A Mastodon user address. E.g., "user@social.example"
Returns:
A pydantic HttpUrl object with the https URL for the user profile
Example:
```
url = MastodonUname2Url("user@social.example")
assert url == HttpUrl("https://social.example/@user")
```
Exceptions:
ValueError if the address is malformed.
"""
# The closest thing to a formal spec of Mastodon usernames
# were these regular expressions from a (reference?)
@ -1068,19 +1084,23 @@ class Connection(BaseModel):
#
# USERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i
# SERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i
# MENTION_RE = %r{(?<![=/[:word:]])@((#{USERNAME_RE})(?:@[[:word:].-]+[[:word:]]+)?)}i
# URL_PREFIX_RE = %r{\Ahttp(s?)://[^/]+}
#
# Note that the SERNAME expression would allow underscores in DNS
# hostname labels. That would lead to invalid hostnames.
#
# I consider that a bug and will not be carrying that over here.
# (It is possible that pydantic would catch the error)
pattern = re.compile(r"""
^\s* # ignore leading spaces
@? # Optional @ prefix
(?P<uname>[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?) # username part
@ # separator
(?P<domain>[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?) # domain part
(?P<domain>[a-z0-9]+([a-z0-9.-]+[a-z0-9]+)?) # domain part
\s*$ # ignore trailing whitespace
""", re.VERBOSE | re.IGNORECASE)
m = pattern.match(id)
m = pattern.match(address)
if m is None:
raise ValueError("Invalid mastodon address")
uname = m.group("uname")

View File

@ -862,14 +862,14 @@ class TestDataModel(unittest.TestCase):
data_model.read_input_file("nonexistent.json")
def test_mastodon_parsing(self):
mastodon_name = 'jpgoldberg@ioc.exchange'
expected = HttpUrl("https://ioc.exchange/@jpgoldberg")
mastodon_name = 'a_tooter@example.exchange'
expected = HttpUrl("https://example.exchange/@a_tooter")
result = data_model.Connection.MastodonUname2Url(mastodon_name)
with self.subTest("Without '@' prefix"):
self.assertEqual(result, expected)
mastodon_name = '@jpgoldberg@ioc.exchange'
expected = HttpUrl("https://ioc.exchange/@jpgoldberg")
mastodon_name = '@a_tooter@example.exchange'
expected = HttpUrl("https://example.exchange/@a_tooter")
result = data_model.Connection.MastodonUname2Url(mastodon_name)
with self.subTest("With '@' prefix"):
self.assertEqual(result, expected)
@ -879,5 +879,17 @@ class TestDataModel(unittest.TestCase):
with self.assertRaises(ValueError):
data_model.Connection.MastodonUname2Url(mastodon_name)
mastodon_name = '@not_enough_at_symbols'
with self.subTest("Missing '@' separator"):
with self.assertRaises(ValueError):
data_model.Connection.MastodonUname2Url(mastodon_name)
mastodon_name = 'user@bad_domain.example'
with self.subTest("Underscore in domain portion"):
with self.assertRaises(ValueError):
data_model.Connection.MastodonUname2Url(mastodon_name)
if __name__ == '__main__':
unittest.main()