From d1336a0e23cf4ed579caaeec66588c05e462d78a Mon Sep 17 00:00:00 2001
From: Bram
Date: Fri, 29 Mar 2024 07:15:27 +0100
Subject: [PATCH] Add server name + user id parsers

---
 src/Internal/Grammar/ServerName.elm | 221 ++++++++++++++++++++++++++++
 src/Internal/Grammar/UserId.elm     |  81 ++++++++++
 2 files changed, 302 insertions(+)
 create mode 100644 src/Internal/Grammar/ServerName.elm
 create mode 100644 src/Internal/Grammar/UserId.elm

diff --git a/src/Internal/Grammar/ServerName.elm b/src/Internal/Grammar/ServerName.elm
new file mode 100644
index 0000000..8018617
--- /dev/null
+++ b/src/Internal/Grammar/ServerName.elm
@@ -0,0 +1,221 @@
+module Internal.Grammar.ServerName exposing (..)
+{-| A homeserver is uniquely identified by its server name. The server name
+represents the address at which the homeserver in question can be reached by
+other homeservers.
+-}
+
+import Internal.Tools.ParserExtra as PE
+import Parser as P exposing (Parser, (|.), (|=))
+import Internal.Config.Log exposing (Log, log)
+import Internal.Config.Text as Text
+
+{-| The hostname is the location where the server can be found.
+
+Notice how the Matrix spec specifies that the hostname can either be a DNS name,
+an IPv4Address or an IPv6Address. Since the IPv4Address is compatible with the
+specification of DNS names, however, and RFC1123 (section 2.1) does not require
+a client to distinguish them, we treat IPv4Addresses like DNS names.
+-}
+type HostName
+    = DNS String
+    | IPv6 IPv6Address
+
+{-| The IPv6Address is represented by a list of items BEFORE and AFTER the
+double colons (::).
+-}
+type alias IPv6Address = { front : List String, back : List String }
+
+{-| The server name is a combination of a hostname and an optional port.
+-}
+type ServerName = ServerName { host : HostName, port_ : Maybe Int }
+
+{-| Parser for the DNS name record. The Matrix spec bases its grammar on the
+standard for internet host names, as specified by RFC1123, section 2.1, with an
+extension for IPv6 literals.
+
+    [RFC-1123 §2.1]
+
+    The syntax of a legal Internet host name was specified in RFC-952
+    [DNS:4]. One aspect of host name syntax is hereby changed: the
+    restriction on the first character is relaxed to allow either a
+    letter or a digit. Host software MUST support this more liberal
+    syntax.
+
+    Host software MUST handle host names of up to 63 characters and
+    SHOULD handle host names of up to 255 characters.
+
+    [RFC-952 §Assumptions-1]
+
+    A "name" (Net, Host, Gateway, or Domain name) is a text string up
+    to 24 characters drawn from the alphabet (A-Z), digits (0-9), minus
+    sign (-), and period (.). Note that periods are only allowed when
+    they serve to delimit components of "domain style names". (See
+    RFC-921, "Domain Name System Implementation Schedule", for
+    background).
+-}
+dnsNameParser : Parser String
+dnsNameParser =
+    P.chompIf Char.isAlphaNum
+        |. P.chompWhile (\c -> Char.isAlphaNum c || c == '-' || c == '.')
+        |> P.getChompedString
+
+{-| Parse a hostname: either an IPv6 literal enclosed in square brackets, or a
+DNS name (which also covers IPv4 addresses).
+-}
+hostnameParser : Parser HostName
+hostnameParser =
+    P.oneOf
+        [ P.succeed IPv6
+            |. P.symbol "["
+            |= ipv6Parser
+            |. P.symbol "]"
+        , P.succeed DNS
+            |= dnsNameParser
+        ]
+
+{-| Parse all values to the left of the double colon (::).
+
+If the address starts with the double colon, only its first colon is consumed
+here; the second colon is left for `ipv6RightParser`.
+-}
+ipv6LeftParser : Parser (List String)
+ipv6LeftParser =
+    P.oneOf
+        [ P.succeed []
+            |. P.symbol ":"
+        , P.succeed (|>)
+            |= PE.times 1 7 (ipv6NumParser |. P.symbol ":")
+            |= P.oneOf
+                [ P.succeed (\bottom tail -> tail ++ [bottom])
+                    |= ipv6NumParser
+                , P.succeed identity
+                ]
+        ]
+
+{-| Parse one group of an IPv6 address: hexadecimal digits, repeated through
+`PE.times 1 4` and concatenated into a single string.
+-}
+ipv6NumParser : Parser String
+ipv6NumParser =
+    P.chompIf Char.isHexDigit
+        |> P.getChompedString
+        |> PE.times 1 4
+        |> P.map String.concat
+
+{-| Parse an IPv6 address (without the surrounding square brackets).
+
+The number of groups found on the left of the double colon determines how many
+groups may still follow on the right.
+-}
+ipv6Parser : Parser IPv6Address
+ipv6Parser =
+    ipv6LeftParser
+        |> P.andThen
+            (\front ->
+                P.succeed (IPv6Address front)
+                    |= ipv6RightParser (8 - List.length front)
+            )
+
+{-| Parse all values to the right of the double colon (::), where `n` is the
+number of groups that may still follow.
+
+When `n` is not positive the left-hand side already holds all 8 groups, so no
+double colon is expected and nothing is consumed. Otherwise the remaining colon
+of the double colon is consumed, optionally followed by colon-separated groups.
+
+NOTE: this assumes `PE.times min max` repeats a parser between `min` and `max`
+times; the lower bound is 0 so that a single trailing group (as in `::1`) is
+accepted.
+-}
+ipv6RightParser : Int -> Parser (List String)
+ipv6RightParser n =
+    if n > 0 then
+        P.succeed identity
+            |. P.symbol ":"
+            |= P.oneOf
+                [ P.succeed (::)
+                    |= ipv6NumParser
+                    |= PE.times 0 (n - 1)
+                        ( P.succeed identity
+                            |. P.symbol ":"
+                            |= ipv6NumParser
+                        )
+                , P.succeed []
+                ]
+    else
+        P.succeed []
+
+{-| Convert an IPv6 address to a readable string format, without the
+surrounding square brackets.
+
+A side that holds all 8 groups is written out in full. Otherwise both sides are
+joined around a double colon, so an empty side still yields the full `::`.
+-}
+ipv6ToString : IPv6Address -> String
+ipv6ToString { front, back } =
+    if List.length front == 8 then
+        String.join ":" front
+    else if List.length back == 8 then
+        String.join ":" back
+    else
+        String.join ":" front ++ "::" ++ String.join ":" back
+
+{-| Parse a port number: a sequence of decimal digits whose value lies between
+0 and 65535.
+-}
+portParser : Parser Int
+portParser =
+    P.chompIf Char.isDigit
+        |. P.chompWhile Char.isDigit
+        |> P.getChompedString
+        |> P.andThen
+            (\v ->
+                case String.toInt v of
+                    Just i ->
+                        if 0 <= i && i <= 2^16 - 1 then
+                            P.succeed i
+                        else
+                            P.problem ("Port out of range: " ++ v)
+
+                    Nothing ->
+                        P.problem "Not a port number"
+            )
+
+{-| Parse a server name: a hostname, optionally followed by a colon and a port
+number.
+-}
+servernameParser : Parser ServerName
+servernameParser =
+    P.succeed (\h p -> ServerName { host = h, port_ = p } )
+        |= hostnameParser
+        |= P.oneOf
+            [ P.succeed Just
+                |. P.symbol ":"
+                |= portParser
+            , P.succeed Nothing
+            ]
+
+{-| Convert a server name to its string representation.
+
+IPv6 hosts are wrapped in square brackets, matching what `hostnameParser`
+accepts, so that the colon in front of the port stays unambiguous.
+-}
+toString : ServerName -> String
+toString (ServerName { host, port_ }) =
+    let
+        hostString : String
+        hostString =
+            case host of
+                DNS name ->
+                    name
+
+                IPv6 address ->
+                    "[" ++ ipv6ToString address ++ "]"
+
+        portString : String
+        portString =
+            port_
+                |> Maybe.map String.fromInt
+                |> Maybe.map ((++) ":")
+                |> Maybe.withDefault ""
+    in
+    hostString ++ portString
diff --git a/src/Internal/Grammar/UserId.elm b/src/Internal/Grammar/UserId.elm
new file mode 100644
index 0000000..d56ea9a
--- /dev/null
+++ b/src/Internal/Grammar/UserId.elm
@@ -0,0 +1,81 @@
+module Internal.Grammar.UserId exposing (..)
+{-| # User ids
+
+Users within Matrix are uniquely identified by their Matrix user ID. The user
+ID is namespaced to the homeserver which allocated the account and has the form:
+
+    @localpart:domain
+
+The localpart of a user ID is an opaque identifier for that user. It MUST NOT
+be empty, and MUST contain only the characters a-z, 0-9, ., _, =, -, /, and +.
+
+The domain of a user ID is the server name of the homeserver which allocated
+the account.
+
+The length of a user ID, including the @ sigil and the domain, MUST NOT exceed
+255 characters.
+
+The complete grammar for a legal user ID is:
+
+    user_id = "@" user_id_localpart ":" server_name
+    user_id_localpart = 1*user_id_char
+    user_id_char = DIGIT
+                 / %x61-7A                   ; a-z
+                 / "-" / "." / "=" / "_" / "/" / "+"
+
+Older versions of this specification were more tolerant of the characters
+permitted in user ID localparts. There are currently active users whose user
+IDs do not conform to the permitted character set, and a number of rooms whose
+history includes events with a sender which does not conform. In order to
+handle these rooms successfully, clients and servers MUST accept user IDs with
+localparts from the expanded character set:
+
+    extended_user_id_char = %x21-39 / %x3B-7E  ; all ASCII printing chars except :
+
+-}
+
+import Internal.Grammar.ServerName as ServerName exposing (ServerName)
+import Internal.Tools.ParserExtra as PE
+import Parser as P exposing (Parser, (|.), (|=))
+
+{-| The user ID consists of a localpart and the server name of the homeserver
+that allocated the account.
+-}
+type UserID = UserID { localpart : String, domain : ServerName }
+
+{-| Parse the localpart of a user ID, accepting the extended (historical)
+character set.
+-}
+localpartParser : Parser String
+localpartParser =
+    P.chompIf validHistoricalUsernameChar
+        |> P.getChompedString
+        |> PE.times 1 255
+        |> P.map String.concat
+
+{-| Convert a user ID to its string representation `@localpart:domain`.
+-}
+toString : UserID -> String
+toString (UserID { localpart, domain }) =
+    String.concat [ "@", localpart, ":", ServerName.toString domain ]
+
+{-| Parse a user ID of the form `@localpart:domain`.
+-}
+userIdParser : Parser UserID
+userIdParser =
+    P.succeed (\l d -> UserID { localpart = l, domain = d } )
+        |. P.symbol "@"
+        |= localpartParser
+        |. P.symbol ":"
+        |= ServerName.servernameParser
+
+{-| Determine whether a character belongs to the extended user ID character
+set: all ASCII printing characters except the colon.
+-}
+validHistoricalUsernameChar : Char -> Bool
+validHistoricalUsernameChar c =
+    let
+        i : Int
+        i = Char.toCode c
+    in
+    (0x21 <= i && i <= 0x39) || (0x3B <= i && i <= 0x7E)