From fd569aa476866932ab39ce70d089ccdba99b356b Mon Sep 17 00:00:00 2001 From: Bram van den Heuvel Date: Wed, 10 Jan 2024 09:22:12 +0100 Subject: [PATCH] Implement complex JSON decoder --- src/Internal/Config/Leaks.elm | 23 +- src/Internal/Config/Log.elm | 38 +- src/Internal/Config/Phantom.elm | 2 +- src/Internal/Config/Text.elm | 49 ++- src/Internal/Tools/Decode.elm | 680 +++++++++++++++++++++++++++++++- 5 files changed, 766 insertions(+), 26 deletions(-) diff --git a/src/Internal/Config/Leaks.elm b/src/Internal/Config/Leaks.elm index d9f2d07..f3125ca 100644 --- a/src/Internal/Config/Leaks.elm +++ b/src/Internal/Config/Leaks.elm @@ -1,4 +1,7 @@ -module Internal.Config.Leaks exposing (accessToken, baseUrl, transaction, versions) +module Internal.Config.Leaks exposing + ( accessToken, baseUrl, transaction, versions + , allLeaks + ) {-| @@ -29,8 +32,14 @@ know 100% sure that the value isn't `Nothing`. @docs accessToken, baseUrl, transaction, versions +For safety purposes, all leaking values are stored in the following value: + +@docs allLeaks + -} +import Set exposing (Set) + {-| Placeholder access token. -} @@ -39,6 +48,18 @@ accessToken = "elm-sdk-placeholder-access-token-leaks" +allLeaks : Set String +allLeaks = + Set.union + (Set.fromList versions) + (Set.fromList + [ accessToken + , baseUrl + , transaction + ] + ) + + {-| Placeholder base URL. -} baseUrl : String diff --git a/src/Internal/Config/Log.elm b/src/Internal/Config/Log.elm index c591ec0..ec50cb7 100644 --- a/src/Internal/Config/Log.elm +++ b/src/Internal/Config/Log.elm @@ -1,5 +1,9 @@ module Internal.Config.Log exposing (caughtError, debug, error, info, securityWarn, warn) -{-| # Logs + +{-| + + +# Logs The logs module exposes various log types that can be used to indicate logs. This helps users filter for the logs that they care about. @@ -8,29 +12,39 @@ The logs are encoded as strings as to allow the addition of new log types without triggering a major update. @docs caughtError, debug, error, info, securityWarn, warn + -} + {-| A caught error is an error that has been caught elsewhere in the code, hence functioning as a secondary debug channel. -} caughtError : String -caughtError = "caught-error" +caughtError = + "caught-error" + {-| Debug logs are logs that can be used to debug API interactions. -} debug : String -debug = "debug" +debug = + "debug" + {-| Error strings indicate that something unexpected has happened. As a result, something has stopped working. -} error : String -error = "error" +error = + "error" + {-| Info contains relevant info for the user -} info : String -info = "info" +info = + "info" + {-| Security warnings are warnings that contain red flags. @@ -40,15 +54,19 @@ contains, but it can raise a user's attention to suspicious situations. For example, if the homeserver returns room ids that do not look like usernames at all, the homeserver can raise a security warning, which indicates that: -1. The homeserver might be bugged -2. The Elm SDK might be severaly outdated -3. The homeserver might be compromised and/or trying to attack the Elm SDK +1. The homeserver might be bugged +2. The Elm SDK might be severaly outdated +3. The homeserver might be compromised and/or trying to attack the Elm SDK + -} securityWarn : String -securityWarn = "security-warn" +securityWarn = + "security-warn" + {-| Warning logs are logs that are unusual, but that can be dealt with. Warnings are debug logs that are out of the ordinary. -} warn : String -warn = "warn" +warn = + "warn" diff --git a/src/Internal/Config/Phantom.elm b/src/Internal/Config/Phantom.elm index 7ad0489..c378581 100644 --- a/src/Internal/Config/Phantom.elm +++ b/src/Internal/Config/Phantom.elm @@ -29,7 +29,7 @@ The phantom types in this module help you in the following way: {-| Opaque type that encapsulates a bool. -} -type PBool a +type PBool ph = PBool Bool diff --git a/src/Internal/Config/Text.elm b/src/Internal/Config/Text.elm index 0a7bd62..c86bd4c 100644 --- a/src/Internal/Config/Text.elm +++ b/src/Internal/Config/Text.elm @@ -1,7 +1,8 @@ module Internal.Config.Text exposing - ( versionsFoundLocally, versionsReceived, versionsFailedToDecode - , accessTokenFoundLocally, accessTokenExpired, accessTokenInvalid + ( accessTokenFoundLocally, accessTokenExpired, accessTokenInvalid + , versionsFoundLocally, versionsReceived, versionsFailedToDecode , unsupportedVersionForEndpoint + , decodedDictSize, leakingValueFound ) {-| Throughout the Elm SDK, there are lots of pieces of text being used for @@ -23,14 +24,6 @@ This is a risky feature, keep in mind that even a patch update might break this! You should only do this if you know what you're doing. -## API Versions - -Messages sent as API logs while the Elm SDK is figuring out how modern the -homeserver is and how it can best communicate. - -@docs versionsFoundLocally, versionsReceived, versionsFailedToDecode - - ## API Authentication Messages sent as API logs during the authentication phase of the API @@ -41,12 +34,27 @@ interaction. offers room for translation, re-wording and refactors. +## API Versions + +Messages sent as API logs while the Elm SDK is figuring out how modern the +homeserver is and how it can best communicate. + +@docs versionsFoundLocally, versionsReceived, versionsFailedToDecode + + ## API miscellaneous messages Messages sent as API logs during communication with the API. @docs unsupportedVersionForEndpoint + +## JSON decoder + +Messages sent as API logs when a JSON value is being decoded. + +@docs decodedDictSize, leakingValueFound + -} @@ -73,6 +81,27 @@ accessTokenInvalid = "Matrix API rejected access token as invalid" +decodedDictSize : Int -> Int -> String +decodedDictSize from to = + String.concat + [ "JSON dict contained duplicate keys (JSON had " + , String.fromInt from + , " keys, Elm dict has " + , String.fromInt to + , " keys)" + ] + + +{-| The Elm SDK occassionally uses [leaking values](Internal-Config-Leaks), +which might indicate exceptional behaviour. As such, this log is sent when one +of those leaking values is found: to alert the user that something fishy might +be going on. +-} +leakingValueFound : String -> String +leakingValueFound leaking_value = + "Found leaking value : " ++ leaking_value + + {-| The Matrix homeserver can specify how it wishes to communicate, and the Elm SDK aims to communicate accordingly. This may fail in some scenarios, however, in which case it will throw this error. diff --git a/src/Internal/Tools/Decode.elm b/src/Internal/Tools/Decode.elm index 7948e91..7e8068d 100644 --- a/src/Internal/Tools/Decode.elm +++ b/src/Internal/Tools/Decode.elm @@ -1,5 +1,17 @@ -module Internal.Tools.Decode exposing (Decoder) -{-| # Advanced security Json.Decode +module Internal.Tools.Decode exposing + ( Decoder, string, bool, int, float + , nullable, list, slowDict, fastDict, keyValuePairs + , field, at, index, opField, opFieldWithDefault + , maybe, oneOf + , map, map2, map3, map4, map5, map6, map7, map8, map9, map10, map11 + , lazy, value, null, succeed, fail, andThen + , pString, pBool, pInt, pList + ) + +{-| + + +# Advanced security Json.Decode This module extends the standard JSON encode / decode library for security measures. Most Elm libraries do not access an API this often without insight @@ -9,11 +21,671 @@ what is going on. Additionally, the decoder will warn for suspicious values, and provide helpful errors when the JSON fails to decode. + ## Primitives -@docs Decoder +@docs Decoder, string, bool, int, float + + +## Data structures + +@docs nullable, list, slowDict, fastDict, keyValuePairs + + +## Object primitives + +@docs field, at, index, opField, opFieldWithDefault + + +## Inconsistent structure + +@docs maybe, oneOf + + +## Mapping + +@docs map, map2, map3, map4, map5, map6, map7, map8, map9, map10, map11 + + +## Fancy decoding + +@docs lazy, value, null, succeed, fail, andThen + + +## Phantom decoding + +Phantom decoders allow you to create phantom types of standard Elm types. + +@docs pString, pBool, pInt, pList + -} +import Dict as SDict +import FastDict as FDict +import Internal.Config.Leaks as L +import Internal.Config.Log as Log +import Internal.Config.Phantom as Phantom +import Internal.Config.Text as Text +import Internal.Tools.DecodeExtra as D import Json.Decode as D +import Set -type Decoder a = D.Decoder { value : a, messages : List String } \ No newline at end of file + +{-| A value that knows how to decode JSON values. +-} +type alias Decoder a = + D.Decoder { content : a, messages : List ( String, String ) } + + +{-| Create decoders that depend on previous results. +-} +andThen : (a -> Decoder b) -> Decoder a -> Decoder b +andThen func = + D.andThen + (\a -> + D.map + (\b -> { b | messages = a.messages ++ b.messages }) + (func a.content) + ) + + +{-| Decode a nested JSON object, requiring certain fields. +-} +at : List String -> Decoder a -> Decoder a +at = + D.at + + +{-| Decode a JSON boolean into an Elm bool. +-} +bool : Decoder Bool +bool = + D.map empty D.bool + + +{-| Initialize a standard object for the decoder. +-} +empty : a -> { content : a, messages : List ( String, String ) } +empty x = + { content = x, messages = [] } + + +{-| Ignore the JSON and make the decoder fail. +-} +fail : String -> Decoder a +fail = + D.fail + + +{-| Decode a JSON object into a fast Elm dict from miniBill/elm-fast-dict. +-} +fastDict : Decoder a -> Decoder (FDict.Dict String a) +fastDict x = + keyValuePairs x + |> andThen + (\pairs -> + let + dict = + FDict.fromList pairs + + oldLength = + List.length pairs + + newLength = + FDict.size dict + in + if oldLength == newLength then + succeed dict + + else + D.succeed + { content = dict + , messages = + [ ( Log.warn, Text.decodedDictSize oldLength newLength ) ] + } + ) + + +{-| Decode a JSON object, requiring a particular field. +-} +field : String -> Decoder a -> Decoder a +field = + D.field + + +{-| Decode a JSON number into an Elm flaot. +-} +float : Decoder Float +float = + D.map empty D.float + + +{-| Decode a JSON array, requiring a particular index. +-} +index : Int -> Decoder a -> Decoder a +index = + D.index + + +{-| Decode a JSON number into an Elm int. +-} +int : Decoder Int +int = + D.map empty D.int + + +{-| Decode a JSON object into a list of pairs. +-} +keyValuePairs : Decoder a -> Decoder (List ( String, a )) +keyValuePairs x = + D.map + (\result -> + { content = List.map (Tuple.mapSecond .content) result + , messages = + result + |> List.map Tuple.second + |> List.map .messages + |> List.concat + } + ) + (D.keyValuePairs x) + + +{-| Sometimes you have JSON with recursive structure, like nested comments. +You can use `lazy` to make sure your decoder unrolls lazily. +-} +lazy : (() -> Decoder a) -> Decoder a +lazy = + D.lazy + + +{-| Decode a JSON array into an Elm list. +-} +list : Decoder a -> Decoder (List a) +list x = + D.map + (\result -> + { content = List.map .content result + , messages = + result + |> List.map .messages + |> List.concat + } + ) + (D.list x) + + +{-| Transform a decoder. +-} +map : (a -> value) -> Decoder a -> Decoder value +map func da = + D.map + (\a -> + { content = func a.content + , messages = a.messages + } + ) + da + + +{-| Try two decoders and combine the result. +-} +map2 : (a -> b -> value) -> Decoder a -> Decoder b -> Decoder value +map2 func da db = + D.map2 + (\a b -> + { content = func a.content b.content + , messages = + List.concat + [ a.messages + , b.messages + ] + } + ) + da + db + + +{-| Try three decoders and combine the result. +-} +map3 : (a -> b -> c -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder value +map3 func da db dc = + D.map3 + (\a b c -> + { content = func a.content b.content c.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + ] + } + ) + da + db + dc + + +{-| Try four decoders and combine the result. +-} +map4 : (a -> b -> c -> d -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder value +map4 func da db dc dd = + D.map4 + (\a b c d -> + { content = func a.content b.content c.content d.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + ] + } + ) + da + db + dc + dd + + +{-| Try five decoders and combine the result. +-} +map5 : (a -> b -> c -> d -> e -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder e -> Decoder value +map5 func da db dc dd de = + D.map5 + (\a b c d e -> + { content = func a.content b.content c.content d.content e.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + ] + } + ) + da + db + dc + dd + de + + +{-| Try six decoders and combine the result. +-} +map6 : (a -> b -> c -> d -> e -> f -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder e -> Decoder f -> Decoder value +map6 func da db dc dd de df = + D.map6 + (\a b c d e f -> + { content = func a.content b.content c.content d.content e.content f.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + ] + } + ) + da + db + dc + dd + de + df + + +{-| Try seven decoders and combine the result. +-} +map7 : (a -> b -> c -> d -> e -> f -> g -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder e -> Decoder f -> Decoder g -> Decoder value +map7 func da db dc dd de df dg = + D.map7 + (\a b c d e f g -> + { content = func a.content b.content c.content d.content e.content f.content g.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + , g.messages + ] + } + ) + da + db + dc + dd + de + df + dg + + +{-| Try eight decoders and combine the result. +-} +map8 : (a -> b -> c -> d -> e -> f -> g -> h -> value) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder e -> Decoder f -> Decoder g -> Decoder h -> Decoder value +map8 func da db dc dd de df dg dh = + D.map8 + (\a b c d e f g h -> + { content = func a.content b.content c.content d.content e.content f.content g.content h.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + , g.messages + , h.messages + ] + } + ) + da + db + dc + dd + de + df + dg + dh + + +{-| Try 9 decoders and combine the result. +-} +map9 : + (a -> b -> c -> d -> e -> f -> g -> h -> i -> value) + -> Decoder a + -> Decoder b + -> Decoder c + -> Decoder d + -> Decoder e + -> Decoder f + -> Decoder g + -> Decoder h + -> Decoder i + -> Decoder value +map9 func da db dc dd de df dg dh di = + D.map8 + (\a b c d e f g ( h, i ) -> + { content = func a.content b.content c.content d.content e.content f.content g.content h.content i.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + , g.messages + , h.messages + , i.messages + ] + } + ) + da + db + dc + dd + de + df + dg + (D.map2 Tuple.pair dh di) + + +{-| Try 10 decoders and combine the result. +-} +map10 : + (a -> b -> c -> d -> e -> f -> g -> h -> i -> j -> value) + -> Decoder a + -> Decoder b + -> Decoder c + -> Decoder d + -> Decoder e + -> Decoder f + -> Decoder g + -> Decoder h + -> Decoder i + -> Decoder j + -> Decoder value +map10 func da db dc dd de df dg dh di dj = + D.map8 + (\a b c d e f ( g, h ) ( i, j ) -> + { content = func a.content b.content c.content d.content e.content f.content g.content h.content i.content j.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + , g.messages + , h.messages + , i.messages + , j.messages + ] + } + ) + da + db + dc + dd + de + df + (D.map2 Tuple.pair dg dh) + (D.map2 Tuple.pair di dj) + + +{-| Try 11 decoders and combine the result. +-} +map11 : + (a -> b -> c -> d -> e -> f -> g -> h -> i -> j -> k -> value) + -> Decoder a + -> Decoder b + -> Decoder c + -> Decoder d + -> Decoder e + -> Decoder f + -> Decoder g + -> Decoder h + -> Decoder i + -> Decoder j + -> Decoder k + -> Decoder value +map11 func da db dc dd de df dg dh di dj dk = + D.map8 + (\a b c d e ( f, g ) ( h, i ) ( j, k ) -> + { content = func a.content b.content c.content d.content e.content f.content g.content h.content i.content j.content k.content + , messages = + List.concat + [ a.messages + , b.messages + , c.messages + , d.messages + , e.messages + , f.messages + , g.messages + , h.messages + , i.messages + , j.messages + , k.messages + ] + } + ) + da + db + dc + dd + de + (D.map2 Tuple.pair df dg) + (D.map2 Tuple.pair dh di) + (D.map2 Tuple.pair dj dk) + + +{-| Helpful for dealing with optional fields +-} +maybe : Decoder a -> Decoder (Maybe a) +maybe x = + D.map + (\result -> + case result of + Just { content, messages } -> + { content = Just content, messages = messages } + + Nothing -> + empty Nothing + ) + (D.maybe x) + + +{-| Decode a `null` value into some Elm value. +-} +null : a -> Decoder a +null = + D.null >> D.map empty + + +{-| Decode a nullable JSON value into an Elm value. +-} +nullable : Decoder a -> Decoder (Maybe a) +nullable x = + D.map + (\result -> + case result of + Just { content, messages } -> + { content = Just content, messages = messages } + + Nothing -> + empty Nothing + ) + (D.nullable x) + + +{-| Try a bunch of different decoders. This can be useful if the JSON may come +in a couple different formats. +-} +oneOf : List (Decoder a) -> Decoder a +oneOf = + D.oneOf + + +{-| Decode a JSON object, requiring a particular field: + + - If the field does not exist, the decoder decodes `Nothing` + - If the field DOES exist, the decoder must always return a `Just content` or fail + +-} +opField : String -> Decoder a -> Decoder (Maybe a) +opField key x = + D.map + (\result -> + case result of + Just { content, messages } -> + { content = Just content, messages = messages } + + Nothing -> + empty Nothing + ) + (D.opField key x) + + +{-| Decode a JSON object, requiring a particular field or raising a default: + + - If the field does not exist, the decoder returns the default + - If the field DOES exist, the decoder must always return value or fail + +-} +opFieldWithDefault : String -> a -> Decoder a -> Decoder a +opFieldWithDefault key default x = + opField key x |> map (Maybe.withDefault default) + + +{-| Transform a JSON boolean into a phantom Elm bool. +-} +pBool : Decoder Bool -> Decoder (Phantom.PBool ph) +pBool = + map Phantom.PBool + + +{-| Transform a JSON number into a phantom Elm int. +-} +pInt : Decoder Int -> Decoder (Phantom.PInt ph) +pInt = + map Phantom.PInt + + +{-| Transform a JSON list into a phantom Elm list. +-} +pList : Decoder (List a) -> Decoder (Phantom.PList ph a) +pList = + map Phantom.PList + + +{-| Transform a JSON string into a phantom Elm string. +-} +pString : Decoder String -> Decoder (Phantom.PString ph) +pString = + map Phantom.PString + + +{-| Decode a JSON object into an Elm dict. +-} +slowDict : Decoder a -> Decoder (SDict.Dict String a) +slowDict x = + D.map + (\result -> + { content = + result + |> List.map (Tuple.mapSecond .content) + |> SDict.fromList + , messages = + result + |> List.map Tuple.second + |> List.map .messages + |> List.concat + } + ) + (D.keyValuePairs x) + + +{-| Decode a JSON string into an Elm string. + +This decoder also checks for suspicious inputs, such as the +[Leaking values](Internal-Config-Leaks), to look for suspicious inputs. + +-} +string : Decoder String +string = + D.map + (\content -> + { content = content + , messages = + if Set.member content L.allLeaks then + [ ( Log.securityWarn, Text.leakingValueFound content ) + ] + + else + [] + } + ) + D.string + + +{-| Ignore the JSON and produce a certain Elm value. +-} +succeed : a -> Decoder a +succeed = + D.succeed >> D.map empty + + +{-| Do not do anything with a JSON value, just bring it into Elm as a `Value`. +-} +value : Decoder D.Value +value = + D.map empty D.value