From 54f8cb3ff2ece7d37a6263173d8f5b08136ec702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joosep=20J=C3=A4=C3=A4ger?= Date: Mon, 25 May 2026 16:32:33 +0300 Subject: [PATCH 1/5] Make CTree module exports explicit --- CHANGELOG.md | 4 ++++ cuddle.cabal | 2 +- src/Codec/CBOR/Cuddle/CDDL/CTree.hs | 12 +++++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e86fba20..4aacdd6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog for `cuddle` +## 1.9.0.0 + +* + ## 1.8.0.0 * Change `validateCBOR` return type to `Either ValidateCBORError (Evidenced ValidationTrace)` diff --git a/cuddle.cabal b/cuddle.cabal index ba4edad5..4efbe4d7 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -1,6 +1,6 @@ cabal-version: 3.4 name: cuddle -version: 1.8.0.0 +version: 1.9.0.0 synopsis: CDDL Generator and test utilities description: Cuddle is a library for generating and manipulating [CDDL](https://datatracker.ietf.org/doc/html/rfc8610). diff --git a/src/Codec/CBOR/Cuddle/CDDL/CTree.hs b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs index 9294d43e..68815de0 100644 --- a/src/Codec/CBOR/Cuddle/CDDL/CTree.hs +++ b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs @@ -3,7 +3,17 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} -module Codec.CBOR.Cuddle.CDDL.CTree where +module Codec.CBOR.Cuddle.CDDL.CTree ( + XXCTree, + CTree (..), + traverseCTree, + foldCTree, + Node, + CTreeRoot (..), + PTerm (..), + uintMax, + nintMin, +) where import Codec.CBOR.Cuddle.CDDL (Name, OccurrenceIndicator, RangeBound, Value) import Codec.CBOR.Cuddle.CDDL.CtlOp From 49d042ebc57dca11c31f008bc807525b0027979b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joosep=20J=C3=A4=C3=A4ger?= Date: Mon, 25 May 2026 13:15:58 +0300 Subject: [PATCH 2/5] Add Codec.CBOR.Cuddle.CBOR.Canonical --- cuddle.cabal | 1 + src/Codec/CBOR/Cuddle/CBOR/Canonical.hs | 139 ++++++++++++++++++ src/Codec/CBOR/Cuddle/CBOR/Gen.hs | 5 +- src/Codec/CBOR/Cuddle/CBOR/Validator.hs | 1 + src/Codec/CBOR/Cuddle/CDDL/CTree.hs | 9 +- test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs | 122 +++++++++++++++ 6 files changed, 266 insertions(+), 11 deletions(-) create mode 100644 src/Codec/CBOR/Cuddle/CBOR/Canonical.hs create mode 100644 test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs diff --git a/cuddle.cabal b/cuddle.cabal index 4efbe4d7..d5a988fe 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -38,6 +38,7 @@ common warnings library import: warnings exposed-modules: + Codec.CBOR.Cuddle.CBOR.Canonical Codec.CBOR.Cuddle.CBOR.Gen Codec.CBOR.Cuddle.CBOR.Validator Codec.CBOR.Cuddle.CBOR.Validator.Trace diff --git a/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs b/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs new file mode 100644 index 00000000..771a73a6 --- /dev/null +++ b/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs @@ -0,0 +1,139 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE LambdaCase #-} + +module Codec.CBOR.Cuddle.CBOR.Canonical ( + CanonicalTerm (..), + NInt, + toNInt, + fromNInt, + toCanonical, + uintMax, + nintMin, +) where + +import Codec.CBOR.Term (Term (..)) +import Data.Bifunctor (Bifunctor (..)) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Lazy qualified as BSL +import Data.Map.Strict (Map) +import Data.Map.Strict qualified as Map +import Data.Text (Text) +import Data.Text.Lazy qualified as TL +import Data.Word (Word64, Word8) +import GHC.Generics (Generic) +import Numeric.Half (Half, toHalf) +import Test.QuickCheck (Arbitrary (..)) + +-- | A negative integer in the range @[-2^64, -1]@: exactly the values +-- representable by CBOR major type 1 (RFC 8949 §3.1). The range is wider +-- than 'Int64' on the negative side, so it can't be stored as a plain +-- signed integer. +newtype NInt = NInt Word64 + deriving (Eq, Ord, Bounded) + +instance Show NInt where + showsPrec p x = + showParen (p > 10) $ showString "toNInt " . showsPrec 11 (fromNInt x) + +instance Arbitrary NInt where + arbitrary = NInt <$> arbitrary + +toNInt :: Integer -> Maybe NInt +toNInt x + | x >= nintMin && x < 0 = Just . NInt . fromInteger $ x - nintMin + | otherwise = Nothing + +fromNInt :: NInt -> Integer +fromNInt (NInt n) = nintMin + toInteger n + +toCanonical :: Term -> CanonicalTerm +toCanonical = \case + TInt i -> integerToCanonical $ toInteger i + TInteger n -> integerToCanonical n + TBytes bs -> CTBytes bs + TBytesI bs -> CTBytes $ BSL.toStrict bs + TString s -> CTString s + TStringI s -> CTString $ TL.toStrict s + TList ts -> CTList $ toCanonical <$> ts + TListI ts -> CTList $ toCanonical <$> ts + TMap kvs -> mkMap kvs + TMapI kvs -> mkMap kvs + TTagged 2 inner + | Just bs <- tagBytes inner -> integerToCanonical $ bytesToUnsigned bs + TTagged 3 inner + | Just bs <- tagBytes inner -> integerToCanonical $ -1 - bytesToUnsigned bs + TTagged w t -> CTTagged w $ toCanonical t + TBool False -> CTSimple 20 + TBool True -> CTSimple 21 + TNull -> CTSimple 22 + TSimple w -> CTSimple w + THalf f -> CTHalf $ toHalf f + TFloat f -> CTFloat f + TDouble d -> CTDouble d + where + mkMap = CTMap . Map.fromList . fmap (bimap toCanonical toCanonical) + tagBytes (TBytes bs) = Just bs + tagBytes (TBytesI bs) = Just $ BSL.toStrict bs + tagBytes _ = Nothing + +integerToCanonical :: Integer -> CanonicalTerm +integerToCanonical n + | n >= 0, n <= uintMax = CTInt $ fromInteger n + | n < 0, n >= nintMin = CTNInt . NInt . fromInteger $ n - nintMin + | n > uintMax = CTTagged 2 . CTBytes $ unsignedToBytes n + | otherwise = CTTagged 3 . CTBytes . unsignedToBytes $ -1 - n + +bytesToUnsigned :: ByteString -> Integer +bytesToUnsigned = BS.foldl' (\acc b -> acc * 256 + toInteger b) 0 + +unsignedToBytes :: Integer -> ByteString +unsignedToBytes = BS.pack . reverse . go + where + go 0 = [] + go n = fromInteger (n `mod` 256) : go (n `div` 256) + +-- | A canonical representation of CBOR data items. Two 'CanonicalTerm's +-- compare equal exactly when the underlying CBOR items are equivalent +-- under the /extended generic data model/ of RFC 8949 §3.4.3 — the same +-- notion of equality that determines whether two map keys are duplicates +-- (RFC 8949 §3.1, §5.6). +-- +-- Differences from 'Codec.CBOR.Term.Term': +-- +-- * Major types 0 and 1 are split into 'CTInt' and 'CTNInt' (matching +-- the on-wire structure) instead of overlapping in 'TInt'/'TInteger'. +-- * Bignums (tags 2 and 3) whose value fits in @[-2^64, 2^64 - 1]@ +-- are normalized into 'CTInt' / 'CTNInt' (RFC 8949 §3.4.3). +-- * Definite- and indefinite-length variants are merged: there is no +-- separate constructor for what 'Codec.CBOR.Term.TBytesI', +-- 'Codec.CBOR.Term.TStringI', 'Codec.CBOR.Term.TListI', +-- 'Codec.CBOR.Term.TMapI' represent. +-- * Maps use 'Data.Map.Strict.Map' rather than a list of pairs, so +-- duplicate keys collapse and key order is irrelevant. +-- * 'TBool' and 'TNull' are folded into 'CTSimple' (values 20, 21, 22 +-- per RFC 8949 §3.3). +-- * 'THalf', 'TFloat', 'TDouble' remain distinct constructors: the +-- generic data model treats different float widths as distinct items +-- (RFC 8949 §2). +data CanonicalTerm + = CTInt !Word64 + | CTNInt !NInt + | CTBytes !ByteString + | CTString !Text + | CTList ![CanonicalTerm] + | CTMap !(Map CanonicalTerm CanonicalTerm) + | CTTagged !Word64 !CanonicalTerm + | CTSimple !Word8 + | CTHalf !Half + | CTFloat !Float + | CTDouble !Double + deriving (Generic, Eq, Ord, Show) + +-- Bounds + +uintMax :: Integer +uintMax = 2 ^ (64 :: Int) - 1 + +nintMin :: Integer +nintMin = -(2 ^ (64 :: Int)) diff --git a/src/Codec/CBOR/Cuddle/CBOR/Gen.hs b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs index 5fe79e25..17fa354d 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Gen.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs @@ -23,6 +23,7 @@ module Codec.CBOR.Cuddle.CBOR.Gen ( #if MIN_VERSION_random(1,3,0) #endif +import Codec.CBOR.Cuddle.CBOR.Canonical (nintMin, toCanonical, uintMax) import Codec.CBOR.Cuddle.CDDL ( GRef (..), Name (..), @@ -34,8 +35,6 @@ import Codec.CBOR.Cuddle.CDDL ( import Codec.CBOR.Cuddle.CDDL.CTree ( CTree (..), PTerm (..), - nintMin, - uintMax, ) import Codec.CBOR.Cuddle.CDDL.CTree qualified as CTree import Codec.CBOR.Cuddle.CDDL.CtlOp qualified as CtlOp @@ -371,7 +370,7 @@ genMap nodes = do go !n | n > 0 = do k <- unS <$> scale (`div` 2) (withAntiGen (withAnnotation "key") $ genForCTree kNode) - if Map.notMember k m + if toCanonical k `elem` (toCanonical <$> Map.keys m) then do v <- unS <$> scale (`div` 2) (withAntiGen (withAnnotation "value") $ genForCTree vNode) pure $ Just (k, v) diff --git a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs index 6c4b7747..4d9ca206 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs @@ -11,6 +11,7 @@ module Codec.CBOR.Cuddle.CBOR.Validator ( ValidateCBORError (..), ) where +import Codec.CBOR.Cuddle.CBOR.Canonical (nintMin, uintMax) import Codec.CBOR.Cuddle.CBOR.Validator.Trace ( ControlInfo (..), Evidenced (..), diff --git a/src/Codec/CBOR/Cuddle/CDDL/CTree.hs b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs index 68815de0..20bf641d 100644 --- a/src/Codec/CBOR/Cuddle/CDDL/CTree.hs +++ b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs @@ -15,6 +15,7 @@ module Codec.CBOR.Cuddle.CDDL.CTree ( nintMin, ) where +import Codec.CBOR.Cuddle.CBOR.Canonical (nintMin, uintMax) import Codec.CBOR.Cuddle.CDDL (Name, OccurrenceIndicator, RangeBound, Value) import Codec.CBOR.Cuddle.CDDL.CtlOp import Control.Monad.Identity (Identity (..)) @@ -155,11 +156,3 @@ instance Arbitrary PTerm where arbitrary = genericArbitraryU instance Hashable PTerm - --- Bounds - -uintMax :: Integer -uintMax = 2 ^ (64 :: Int) - 1 - -nintMin :: Integer -nintMin = -(2 ^ (64 :: Int)) diff --git a/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs new file mode 100644 index 00000000..b3485522 --- /dev/null +++ b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs @@ -0,0 +1,122 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Test.Codec.CBOR.Cuddle.CBOR.Canonical (spec) where + +import Codec.CBOR.Cuddle.CBOR.Canonical ( + CanonicalTerm (..), + NInt, + fromNInt, + nintMin, + toCanonical, + toNInt, + uintMax, + ) +import Codec.CBOR.Term (Term (..)) +import Data.ByteString qualified as BS +import Data.ByteString.Lazy qualified as BSL +import Data.Text.Lazy qualified as TL +import Data.Word (Word64) +import Test.Hspec +import Test.Hspec.QuickCheck (prop) +import Test.QuickCheck + +spec :: Spec +spec = do + describe "NInt" $ do + it "toNInt accepts exactly [-2^64, -1]" $ do + toNInt 0 `shouldBe` Nothing + toNInt 1 `shouldBe` Nothing + toNInt (nintMin - 1) `shouldBe` Nothing + toNInt nintMin `shouldSatisfy` (/= Nothing) + toNInt (-1) `shouldSatisfy` (/= Nothing) + + prop "toNInt . fromNInt = Just" $ \n -> + toNInt (fromNInt n) === Just n + + it "boundary values roundtrip" $ do + fmap fromNInt (toNInt (-1)) `shouldBe` Just (-1) + fmap fromNInt (toNInt nintMin) `shouldBe` Just nintMin + + prop "Ord on NInt agrees with Ord on the represented Integer" $ \a b -> + compare a b === compare (fromNInt a) (fromNInt b) + + describe "toCanonical" $ do + describe "integer normalization" $ do + prop "TInt and TInteger of the same value are equal" $ \i -> + toCanonical (TInt i) === toCanonical (TInteger (toInteger i)) + + prop "in-range positive bignum collapses to CTInt" $ \(w :: Word64) -> + let n = toInteger w + in toCanonical (TTagged 2 (TBytes (unsignedBE n))) + === toCanonical (TInteger n) + + prop "in-range negative bignum collapses to CTNInt" $ \nint -> + let n = fromNInt nint + in toCanonical (TTagged 3 (TBytes (unsignedBE (-1 - n)))) + === toCanonical (TInteger n) + + it "bignum with leading zeros canonicalizes" $ + toCanonical (TTagged 2 (TBytes (BS.pack [0, 0, 5]))) + `shouldBe` CTInt 5 + + prop "bignum from TBytesI matches TBytes" $ \(w :: Word64) -> + let bs = unsignedBE (toInteger w) + in toCanonical (TTagged 2 (TBytesI (BSL.fromStrict bs))) + === toCanonical (TTagged 2 (TBytes bs)) + + it "true bignum (above uintMax) stays tagged" $ + let n = uintMax + 1 + in toCanonical (TInteger n) + `shouldBe` CTTagged 2 (CTBytes (unsignedBE n)) + + it "true bignum (below nintMin) stays tagged" $ + let n = nintMin - 1 + in toCanonical (TInteger n) + `shouldBe` CTTagged 3 (CTBytes (unsignedBE (-1 - n))) + + describe "definite/indefinite variants merge" $ do + it "TBytes ≡ TBytesI" $ + toCanonical (TBytes "abc") + `shouldBe` toCanonical (TBytesI (BSL.fromStrict "abc")) + + it "TString ≡ TStringI" $ + toCanonical (TString "abc") + `shouldBe` toCanonical (TStringI (TL.fromStrict "abc")) + + it "TList ≡ TListI" $ + toCanonical (TList [TInt 1, TInt 2]) + `shouldBe` toCanonical (TListI [TInt 1, TInt 2]) + + it "TMap ≡ TMapI" $ + toCanonical (TMap [(TInt 1, TInt 2)]) + `shouldBe` toCanonical (TMapI [(TInt 1, TInt 2)]) + + describe "bool/null go through CTSimple" $ do + it "TBool False ≡ TSimple 20" $ + toCanonical (TBool False) `shouldBe` toCanonical (TSimple 20) + it "TBool True ≡ TSimple 21" $ + toCanonical (TBool True) `shouldBe` toCanonical (TSimple 21) + it "TNull ≡ TSimple 22" $ + toCanonical TNull `shouldBe` toCanonical (TSimple 22) + + describe "maps" $ do + it "duplicate keys collapse (last wins)" $ + toCanonical (TMap [(TInt 1, TInt 10), (TInt 1, TInt 20)]) + `shouldBe` CTMap (Map.singleton (CTInt 1) (CTInt 20)) + + it "key order is irrelevant" $ + toCanonical (TMap [(TInt 1, TInt 10), (TInt 2, TInt 20)]) + `shouldBe` toCanonical (TMap [(TInt 2, TInt 20), (TInt 1, TInt 10)]) + + describe "floats stay distinct by width" $ do + it "THalf 1.0 ≠ TFloat 1.0" $ + toCanonical (THalf 1.0) `shouldNotBe` toCanonical (TFloat 1.0) + it "TFloat 1.0 ≠ TDouble 1.0" $ + toCanonical (TFloat 1.0) `shouldNotBe` toCanonical (TDouble 1.0) + +-- | Encode a non-negative Integer as a big-endian byte string with no leading zeros. +unsignedBE :: Integer -> BS.ByteString +unsignedBE = BS.pack . reverse . go + where + go 0 = [] + go n = fromInteger (n `mod` 256) : go (n `div` 256) From e9f3fa8a51e2865131b410efd94b5073e353cfa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joosep=20J=C3=A4=C3=A4ger?= Date: Mon, 25 May 2026 14:03:06 +0300 Subject: [PATCH 3/5] Make validator check for duplicate keys in maps --- cuddle.cabal | 1 + src/Codec/CBOR/Cuddle/CBOR/Gen.hs | 2 +- src/Codec/CBOR/Cuddle/CBOR/Validator.hs | 49 ++++++++++++------- src/Codec/CBOR/Cuddle/CBOR/Validator/Trace.hs | 13 +++++ test/Main.hs | 2 + test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs | 1 - test/Test/Codec/CBOR/Cuddle/CDDL/Validator.hs | 15 +++--- 7 files changed, 56 insertions(+), 27 deletions(-) diff --git a/cuddle.cabal b/cuddle.cabal index d5a988fe..7d4b0ca0 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -129,6 +129,7 @@ test-suite cuddle-test other-modules: Paths_cuddle + Test.Codec.CBOR.Cuddle.CBOR.Canonical Test.Codec.CBOR.Cuddle.CDDL.Examples Test.Codec.CBOR.Cuddle.CDDL.Examples.Huddle Test.Codec.CBOR.Cuddle.CDDL.Gen diff --git a/src/Codec/CBOR/Cuddle/CBOR/Gen.hs b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs index 17fa354d..d54fae23 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Gen.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs @@ -370,7 +370,7 @@ genMap nodes = do go !n | n > 0 = do k <- unS <$> scale (`div` 2) (withAntiGen (withAnnotation "key") $ genForCTree kNode) - if toCanonical k `elem` (toCanonical <$> Map.keys m) + if toCanonical k `notElem` (toCanonical <$> Map.keys m) then do v <- unS <$> scale (`div` 2) (withAntiGen (withAnnotation "value") $ genForCTree vNode) pure $ Just (k, v) diff --git a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs index 4d9ca206..5fd579ea 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs @@ -11,7 +11,7 @@ module Codec.CBOR.Cuddle.CBOR.Validator ( ValidateCBORError (..), ) where -import Codec.CBOR.Cuddle.CBOR.Canonical (nintMin, uintMax) +import Codec.CBOR.Cuddle.CBOR.Canonical (CanonicalTerm, toCanonical) import Codec.CBOR.Cuddle.CBOR.Validator.Trace ( ControlInfo (..), Evidenced (..), @@ -53,6 +53,8 @@ import Data.List.NonEmpty (NonEmpty (..)) import Data.List.NonEmpty qualified as NE import Data.Map.Strict qualified as Map import Data.Maybe +import Data.Set (Set) +import Data.Set qualified as Set import Data.Text qualified as T import Data.Text.Encoding (encodeUtf8) import Data.Text.Lazy qualified as TL @@ -914,14 +916,18 @@ validateMap cddl terms (CTreeE (VValidator v _)) = runCustomValidator cddl (Sing validateMap cddl terms rule = case rule of Postlude PTAny -> terminal rule - Map rules -> mapTrace MapTrace $ validate [] terms rules + Map rules -> mapTrace MapTrace $ validate [] terms rules mempty Choice opts -> validateChoice (validateMap cddl terms) opts _ -> unapplicable rule where validate :: - [CTree ValidatorPhase] -> [(Term, Term)] -> [CTree ValidatorPhase] -> Evidenced MapValidationTrace - validate _ [] [] = evidence MapValidationDone - validate exhausted (kv : _) [] = + [CTree ValidatorPhase] -> + [(Term, Term)] -> + [CTree ValidatorPhase] -> + Set CanonicalTerm -> + Evidenced MapValidationTrace + validate _ [] [] _ = evidence MapValidationDone + validate exhausted (kv : _) [] _ = let unwrapOccur (Occur ct _) = ct unwrapOccur ct = ct @@ -938,20 +944,25 @@ validateMap cddl terms rule = _ -> Just $ maximumBy (compare `on` (measureProgress . snd)) attempts in evidence $ MapValidationLeftoverKVs kv bestAttempt - validate [] [] rs = + validate [] [] rs _ = case NE.nonEmpty $ filter (not . isOptional) rs of Nothing -> evidence MapValidationDone Just requiredRules -> evidence $ MapValidationUnappliedRules (mapIndex <$> requiredRules) - validate exhausted kvs (r : rs) = + validate exhausted kvs (r : rs) seen = let consume (KV k v _) f = case kvs of ((tk, tv) : leftover) -> - case validateTerm cddl tk k of - Evidenced SValid kTrc -> - case validateTerm cddl tv v of - Evidenced SValid vTrc -> mapTrace (MapValidationConsume (mapIndex r) kTrc vTrc) $ f leftover - Evidenced SInvalid vTrc -> evidence $ MapValidationInvalidValue (mapIndex r) kTrc vTrc - Evidenced SInvalid _ -> evidence $ MapValidationUnappliedRules (NE.singleton $ mapIndex r) + let + cKey = toCanonical tk + in + case validateTerm cddl tk k of + Evidenced SValid kTrc + | cKey `Set.notMember` seen -> + case validateTerm cddl tv v of + Evidenced SValid vTrc -> mapTrace (MapValidationConsume (mapIndex r) kTrc vTrc) $ f leftover (Set.insert cKey seen) + Evidenced SInvalid vTrc -> evidence $ MapValidationInvalidValue (mapIndex r) kTrc vTrc + | otherwise -> evidence $ MapValidationDuplicateKeys (mapIndex r) cKey kTrc + Evidenced SInvalid _ -> evidence $ MapValidationUnappliedRules (NE.singleton $ mapIndex r) [] -> error "No remaining KV pairs" consume x _ = error $ "Unexpected value in map: " <> showSimple x postponeRule l = validate (r : exhausted) l rs @@ -963,20 +974,20 @@ validateMap cddl terms rule = Occur ct oi -> case oi of OIOptional -> - consume ct resetDropRule <> postponeRule kvs + consume ct resetDropRule <> postponeRule kvs seen OIZeroOrMore -> - consume ct (rewriteRule r) <> postponeRule kvs + consume ct (rewriteRule r) <> postponeRule kvs seen OIOneOrMore -> - consume ct (rewriteRule (Occur ct OIZeroOrMore)) <> postponeRule kvs + consume ct (rewriteRule (Occur ct OIZeroOrMore)) <> postponeRule kvs seen OIBounded mlb mub | Just lb <- mlb, Just ub <- mub, lb > ub -> error "Unsatisfiable range encountered" | otherwise -> case compare 0 <$> mub of - Just EQ -> dropRule kvs + Just EQ -> dropRule kvs seen Just GT -> error "Unsatisfiable range encountered" _ -> consume ct (rewriteRule (Occur ct $ decrementBounds (mlb, mub))) - <> postponeRule kvs - _ -> consume r resetDropRule <> postponeRule kvs + <> postponeRule kvs seen + _ -> consume r resetDropRule <> postponeRule kvs seen -------------------------------------------------------------------------------- -- Choices diff --git a/src/Codec/CBOR/Cuddle/CBOR/Validator/Trace.hs b/src/Codec/CBOR/Cuddle/CBOR/Validator/Trace.hs index 062348dc..1c296717 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Validator/Trace.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Validator/Trace.hs @@ -30,6 +30,7 @@ module Codec.CBOR.Cuddle.CBOR.Validator.Trace ( foldEvidenced, ) where +import Codec.CBOR.Cuddle.CBOR.Canonical (CanonicalTerm) import Codec.CBOR.Cuddle.CDDL (Name (..)) import Codec.CBOR.Cuddle.CDDL.CTree (CTree (..)) import Codec.CBOR.Cuddle.CDDL.CtlOp (CtlOp) @@ -152,6 +153,11 @@ data MapValidationTrace (v :: Validity) where ValidationTrace IsValid -> MapValidationTrace v -> MapValidationTrace v + MapValidationDuplicateKeys :: + CTree MonoSimplePhase -> + CanonicalTerm -> + ValidationTrace IsValid -> + MapValidationTrace IsInvalid deriving instance Show (MapValidationTrace v) @@ -255,6 +261,7 @@ instance IsValidationTrace MapValidationTrace where MapValidationLeftoverKVs _ _ -> SInvalid MapValidationUnappliedRules _ -> SInvalid MapValidationInvalidValue {} -> SInvalid + MapValidationDuplicateKeys {} -> SInvalid MapValidationConsume _ _ _ x -> traceValidity x measureProgress = \case @@ -266,6 +273,7 @@ instance IsValidationTrace MapValidationTrace where measureProgress kTrc <> measureProgress vTrc <> measureProgress x MapValidationInvalidValue _ kTrc vTrc -> measureProgress kTrc <> measureProgress vTrc + MapValidationDuplicateKeys _ _ trc -> measureProgress trc evidence :: (Show (t v), IsValidationTrace t) => t v -> Evidenced t evidence x = Evidenced (traceValidity x) x @@ -381,6 +389,11 @@ prettyMapValidationResult opts@TraceOptions {..} = \case , "value:" <+> annotate (color Red) "(fail)" , nestContainer $ prettyValidationTrace opts v ] + MapValidationDuplicateKeys _ _ trc -> + vsep + [ "key: " <> annotate (color Red) "(duplicate)" + , nestContainer $ prettyValidationTrace opts trc + ] where foldValid !n (MapValidationConsume _ _ _ c) = foldValid (n + 1) c foldValid !n t = diff --git a/test/Main.hs b/test/Main.hs index 67504596..ee8eda82 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,6 +1,7 @@ module Main (main) where import System.IO (BufferMode (..), hSetBuffering, hSetEncoding, stdout, utf8) +import Test.Codec.CBOR.Cuddle.CBOR.Canonical qualified as Canonical import Test.Codec.CBOR.Cuddle.CDDL.Examples qualified as Examples import Test.Codec.CBOR.Cuddle.CDDL.GeneratorSpec qualified as Generator import Test.Codec.CBOR.Cuddle.CDDL.Parser (parserSpec) @@ -23,6 +24,7 @@ main = do hSetBuffering stdout LineBuffering hSetEncoding stdout utf8 hspecWith hspecConfig $ do + describe "Canonical" Canonical.spec describe "Parser" parserSpec describe "Huddle" huddleSpec describe "Examples" Examples.spec diff --git a/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs index b3485522..9b1c61ac 100644 --- a/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs +++ b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs @@ -4,7 +4,6 @@ module Test.Codec.CBOR.Cuddle.CBOR.Canonical (spec) where import Codec.CBOR.Cuddle.CBOR.Canonical ( CanonicalTerm (..), - NInt, fromNInt, nintMin, toCanonical, diff --git a/test/Test/Codec/CBOR/Cuddle/CDDL/Validator.hs b/test/Test/Codec/CBOR/Cuddle/CDDL/Validator.hs index 25d2f99a..c5829f82 100644 --- a/test/Test/Codec/CBOR/Cuddle/CDDL/Validator.hs +++ b/test/Test/Codec/CBOR/Cuddle/CDDL/Validator.hs @@ -11,6 +11,7 @@ module Test.Codec.CBOR.Cuddle.CDDL.Validator ( validateCBOR_, ) where +import Codec.CBOR.Cuddle.CBOR.Canonical (toCanonical) import Codec.CBOR.Cuddle.CBOR.Gen (generateFromName) import Codec.CBOR.Cuddle.CBOR.Validator ( ValidatorPhase, @@ -53,7 +54,7 @@ import Control.Monad (forM_) import Data.ByteString (ByteString) import Data.ByteString qualified as BS import Data.ByteString.Lazy qualified as LBS -import Data.Containers.ListUtils (nubOrd, nubOrdOn) +import Data.Containers.ListUtils (nubOrdOn) import Data.Either (fromRight) import Data.Map qualified as Map import Data.Text (Text) @@ -149,14 +150,15 @@ genAndValidateFromFile path = do mapCDDLDropExt cddl genAndValidateCddl path resolvedCddl -genInfiniteUniqueList :: Ord a => Gen a -> Gen [a] -genInfiniteUniqueList = fmap nubOrd . infiniteListOf +genInfiniteUniqueListOn :: Ord b => (a -> b) -> Gen a -> Gen [a] +genInfiniteUniqueListOn f = fmap (nubOrdOn f) . infiniteListOf genHuddleRangeMap :: (Int, Int) -> Gen Term genHuddleRangeMap rng@(lo, hi) = do n <- choose rng let genKV = (,) <$> fmap TInt arbitrary <*> fmap TBool arbitrary - genMapTerm . take n =<< scale (const $ max lo hi) (genInfiniteUniqueList genKV) + genMapTerm . take n + =<< scale (const $ max lo hi) (genInfiniteUniqueListOn (toCanonical . fst) genKV) genHuddleArrayRequiredTerms :: Gen [Term] genHuddleArrayRequiredTerms = do @@ -249,9 +251,10 @@ genFullMap = do , pure [] ] strFields <- - nubOrdOn fst <$> listOf ((,) <$> (genStringTerm . T.pack =<< arbitrary) <*> (TInt <$> arbitrary)) + nubOrdOn (toCanonical . fst) + <$> listOf ((,) <$> (genStringTerm . T.pack =<< arbitrary) <*> (TInt <$> arbitrary)) bytesFields <- - nubOrdOn fst + nubOrdOn (toCanonical . fst) <$> listOf1 ((,) <$> (genBytesTerm =<< arbitraryByteString) <*> arbitraryTerm) allFields <- shuffle $ field1 : lField2 <> strFields <> bytesFields genMapTerm allFields From 736b4c576ab3ed0611f6ad4bf0f498e8da529802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joosep=20J=C3=A4=C3=A4ger?= Date: Mon, 25 May 2026 16:05:18 +0300 Subject: [PATCH 4/5] Fail canonicalization when duplicate keys present --- src/Codec/CBOR/Cuddle/CBOR/Canonical.hs | 51 ++++++++++--------- src/Codec/CBOR/Cuddle/CBOR/Validator.hs | 5 +- test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs | 10 ++-- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs b/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs index 771a73a6..af498846 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Canonical.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE LambdaCase #-} module Codec.CBOR.Cuddle.CBOR.Canonical ( @@ -12,7 +11,7 @@ module Codec.CBOR.Cuddle.CBOR.Canonical ( ) where import Codec.CBOR.Term (Term (..)) -import Data.Bifunctor (Bifunctor (..)) +import Control.Monad (guard) import Data.ByteString (ByteString) import Data.ByteString qualified as BS import Data.ByteString.Lazy qualified as BSL @@ -47,32 +46,38 @@ toNInt x fromNInt :: NInt -> Integer fromNInt (NInt n) = nintMin + toInteger n -toCanonical :: Term -> CanonicalTerm +-- | Convert a `cborg` @Term@ to a @CanonicalTerm@. +-- Will return `Nothing` if any map contains duplicate elements. +toCanonical :: Term -> Maybe CanonicalTerm toCanonical = \case - TInt i -> integerToCanonical $ toInteger i - TInteger n -> integerToCanonical n - TBytes bs -> CTBytes bs - TBytesI bs -> CTBytes $ BSL.toStrict bs - TString s -> CTString s - TStringI s -> CTString $ TL.toStrict s - TList ts -> CTList $ toCanonical <$> ts - TListI ts -> CTList $ toCanonical <$> ts + TInt i -> pure . integerToCanonical $ toInteger i + TInteger n -> pure $ integerToCanonical n + TBytes bs -> pure $ CTBytes bs + TBytesI bs -> pure . CTBytes $ BSL.toStrict bs + TString s -> pure $ CTString s + TStringI s -> pure . CTString $ TL.toStrict s + TList ts -> mkList ts + TListI ts -> mkList ts TMap kvs -> mkMap kvs TMapI kvs -> mkMap kvs TTagged 2 inner - | Just bs <- tagBytes inner -> integerToCanonical $ bytesToUnsigned bs + | Just bs <- tagBytes inner -> pure . integerToCanonical $ bytesToUnsigned bs TTagged 3 inner - | Just bs <- tagBytes inner -> integerToCanonical $ -1 - bytesToUnsigned bs - TTagged w t -> CTTagged w $ toCanonical t - TBool False -> CTSimple 20 - TBool True -> CTSimple 21 - TNull -> CTSimple 22 - TSimple w -> CTSimple w - THalf f -> CTHalf $ toHalf f - TFloat f -> CTFloat f - TDouble d -> CTDouble d + | Just bs <- tagBytes inner -> pure . integerToCanonical $ -1 - bytesToUnsigned bs + TTagged w t -> CTTagged w <$> toCanonical t + TBool False -> pure $ CTSimple 20 + TBool True -> pure $ CTSimple 21 + TNull -> pure $ CTSimple 22 + TSimple w -> pure $ CTSimple w + THalf f -> pure . CTHalf $ toHalf f + TFloat f -> pure $ CTFloat f + TDouble d -> pure $ CTDouble d where - mkMap = CTMap . Map.fromList . fmap (bimap toCanonical toCanonical) + mkMap kvs = do + pairs <- traverse (\(k, v) -> (,) <$> toCanonical k <*> toCanonical v) kvs + let m = Map.fromList pairs + CTMap m <$ guard (Map.size m == length pairs) + mkList ts = CTList <$> traverse toCanonical ts tagBytes (TBytes bs) = Just bs tagBytes (TBytesI bs) = Just $ BSL.toStrict bs tagBytes _ = Nothing @@ -110,7 +115,7 @@ unsignedToBytes = BS.pack . reverse . go -- 'Codec.CBOR.Term.TStringI', 'Codec.CBOR.Term.TListI', -- 'Codec.CBOR.Term.TMapI' represent. -- * Maps use 'Data.Map.Strict.Map' rather than a list of pairs, so --- duplicate keys collapse and key order is irrelevant. +-- key order is irrelevant. -- * 'TBool' and 'TNull' are folded into 'CTSimple' (values 20, 21, 22 -- per RFC 8949 §3.3). -- * 'THalf', 'TFloat', 'TDouble' remain distinct constructors: the diff --git a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs index 5fd579ea..14f657c2 100644 --- a/src/Codec/CBOR/Cuddle/CBOR/Validator.hs +++ b/src/Codec/CBOR/Cuddle/CBOR/Validator.hs @@ -953,7 +953,10 @@ validateMap cddl terms rule = consume (KV k v _) f = case kvs of ((tk, tv) : leftover) -> let - cKey = toCanonical tk + cKey = + fromMaybe + (error "IMPOSSIBLE: key validated but failed to canonicalize\nPlease make a bug report") + (toCanonical tk) in case validateTerm cddl tk k of Evidenced SValid kTrc diff --git a/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs index 9b1c61ac..0cd6ce7b 100644 --- a/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs +++ b/test/Test/Codec/CBOR/Cuddle/CBOR/Canonical.hs @@ -56,7 +56,7 @@ spec = do it "bignum with leading zeros canonicalizes" $ toCanonical (TTagged 2 (TBytes (BS.pack [0, 0, 5]))) - `shouldBe` CTInt 5 + `shouldBe` Just (CTInt 5) prop "bignum from TBytesI matches TBytes" $ \(w :: Word64) -> let bs = unsignedBE (toInteger w) @@ -66,12 +66,12 @@ spec = do it "true bignum (above uintMax) stays tagged" $ let n = uintMax + 1 in toCanonical (TInteger n) - `shouldBe` CTTagged 2 (CTBytes (unsignedBE n)) + `shouldBe` Just (CTTagged 2 (CTBytes (unsignedBE n))) it "true bignum (below nintMin) stays tagged" $ let n = nintMin - 1 in toCanonical (TInteger n) - `shouldBe` CTTagged 3 (CTBytes (unsignedBE (-1 - n))) + `shouldBe` Just (CTTagged 3 (CTBytes (unsignedBE (-1 - n)))) describe "definite/indefinite variants merge" $ do it "TBytes ≡ TBytesI" $ @@ -99,10 +99,6 @@ spec = do toCanonical TNull `shouldBe` toCanonical (TSimple 22) describe "maps" $ do - it "duplicate keys collapse (last wins)" $ - toCanonical (TMap [(TInt 1, TInt 10), (TInt 1, TInt 20)]) - `shouldBe` CTMap (Map.singleton (CTInt 1) (CTInt 20)) - it "key order is irrelevant" $ toCanonical (TMap [(TInt 1, TInt 10), (TInt 2, TInt 20)]) `shouldBe` toCanonical (TMap [(TInt 2, TInt 20), (TInt 1, TInt 10)]) From b8e0229d2b79b83535cb8cd5152ecdfd9cb30194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joosep=20J=C3=A4=C3=A4ger?= Date: Mon, 25 May 2026 16:10:11 +0300 Subject: [PATCH 5/5] Changelog --- CHANGELOG.md | 6 ++++-- cuddle.cabal | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aacdd6b..87ccee2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # Changelog for `cuddle` -## 1.9.0.0 +## 1.8.1.0 -* +* Fix map generator so that it does not generate duplicate keys +* Add `MapValidationDuplicateKeys`; validator now checks for duplicate elements +* Add `Codec.CBOR.Cuddle.CBOR.Canonical` ## 1.8.0.0 diff --git a/cuddle.cabal b/cuddle.cabal index 7d4b0ca0..8547e3bd 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -1,6 +1,6 @@ cabal-version: 3.4 name: cuddle -version: 1.9.0.0 +version: 1.8.1.0 synopsis: CDDL Generator and test utilities description: Cuddle is a library for generating and manipulating [CDDL](https://datatracker.ietf.org/doc/html/rfc8610).