[ Reason ]

pandoc is vulnerable to CVE-2023-35936: Arbitrary file write
vulnerability via specially crafted image element in the input when generating
files using the `--extract-media` option or outputting to PDF format.

The Security Team decided not to issue a DSA for that CVE, but it's now fixed in
buster-security (2.2.1-3+deb10u1) as well as in sid, so it makes sense
to fix it via (o)s-pu too.

[ Impact ]

For users upgrading from buster-security to bookworm, that would be a security
regression.

[ Tests ]

A new unit test was added upstream, and backported along with the code fixes.  I
also manually verified that the PoCs were fixed.

[ Risks ]

Regression risks are low: all upstream commits applied cleanly, and test
coverage is good.  (Upstream changes to pandoc.cabal are a no-op as far as Debian
is concerned.)

[ Checklist ]

  [x] *all* changes are documented in the d/changelog
  [x] I reviewed all changes and I approve them
  [x] attach debdiff against the package in stable
  [x] the issue is verified as fixed in unstable

[ Changes ]

  * Add d/salsa-ci.yml for Salsa CI.
  * Fix CVE-2023-35936 and CVE-2023-38745: Arbitrary file write vulnerability
    via specially crafted image element in the input when generating files using
    the `--extract-media` option or outputting to PDF format. (Closes: #1041976)

diffstat for pandoc- pandoc-

 changelog                         |   17 +++++
 copyright_hints                   |    6 +
 patches/020230620~5e381e3.patch   |  116 ++++++++++++++++++++++++++++++++++++++
 patches/020230623.1~54561e9.patch |   24 +++++++
 patches/020230623.2~df4f13b.patch |   85 +++++++++++++++++++++++++++
 patches/020230623.3~fe62da6.patch |   87 ++++++++++++++++++++++++++++
 patches/020230623.4~5246f02.patch |   52 +++++++++++++++++
 patches/020230720~eddedbf.patch   |   90 +++++++++++++++++++++++++++++
 patches/series                    |    6 +
 salsa-ci.yml                      |    9 ++
 10 files changed, 492 insertions(+)

diff -Nru pandoc- pandoc-
--- pandoc-    2022-11-19 14:13:51.000000000 +0100
+++ pandoc-    2023-07-25 23:01:50.000000000 +0200
@@ -1,3 +1,20 @@
+pandoc ( bookworm; urgency=high
+  * Non-maintainer upload.
+  * Rebuild for bookworm.
+  * Add d/salsa-ci.yml for Salsa CI.
+ -- Guilhem Moulin <>  Tue, 25 Jul 2023 23:01:50 +0200
+pandoc ( unstable; urgency=high
+  * add patches cherry-picked upstream
+    to fix arbitrary file write vulnerability;
+    closes: bug#1041976, thanks to Guilhem Moulin;
+    CVE-2023-35936 CVE-2023-35936
+ -- Jonas Smedegaard <>  Tue, 25 Jul 2023 18:43:57 +0200
 pandoc ( unstable; urgency=low
   * Non-maintainer upload.
diff -Nru pandoc- 
--- pandoc-      2022-08-13 16:27:42.000000000 
+++ pandoc-      2023-07-25 23:01:50.000000000 
@@ -236,6 +236,12 @@
+ debian/patches/020230620~5e381e3.patch
+ debian/patches/020230623.1~54561e9.patch
+ debian/patches/020230623.2~df4f13b.patch
+ debian/patches/020230623.3~fe62da6.patch
+ debian/patches/020230623.4~5246f02.patch
+ debian/patches/020230720~eddedbf.patch
diff -Nru pandoc- 
--- pandoc-      1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-      2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,116 @@
+Description: fix a security vulnerability in MediaBag and 
+ This vulnerability, discovered by Entroy C,
+ allows users to write arbitrary files to any location
+ by feeding pandoc a specially crafted URL in an image element.
+ The vulnerability is serious
+ for anyone using pandoc to process untrusted input.
+ The vulnerability does not affect pandoc
+ when run with the `--sandbox` flag.
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/src/Text/Pandoc/Class/IO.hs
++++ b/src/Text/Pandoc/Class/IO.hs
+@@ -49,7 +49,7 @@
+ import Network.HTTP.Client.TLS (mkManagerSettings)
+ import Network.HTTP.Types.Header ( hContentType )
+ import Network.Socket (withSocketsDo)
+-import Network.URI (unEscapeString)
++import Network.URI (URI(..), parseURI)
+ import System.Directory (createDirectoryIfMissing)
+ import System.Environment (getEnv)
+ import System.FilePath ((</>), takeDirectory, normalise)
+@@ -120,11 +120,11 @@
+ openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe 
+ openURL u
+- | Just u'' <- T.stripPrefix "data:" u = do
+-     let mime     = T.takeWhile (/=',') u''
+-     let contents = UTF8.fromString $
+-                     unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile 
(/=',') u''
+-     return (decodeLenient contents, Just mime)
++ | Just (URI{ uriScheme = "data:",
++              uriPath = upath }) <- parseURI (T.unpack u) = do
++     let (mime, rest) = break (== '.') upath
++     let contents = UTF8.fromString $ drop 1 rest
++     return (decodeLenient contents, Just (T.pack mime))
+  | otherwise = do
+      let toReqHeader (n, v) = ( (UTF8.fromText n), UTF8.fromText v)
+      customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
+@@ -222,7 +222,7 @@
+            -> m ()
+ writeMedia dir (fp, _mt, bs) = do
+   -- we normalize to get proper path separators for the platform
+-  let fullpath = normalise $ dir </> unEscapeString fp
++  let fullpath = normalise $ dir </> fp
+   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+   logIOError $ BL.writeFile fullpath bs
+--- a/src/Text/Pandoc/MediaBag.hs
++++ b/src/Text/Pandoc/MediaBag.hs
+@@ -28,12 +28,13 @@
+ import qualified Data.Map as M
+ import Data.Maybe (fromMaybe, isNothing)
+ import Data.Typeable (Typeable)
++import Network.URI (unEscapeString)
+ import System.FilePath
+ import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
+ import Data.Text (Text)
+ import qualified Data.Text as T
+ import Data.Digest.Pure.SHA (sha1, showDigest)
+-import Network.URI (URI (..), parseURI)
++import Network.URI (URI (..), parseURI, isURI)
+ data MediaItem =
+   MediaItem
+@@ -52,9 +53,12 @@
+ instance Show MediaBag where
+   show bag = "MediaBag " ++ show (mediaDirectory bag)
+--- | We represent paths with /, in normalized form.
++-- | We represent paths with /, in normalized form.  Percent-encoding
++-- is resolved.
+ canonicalize :: FilePath -> Text
+-canonicalize = T.replace "\\" "/" . T.pack . normalise
++canonicalize fp
++  | isURI fp = T.pack fp
++  | otherwise = T.replace "\\" "/" . T.pack . normalise . unEscapeString $ fp
+ -- | Delete a media item from a 'MediaBag', or do nothing if no item 
+ -- to the given path.
+@@ -77,22 +81,22 @@
+                              , mediaContents = contents
+                              , mediaMimeType = mt }
+         fp' = canonicalize fp
++        fp'' = T.unpack fp'
+         uri = parseURI fp
+-        newpath = if isRelative fp
++        newpath = if isRelative fp''
+                        && isNothing uri
+-                       && ".." `notElem` splitDirectories fp
+-                     then T.unpack fp'
++                       && not (".." `T.isInfixOf` fp')
++                     then fp''
+                      else showDigest (sha1 contents) <> "." <> ext
+-        fallback = case takeExtension fp of
+-                        ".gz" -> getMimeTypeDef $ dropExtension fp
+-                        _     -> getMimeTypeDef fp
++        fallback = case takeExtension fp'' of
++                        ".gz" -> getMimeTypeDef $ dropExtension fp''
++                        _     -> getMimeTypeDef fp''
+         mt = fromMaybe fallback mbMime
+-        path = maybe fp uriPath uri
++        path = maybe fp'' (unEscapeString . uriPath) uri
+         ext = case takeExtension path of
+                 '.':e -> e
+                 _ -> maybe "" T.unpack $ extensionFromMimeType mt
+ -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+ lookupMedia :: FilePath
+             -> MediaBag
diff -Nru pandoc- 
--- pandoc-    1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-    2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,24 @@
+Description: fix bug in git commit 5e381e3
+ In the new code a comma mysteriously turned into a period.
+ This would have prevented proper separation
+ of the mime type and content in data uris.
+ Thanks to @hseg for catching this.
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/src/Text/Pandoc/Class/IO.hs
++++ b/src/Text/Pandoc/Class/IO.hs
+@@ -122,7 +122,7 @@
+ openURL u
+  | Just (URI{ uriScheme = "data:",
+               uriPath = upath }) <- parseURI (T.unpack u) = do
+-     let (mime, rest) = break (== '.') upath
++     let (mime, rest) = break (== ',') upath
+      let contents = UTF8.fromString $ drop 1 rest
+      return (decodeLenient contents, Just (T.pack mime))
+  | otherwise = do
diff -Nru pandoc- 
--- pandoc-    1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-    2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,85 @@
+Description: more fixes to git commit 5e381e3
+ These changes recognize that parseURI does not unescape the path.
+ .
+ Another change is that the canonical form
+ of the path used as the MediaBag key
+ retains percent-encoding, if present;
+ we only unescape the string when writing to a file.
+ .
+ Some tests are needed before the issue can be closed.
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/src/Text/Pandoc/Class/IO.hs
++++ b/src/Text/Pandoc/Class/IO.hs
+@@ -49,7 +49,7 @@
+ import Network.HTTP.Client.TLS (mkManagerSettings)
+ import Network.HTTP.Types.Header ( hContentType )
+ import Network.Socket (withSocketsDo)
+-import Network.URI (URI(..), parseURI)
++import Network.URI (URI(..), parseURI, unEscapeString)
+ import System.Directory (createDirectoryIfMissing)
+ import System.Environment (getEnv)
+ import System.FilePath ((</>), takeDirectory, normalise)
+@@ -122,7 +122,7 @@
+ openURL u
+  | Just (URI{ uriScheme = "data:",
+               uriPath = upath }) <- parseURI (T.unpack u) = do
+-     let (mime, rest) = break (== ',') upath
++     let (mime, rest) = break (== ',') $ unEscapeString upath
+      let contents = UTF8.fromString $ drop 1 rest
+      return (decodeLenient contents, Just (T.pack mime))
+  | otherwise = do
+@@ -222,7 +222,7 @@
+            -> m ()
+ writeMedia dir (fp, _mt, bs) = do
+   -- we normalize to get proper path separators for the platform
+-  let fullpath = normalise $ dir </> fp
++  let fullpath = normalise $ dir </> unEscapeString fp
+   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+   logIOError $ BL.writeFile fullpath bs
+--- a/src/Text/Pandoc/MediaBag.hs
++++ b/src/Text/Pandoc/MediaBag.hs
+@@ -35,6 +35,7 @@
+ import qualified Data.Text as T
+ import Data.Digest.Pure.SHA (sha1, showDigest)
+ import Network.URI (URI (..), parseURI, isURI)
++import Data.List (isInfixOf)
+ data MediaItem =
+   MediaItem
+@@ -54,11 +55,11 @@
+   show bag = "MediaBag " ++ show (mediaDirectory bag)
+ -- | We represent paths with /, in normalized form.  Percent-encoding
+--- is resolved.
++-- is not resolved.
+ canonicalize :: FilePath -> Text
+ canonicalize fp
+   | isURI fp = T.pack fp
+-  | otherwise = T.replace "\\" "/" . T.pack . normalise . unEscapeString $ fp
++  | otherwise = T.replace "\\" "/" . T.pack . normalise $ fp
+ -- | Delete a media item from a 'MediaBag', or do nothing if no item 
+ -- to the given path.
+@@ -81,11 +82,11 @@
+                              , mediaContents = contents
+                              , mediaMimeType = mt }
+         fp' = canonicalize fp
+-        fp'' = T.unpack fp'
++        fp'' = unEscapeString $ T.unpack fp'
+         uri = parseURI fp
+         newpath = if isRelative fp''
+                        && isNothing uri
+-                       && not (".." `T.isInfixOf` fp')
++                       && not (".." `isInfixOf` fp'')
+                      then fp''
+                      else showDigest (sha1 contents) <> "." <> ext
+         fallback = case takeExtension fp'' of
diff -Nru pandoc- 
--- pandoc-    1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-    2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,87 @@
+Description: add tests for fillMediaBag/extractMedia
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/pandoc.cabal
++++ b/pandoc.cabal
+@@ -791,6 +791,7 @@
+                   tasty-lua         >= 1.0     && < 1.1,
+                   tasty-quickcheck  >= 0.8     && < 0.11,
+                   text              >= && < 2.1,
++                  temporary         >= 1.1     && < 1.4,
+                   time              >= 1.5     && < 1.14,
+                   xml               >= 1.3.12  && < 1.4,
+                   zip-archive       >= && < 0.5
+@@ -800,6 +801,7 @@
+                   Tests.Lua
+                   Tests.Lua.Module
+                   Tests.Shared
++                  Tests.MediaBag
+                   Tests.Readers.LaTeX
+                   Tests.Readers.HTML
+                   Tests.Readers.JATS
+--- /dev/null
++++ b/test/Tests/MediaBag.hs
+@@ -0,0 +1,39 @@
++{-# LANGUAGE OverloadedStrings #-}
++module Tests.MediaBag (tests) where
++import Test.Tasty
++import Test.Tasty.HUnit
++-- import Tests.Helpers
++import Text.Pandoc.Class (extractMedia, fillMediaBag, runIOorExplode)
++import System.IO.Temp (withTempDirectory)
++import System.FilePath
++import Text.Pandoc.Builder as B
++import System.Directory (doesFileExist, copyFile, setCurrentDirectory, 
++tests :: [TestTree]
++tests = [
++  testCase "test fillMediaBag & extractMedia" $
++      withTempDirectory "." "extractMediaTest" $ \tmpdir -> do
++        olddir <- getCurrentDirectory
++        setCurrentDirectory tmpdir
++        copyFile "../../test/lalune.jpg" "moon.jpg"
++        let d = B.doc $
++                  B.para (B.image "../../test/lalune.jpg" "" mempty) <>
++                  B.para (B.image "moon.jpg" "" mempty) <>
++                  B.para (B.image 
 "" mempty) <>
++                  B.para (B.image 
 "" mempty)
++        runIOorExplode $ do
++          fillMediaBag d
++          extractMedia "foo" d
++        exists1 <- doesFileExist ("foo" </> "moon.jpg")
++        assertBool "file in directory extract with original name" exists1
++        exists2 <- doesFileExist ("foo" </> 
++        assertBool "file above directory extracted with hashed name" exists2
++        exists3 <- doesFileExist ("foo" </> 
++        exists4 <- doesFileExist "a.lua"
++        assertBool "data uri with malicious payload does not get written to 
arbitrary location"
++          (exists3 && not exists4)
++        exists5 <- doesFileExist ("foo" </> 
++        assertBool "data uri with gif is properly decoded" exists5
++        setCurrentDirectory olddir
++  ]
+--- a/test/test-pandoc.hs
++++ b/test/test-pandoc.hs
+@@ -51,6 +51,7 @@
+ import qualified Tests.Writers.AnnotatedTable
+ import qualified Tests.Writers.TEI
+ import qualified Tests.Writers.Markua
++import qualified Tests.MediaBag
+ import Text.Pandoc.Shared (inDirectory)
+ tests :: FilePath -> TestTree
+@@ -58,6 +59,7 @@
+         [ Tests.Command.tests
+         , testGroup "Old" (Tests.Old.tests pandocPath)
+         , testGroup "Shared" Tests.Shared.tests
++        , testGroup "MediaBag" Tests.MediaBag.tests
+         , testGroup "Writers"
+           [ testGroup "Native" Tests.Writers.Native.tests
+           , testGroup "ConTeXt" Tests.Writers.ConTeXt.tests
diff -Nru pandoc- 
--- pandoc-    1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-    2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,52 @@
+Description: improve tests for fillMediaBag/extractMedia
+ Ensure that the current directory is not changed up if a test fails,
+ and fix messages for the assertion failures.
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/test/Tests/MediaBag.hs
++++ b/test/Tests/MediaBag.hs
+@@ -6,16 +6,15 @@
+ -- import Tests.Helpers
+ import Text.Pandoc.Class (extractMedia, fillMediaBag, runIOorExplode)
+ import System.IO.Temp (withTempDirectory)
++import Text.Pandoc.Shared (inDirectory)
+ import System.FilePath
+ import Text.Pandoc.Builder as B
+-import System.Directory (doesFileExist, copyFile, setCurrentDirectory, 
++import System.Directory (doesFileExist, copyFile)
+ tests :: [TestTree]
+ tests = [
+   testCase "test fillMediaBag & extractMedia" $
+-      withTempDirectory "." "extractMediaTest" $ \tmpdir -> do
+-        olddir <- getCurrentDirectory
+-        setCurrentDirectory tmpdir
++      withTempDirectory "." "extractMediaTest" $ \tmpdir -> inDirectory 
tmpdir $ do
+         copyFile "../../test/lalune.jpg" "moon.jpg"
+         let d = B.doc $
+                   B.para (B.image "../../test/lalune.jpg" "" mempty) <>
+@@ -26,14 +25,13 @@
+           fillMediaBag d
+           extractMedia "foo" d
+         exists1 <- doesFileExist ("foo" </> "moon.jpg")
+-        assertBool "file in directory extract with original name" exists1
++        assertBool "file in directory is not extracted with original name" 
+         exists2 <- doesFileExist ("foo" </> 
+-        assertBool "file above directory extracted with hashed name" exists2
++        assertBool "file above directory is not extracted with hashed name" 
+         exists3 <- doesFileExist ("foo" </> 
+         exists4 <- doesFileExist "a.lua"
+-        assertBool "data uri with malicious payload does not get written to 
arbitrary location"
++        assertBool "data uri with malicious payload gets written outside of 
destination dir"
+           (exists3 && not exists4)
+         exists5 <- doesFileExist ("foo" </> 
+-        assertBool "data uri with gif is properly decoded" exists5
+-        setCurrentDirectory olddir
++        assertBool "data uri with gif is not properly decoded" exists5
+   ]
diff -Nru pandoc- 
--- pandoc-      1970-01-01 
01:00:00.000000000 +0100
+++ pandoc-      2023-07-25 
23:01:50.000000000 +0200
@@ -0,0 +1,90 @@
+Description: fix new variant of the vulnerability in CVE-2023-35936
+ Guilhem Moulin noticed that the fix to CVE-2023-35936 was incomplete.
+ An attacker could get around it
+ by double-encoding the malicious extension
+ to create or override arbitrary files.
+ .
+        $ echo 
+        $ .cabal/bin/pandoc --extract-media=bar
+        <p><img
+        $ cat b.lua
+        print "hello"
+        $ find bar
+        bar/
+        bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+
+ .
+ This commit adds a test case for this more complex attack
+ and fixes the vulnerability.
+ (The fix is quite simple:
+ if the URL-unescaped filename or extension contains a '%',
+ we just use the sha1 hash of the contents as the canonical name,
+ just as we do if the filename contains '..'.)
+Origin: upstream,
+Author: John MacFarlane <>
+Forwarded: yes
+Last-Update: 2023-07-25
+This patch header follows DEP-3:
+--- a/src/Text/Pandoc/Class/IO.hs
++++ b/src/Text/Pandoc/Class/IO.hs
+@@ -222,6 +222,8 @@
+            -> m ()
+ writeMedia dir (fp, _mt, bs) = do
+   -- we normalize to get proper path separators for the platform
++  -- we unescape URI encoding, but given how insertMedia
++  -- is written, we shouldn't have any % in a canonical media name...
+   let fullpath = normalise $ dir </> unEscapeString fp
+   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+   logIOError $ BL.writeFile fullpath bs
+--- a/src/Text/Pandoc/MediaBag.hs
++++ b/src/Text/Pandoc/MediaBag.hs
+@@ -87,16 +87,17 @@
+         newpath = if isRelative fp''
+                        && isNothing uri
+                        && not (".." `isInfixOf` fp'')
++                       && '%' `notElem` fp''
+                      then fp''
+-                     else showDigest (sha1 contents) <> "." <> ext
++                     else showDigest (sha1 contents) <> ext
+         fallback = case takeExtension fp'' of
+                         ".gz" -> getMimeTypeDef $ dropExtension fp''
+                         _     -> getMimeTypeDef fp''
+         mt = fromMaybe fallback mbMime
+         path = maybe fp'' (unEscapeString . uriPath) uri
+         ext = case takeExtension path of
+-                '.':e -> e
+-                _ -> maybe "" T.unpack $ extensionFromMimeType mt
++                '.':e | '%' `notElem` e -> '.':e
++                _ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType 
+ -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+ lookupMedia :: FilePath
+--- a/test/Tests/MediaBag.hs
++++ b/test/Tests/MediaBag.hs
+@@ -19,7 +19,7 @@
+         let d = B.doc $
+                   B.para (B.image "../../test/lalune.jpg" "" mempty) <>
+                   B.para (B.image "moon.jpg" "" mempty) <>
+-                  B.para (B.image 
 "" mempty) <>
++                  B.para (B.image 
"" mempty) <>
+                   B.para (B.image 
 "" mempty)
+         runIOorExplode $ do
+           fillMediaBag d
+@@ -34,4 +34,14 @@
+           (exists3 && not exists4)
+         exists5 <- doesFileExist ("foo" </> 
+         assertBool "data uri with gif is not properly decoded" exists5
++        -- double-encoded version:
++        let e = B.doc $
++                  B.para (B.image 
 "" mempty)
++        runIOorExplode $ do
++          fillMediaBag e
++          extractMedia "bar" e
++        exists6 <- doesFileExist ("bar" </> 
++        exists7 <- doesFileExist "b.lua"
++        assertBool "data uri with double-encoded malicious payload gets 
written outside of destination dir"
++          (exists6 && not exists7)
+   ]
diff -Nru pandoc- 
--- pandoc-       2022-08-13 16:27:42.000000000 
+++ pandoc-       2023-07-25 23:01:50.000000000 
@@ -1,4 +1,10 @@
diff -Nru pandoc- 
--- pandoc- 1970-01-01 01:00:00.000000000 +0100
+++ pandoc- 2023-07-25 23:01:50.000000000 +0200
@@ -0,0 +1,9 @@
+  -
+  RELEASE: 'bookworm'

