Skip to content

Commit

Permalink
Files limit v2 #203 (#369)
Browse files Browse the repository at this point in the history
* add WaiParseSafe
* traverse multipart req bodies only on demand
* update upload.hs
* defer form parsing
* use latest wai-extra
* get rid of inlined wai-extra code
* add hspec-wai extras
* fix files testing, add exception handling cases
* add tests for 'files' and 'filesOpts'

---------

Co-authored-by: Marco Zocca <[email protected]>
  • Loading branch information
ocramz and Marco Zocca authored Mar 9, 2024
1 parent 66d60f7 commit d376f43
Show file tree
Hide file tree
Showing 16 changed files with 434 additions and 135 deletions.
21 changes: 19 additions & 2 deletions Web/Scotty.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ module Web.Scotty
, capture, regex, function, literal
-- ** Accessing the Request and its fields
, request, header, headers, body, bodyReader
, jsonData, files
, jsonData
-- ** Accessing Path, Form and Query Parameters
, param, params
, pathParam, captureParam, formParam, queryParam
, pathParamMaybe, captureParamMaybe, formParamMaybe, queryParamMaybe
, pathParams, captureParams, formParams, queryParams
-- *** Files
, files, filesOpts, Trans.ParseRequestBodyOptions
-- ** Modifying the Response and Redirecting
, status, addHeader, setHeader, redirect
-- ** Setting Response Body
Expand Down Expand Up @@ -65,6 +67,7 @@ import Network.HTTP.Types (Status, StdMethod, ResponseHeaders)
import Network.Socket (Socket)
import Network.Wai (Application, Middleware, Request, StreamingBody)
import Network.Wai.Handler.Warp (Port)
import qualified Network.Wai.Parse as W (defaultParseRequestBodyOptions)

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.6.3

The qualified import of ‘Network.Wai.Parse’ is redundant

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.0.2

The qualified import of ‘Network.Wai.Parse’ is redundant

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.2.8

The qualified import of ‘Network.Wai.Parse’ is redundant

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 8.10.7

The qualified import of ‘Network.Wai.Parse’ is redundant

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.6.2

The qualified import of ‘Network.Wai.Parse’ is redundant

Check warning on line 70 in Web/Scotty.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.4.6

The qualified import of ‘Network.Wai.Parse’ is redundant

import Web.Scotty.Internal.Types (ScottyT, ActionT, ErrorHandler, Param, RoutePattern, Options, defaultOptions, File, Kilobytes, ScottyState, defaultScottyState, ScottyException, StatusError(..), Content(..))
import UnliftIO.Exception (Handler(..), catch)
Expand Down Expand Up @@ -231,9 +234,19 @@ request :: ActionM Request
request = Trans.request

-- | Get list of uploaded files.
files :: ActionM [File]
--
-- NB: Loads all file contents in memory with options 'W.defaultParseRequestBodyOptions'
files :: ActionM [File ByteString]
files = Trans.files

-- | Get list of temp files and form parameters decoded from multipart payloads.
--
-- NB the temp files are deleted when the continuation exits
filesOpts :: Trans.ParseRequestBodyOptions
-> ([Param] -> [File FilePath] -> ActionM a) -- ^ temp files validation, storage etc
-> ActionM a
filesOpts = Trans.filesOpts

-- | Get a request header. Header name is case-insensitive.
header :: Text -> ActionM (Maybe Text)
header = Trans.header
Expand All @@ -243,6 +256,8 @@ headers :: ActionM [(Text, Text)]
headers = Trans.headers

-- | Get the request body.
--
-- NB: loads the entire request body in memory
body :: ActionM ByteString
body = Trans.body

Expand All @@ -253,6 +268,8 @@ bodyReader :: ActionM (IO BS.ByteString)
bodyReader = Trans.bodyReader

-- | Parse the request body as a JSON object and return it. Raises an exception if parse is unsuccessful.
--
-- NB: uses 'body' internally
jsonData :: FromJSON a => ActionM a
jsonData = Trans.jsonData

Expand Down
83 changes: 72 additions & 11 deletions Web/Scotty/Action.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ module Web.Scotty.Action
, file
, rawResponse
, files
, filesOpts
, W.ParseRequestBodyOptions, W.defaultParseRequestBodyOptions
, finish
, header
, headers
Expand Down Expand Up @@ -66,6 +68,7 @@ import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO(..))
import UnliftIO (MonadUnliftIO(..))
import Control.Monad.Reader (MonadReader(..), ReaderT(..))
import Control.Monad.Trans.Resource (withInternalState, runResourceT)

import Control.Concurrent.MVar

Expand All @@ -74,6 +77,7 @@ import Data.Bool (bool)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.CaseInsensitive as CI
import Data.Traversable (for)
import Data.Int
import Data.Maybe (maybeToList)
import qualified Data.Text as T
Expand All @@ -90,6 +94,8 @@ import Network.HTTP.Types
import Network.HTTP.Types.Status
#endif
import Network.Wai (Request, Response, StreamingBody, Application, requestHeaders)
import Network.Wai.Handler.Warp (InvalidRequest(..))
import qualified Network.Wai.Parse as W (FileInfo(..), ParseRequestBodyOptions, defaultParseRequestBodyOptions)

import Numeric.Natural

Expand All @@ -99,6 +105,7 @@ import UnliftIO.Exception (Handler(..), catch, catches, throwIO)

import Network.Wai.Internal (ResponseReceived(..))


-- | Evaluate a route, catch all exceptions (user-defined ones, internal and all remaining, in this order)
-- and construct the 'Response'
--
Expand Down Expand Up @@ -169,11 +176,25 @@ scottyExceptionHandler = Handler $ \case
FailedToParseParameter k v e -> do
status status400
text $ T.unwords [ "Failed to parse parameter", k, v, ":", e]
WarpRequestException we -> case we of
RequestHeaderFieldsTooLarge -> do
status status413
weo -> do -- FIXME fall-through case on InvalidRequest, it would be nice to return more specific error messages and codes here
status status400
text $ T.unwords ["Request Exception:", T.pack (show weo)]
WaiRequestParseException we -> do
status status413 -- 413 Content Too Large https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/413
text $ T.unwords ["wai-extra Exception:", T.pack (show we)]
ResourceTException rte -> do
status status500
text $ T.unwords ["resourcet Exception:", T.pack (show rte)]

-- | Uncaught exceptions turn into HTTP 500 Server Error codes
someExceptionHandler :: MonadIO m => ErrorHandler m
someExceptionHandler = Handler $ \case
(_ :: E.SomeException) -> status status500
(e :: E.SomeException) -> do
status status500
text $ T.unwords ["Uncaught server exception:", T.pack (show e)]

-- | Throw a "500 Server Error" 'StatusError', which can be caught with 'catch'.
--
Expand Down Expand Up @@ -254,8 +275,29 @@ request :: Monad m => ActionT m Request
request = ActionT $ envReq <$> ask

-- | Get list of uploaded files.
files :: Monad m => ActionT m [File]
files = ActionT $ envFiles <$> ask
--
-- NB: Loads all file contents in memory with options 'W.defaultParseRequestBodyOptions'
files :: MonadUnliftIO m => ActionT m [File BL.ByteString]
files = runResourceT $ withInternalState $ \istate -> do
(_, fs) <- formParamsAndFilesWith istate W.defaultParseRequestBodyOptions
for fs (\(fname, f) -> do
bs <- liftIO $ BL.readFile (W.fileContent f)
pure (fname, f{ W.fileContent = bs})
)


-- | Get list of uploaded temp files and form parameters decoded from multipart payloads.
--
-- NB the temp files are deleted when the continuation exits.
filesOpts :: MonadUnliftIO m =>
W.ParseRequestBodyOptions
-> ([Param] -> [File FilePath] -> ActionT m a) -- ^ temp files validation, storage etc
-> ActionT m a
filesOpts prbo io = runResourceT $ withInternalState $ \istate -> do
(ps, fs) <- formParamsAndFilesWith istate prbo
io ps fs



-- | Get a request header. Header name is case-insensitive.
header :: (Monad m) => T.Text -> ActionT m (Maybe T.Text)
Expand All @@ -272,6 +314,8 @@ headers = do
| (k,v) <- hs ]

-- | Get the request body.
--
-- NB This loads the whole request body in memory at once.
body :: (MonadIO m) => ActionT m BL.ByteString
body = ActionT ask >>= (liftIO . envBody)

Expand All @@ -290,6 +334,8 @@ bodyReader = ActionT $ envBodyChunk <$> ask
-- 422 Unprocessable Entity.
--
-- These status codes are as per https://www.restapitutorial.com/httpstatuscodes.html.
--
-- NB : Internally this uses 'body'.
jsonData :: (A.FromJSON a, MonadIO m) => ActionT m a
jsonData = do
b <- body
Expand All @@ -311,7 +357,7 @@ param :: (Parsable a, MonadIO m) => T.Text -> ActionT m a
param k = do
val <- ActionT $ (lookup k . getParams) <$> ask
case val of
Nothing -> raiseStatus status500 $ "Param: " <> k <> " not found!" -- FIXME
Nothing -> raiseStatus status500 $ "Param: " <> k <> " not found!"
Just v -> either (const next) return $ parseParam (TL.fromStrict v)
{-# DEPRECATED param "(#204) Not a good idea to treat all parameters identically. Use captureParam, formParam and queryParam instead. "#-}

Expand Down Expand Up @@ -342,8 +388,14 @@ pathParam k = do
-- * This function raises a code 400 also if the parameter is found, but 'parseParam' fails to parse to the correct type.
--
-- /Since: 0.20/
formParam :: (Parsable a, MonadIO m) => T.Text -> ActionT m a
formParam = paramWith FormFieldNotFound envFormParams
formParam :: (MonadUnliftIO m, Parsable b) => T.Text -> ActionT m b
formParam k = runResourceT $ withInternalState $ \istate -> do
(ps, _) <- formParamsAndFilesWith istate W.defaultParseRequestBodyOptions
case lookup k ps of
Nothing -> throwIO $ FormFieldNotFound k
Just v -> case parseParam $ TL.fromStrict v of
Left e -> throwIO $ FailedToParseParameter k v (TL.toStrict e)
Right a -> pure a

-- | Look up a query parameter.
--
Expand Down Expand Up @@ -378,8 +430,14 @@ captureParamMaybe = paramWithMaybe envPathParams
-- NB : Doesn't throw exceptions, so developers must 'raiseStatus' or 'throw' to signal something went wrong.
--
-- /Since: 0.21/
formParamMaybe :: (Parsable a, Monad m) => T.Text -> ActionT m (Maybe a)
formParamMaybe = paramWithMaybe envFormParams
formParamMaybe :: (MonadUnliftIO m, Parsable a) =>
T.Text -> ActionT m (Maybe a)
formParamMaybe k = runResourceT $ withInternalState $ \istate -> do
(ps, _) <- formParamsAndFilesWith istate W.defaultParseRequestBodyOptions
case lookup k ps of
Nothing -> pure Nothing
Just v -> either (const $ pure Nothing) (pure . Just) $ parseParam $ TL.fromStrict v


-- | Look up a query parameter. Returns 'Nothing' if the parameter is not found or cannot be parsed at the right type.
--
Expand Down Expand Up @@ -440,8 +498,10 @@ captureParams :: Monad m => ActionT m [Param]
captureParams = paramsWith envPathParams

-- | Get form parameters
formParams :: Monad m => ActionT m [Param]
formParams = paramsWith envFormParams
formParams :: MonadUnliftIO m => ActionT m [Param]
formParams = runResourceT $ withInternalState $ \istate -> do
fst <$> formParamsAndFilesWith istate W.defaultParseRequestBodyOptions

-- | Get query parameters
queryParams :: Monad m => ActionT m [Param]
queryParams = paramsWith envQueryParams
Expand All @@ -450,8 +510,9 @@ paramsWith :: Monad m => (ActionEnv -> a) -> ActionT m a
paramsWith f = ActionT (f <$> ask)

{-# DEPRECATED getParams "(#204) Not a good idea to treat all parameters identically" #-}
-- | Returns path and query parameters as a single list
getParams :: ActionEnv -> [Param]
getParams e = envPathParams e <> envFormParams e <> envQueryParams e
getParams e = envPathParams e <> [] <> envQueryParams e


-- === access the fields of the Response being constructed
Expand Down
111 changes: 68 additions & 43 deletions Web/Scotty/Body.hs
Original file line number Diff line number Diff line change
@@ -1,26 +1,35 @@
{-# LANGUAGE FlexibleContexts, FlexibleInstances, RecordWildCards,
OverloadedStrings, MultiWayIf #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# language OverloadedStrings #-}
{-# language ScopedTypeVariables #-}
module Web.Scotty.Body (
newBodyInfo,
cloneBodyInfo

, getFormParamsAndFilesAction
, getBodyAction
, getBodyChunkAction
-- wai-extra
, W.RequestParseException(..)
) where

import Control.Concurrent.MVar
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource (InternalState)
import Data.Bifunctor (first, bimap)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe
import qualified GHC.Exception as E (throw)
import Network.Wai (Request(..), getRequestBodyChunk)
import qualified Network.Wai.Parse as W (File, Param, getRequestBodyType, BackEnd, lbsBackEnd, sinkRequestBody)
import Web.Scotty.Action (Param)
import Web.Scotty.Internal.Types (BodyInfo(..), BodyChunkBuffer(..), BodyPartiallyStreamed(..), RouteOptions(..))
import Web.Scotty.Util (readRequestBody, strictByteStringToLazyText, decodeUtf8Lenient)
import qualified Network.Wai.Handler.Warp as Warp (InvalidRequest(..))
import qualified Network.Wai.Parse as W (File, Param, getRequestBodyType, tempFileBackEnd, RequestBodyType(..), sinkRequestBodyEx, RequestParseException(..), ParseRequestBodyOptions)
-- import UnliftIO (MonadUnliftIO(..))
import UnliftIO.Exception (Handler(..), catches, throwIO)

import Web.Scotty.Internal.Types (BodyInfo(..), BodyChunkBuffer(..), BodyPartiallyStreamed(..), RouteOptions(..), File, ScottyException(..), Param)
import Web.Scotty.Util (readRequestBody, decodeUtf8Lenient)


-- | Make a new BodyInfo with readProgress at 0 and an empty BodyChunkBuffer.
newBodyInfo :: (MonadIO m) => Request -> m BodyInfo
Expand All @@ -36,26 +45,62 @@ cloneBodyInfo (BodyInfo _ chunkBufferVar getChunk) = liftIO $ do
cleanReadProgressVar <- newMVar 0
return $ BodyInfo cleanReadProgressVar chunkBufferVar getChunk

-- | Get the form params and files from the request. Requires reading the whole body.
getFormParamsAndFilesAction :: Request -> BodyInfo -> RouteOptions -> IO ([Param], [W.File BL.ByteString])
getFormParamsAndFilesAction req bodyInfo opts = do
let shouldParseBody = isJust $ W.getRequestBodyType req
-- | Get the form params and files from the request.
--
-- NB : catches exceptions from 'warp' and 'wai-extra' and wraps them into 'ScottyException'
getFormParamsAndFilesAction ::
InternalState
-> W.ParseRequestBodyOptions
-> Request -- ^ only used for its body type
-> BodyInfo -- ^ the request body contents are read from here
-> RouteOptions
-> IO ([Param], [File FilePath])
getFormParamsAndFilesAction istate prbo req bodyInfo opts = do
let
bs2t = decodeUtf8Lenient
convertBoth = bimap bs2t bs2t
convertKey = first bs2t
bs <- getBodyAction bodyInfo opts
let
wholeBody = BL.toChunks bs
(formparams, fs) <- parseRequestBodyExBS istate prbo wholeBody (W.getRequestBodyType req) `catches` handleWaiParseSafeExceptions
return (convertBoth <$> formparams, convertKey <$> fs)

-- | Wrap exceptions from upstream libraries into 'ScottyException'
handleWaiParseSafeExceptions :: MonadIO m => [Handler m a]
handleWaiParseSafeExceptions = [h1, h2]
where
h1 = Handler (\ (e :: W.RequestParseException ) -> throwIO $ WaiRequestParseException e)
h2 = Handler (\(e :: Warp.InvalidRequest) -> throwIO $ WarpRequestException e)

-- | Adapted from wai-extra's Network.Wai.Parse, modified to accept body as list of Bytestrings.
-- Reason: WAI's requestBody is an IO action that returns the body as chunks. Once read,
-- they can't be read again. We read them into a lazy Bytestring, so Scotty user can get
-- the raw body, even if they also want to call wai-extra's parsing routines.
parseRequestBodyExBS :: MonadIO m =>
InternalState
-> W.ParseRequestBodyOptions
-> [B.ByteString]
-> Maybe W.RequestBodyType
-> m ([W.Param], [W.File FilePath])
parseRequestBodyExBS istate o bl rty =
case rty of
Nothing -> return ([], [])
Just rbt -> do
mvar <- liftIO $ newMVar bl -- MVar is a bit of a hack so we don't have to inline
-- large portions of Network.Wai.Parse
let provider = modifyMVar mvar $ \bsold -> case bsold of
[] -> return ([], B.empty)
(b:bs) -> return (bs, b)
liftIO $ W.sinkRequestBodyEx o (W.tempFileBackEnd istate) rbt provider

if shouldParseBody
then
do
bs <- getBodyAction bodyInfo opts
let wholeBody = BL.toChunks bs
(formparams, fs) <- parseRequestBody wholeBody W.lbsBackEnd req -- NB this loads the whole body into memory
let convert (k, v) = (decodeUtf8Lenient k, decodeUtf8Lenient v)
return (convert <$> formparams, fs)
else
return ([], [])

-- | Retrieve the entire body, using the cached chunks in the BodyInfo and reading any other
-- chunks if they still exist.
-- Mimic the previous behavior by throwing BodyPartiallyStreamed if the user has already
-- Mimic the previous behavior by throwing 'BodyPartiallyStreamed' if the user has already
-- started reading the body by chunks.
--
-- throw 'ScottyException' if request body too big
getBodyAction :: BodyInfo -> RouteOptions -> IO (BL.ByteString)
getBodyAction (BodyInfo readProgress chunkBufferVar getChunk) opts =
modifyMVar readProgress $ \index ->
Expand All @@ -77,25 +122,5 @@ getBodyChunkAction (BodyInfo readProgress chunkBufferVar getChunk) =
| hasFinished -> return (bcb, (index, mempty))
| otherwise -> do
newChunk <- getChunk
return (BodyChunkBuffer (newChunk == mempty) (chunks ++ [newChunk]), (index + 1, newChunk))

return (BodyChunkBuffer (B.null newChunk) (chunks ++ [newChunk]), (index + 1, newChunk))

-- Stolen from wai-extra's Network.Wai.Parse, modified to accept body as list of Bytestrings.
-- Reason: WAI's requestBody is an IO action that returns the body as chunks. Once read,
-- they can't be read again. We read them into a lazy Bytestring, so Scotty user can get
-- the raw body, even if they also want to call wai-extra's parsing routines.
parseRequestBody :: MonadIO m
=> [B.ByteString]
-> W.BackEnd y
-> Request
-> m ([W.Param], [W.File y])
parseRequestBody bl s r =
case W.getRequestBodyType r of
Nothing -> return ([], [])
Just rbt -> do
mvar <- liftIO $ newMVar bl -- MVar is a bit of a hack so we don't have to inline
-- large portions of Network.Wai.Parse
let provider = modifyMVar mvar $ \bsold -> case bsold of
[] -> return ([], B.empty)
(b:bs) -> return (bs, b)
liftIO $ W.sinkRequestBody s rbt provider
Loading

0 comments on commit d376f43

Please sign in to comment.