This is an automated email from the ASF dual-hosted git repository. nicknezis pushed a commit to branch nicknezis/file-download-fix in repository https://gitbox.apache.org/repos/asf/incubator-heron.git
commit 6abb364e7464ca346c18e88b4174d6a028b058b9 Author: Nicholas Nezis <[email protected]> AuthorDate: Sun Jan 8 05:30:22 2023 -0500 Updated to fix broken file download --- heron/shell/src/python/handlers/downloadhandler.py | 2 +- heron/tools/tracker/src/python/routers/container.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/heron/shell/src/python/handlers/downloadhandler.py b/heron/shell/src/python/handlers/downloadhandler.py index a8c1d57c5a6..6bbed273030 100644 --- a/heron/shell/src/python/handlers/downloadhandler.py +++ b/heron/shell/src/python/handlers/downloadhandler.py @@ -63,7 +63,7 @@ class DownloadHandler(tornado.web.RequestHandler): length = int(4 * 1024 * 1024) offset = int(0) while True: - data = await utils.read_chunk(path, offset=offset, length=length, escape_data=False) + data = utils.read_chunk(path, offset=offset, length=length, escape_data=False) if self.connection_closed or 'data' not in data or len(data['data']) < length: break offset += length diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index 84ff17c19ed..0a1c963a64f 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -29,6 +29,7 @@ import httpx # from fastapi import Query from fastapi import Query, APIRouter from pydantic import BaseModel, Field +from starlette.background import BackgroundTask from starlette.responses import StreamingResponse from heron.proto import common_pb2, tmanager_pb2 @@ -58,7 +59,6 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"] url = f"http://{stmgr.host}:{stmgr.shell_port}/filedata/{path}" params = {"offset": offset, "length": length} - async with httpx.AsyncClient() as client: response = await client.get(url, params=params) return response.json() @@ -77,13 +77,16 @@ async def get_container_file( # pylint: disable=too-many-arguments topology = state.tracker.get_topology(cluster, role, environ, topology_name) stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"] url = f"http://{stmgr.host}:{stmgr.shell_port}/download/{path}" - _, _, filename = path.rpartition("/") - async with httpx.stream("GET", url) as response: - return await StreamingResponse( - content=response.iter_bytes(), + async with httpx.AsyncClient() as client: + request = client.build_request("GET", url) + response = await client.send(request, stream=True) + + return StreamingResponse( + response.aiter_bytes(), + background=BackgroundTask(response.aclose), headers={"Content-Disposition": f"attachment; filename={filename}"}, - ) + ) @router.get("/container/filestats") async def get_container_file_listing( # pylint: disable=too-many-arguments
