file_downloader: stream downloads in a single HTTP request (#37549)

The Python file downloader issued a separate HTTP Range request for every
1 MB chunk via URLFile.read(), paying a request round trip per chunk and
causing massive latency overhead. Use a single streaming GET request
instead, matching the old C++ behavior.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Shane Smiskol
2026-03-04 03:16:29 -08:00
committed by GitHub
parent 20d484c7cb
commit 6795b09d0a
+13 -4
View File
@@ -60,8 +60,16 @@ def cmd_download(args):
return
try:
uf = URLFile(url, cache=False)
total = uf.get_length()
# Stream the file in a single HTTP request instead of making
# a separate Range request per chunk (which was very slow).
pool = URLFile.pool_manager()
r = pool.request("GET", url, preload_content=False)
if r.status not in (200, 206):
sys.stderr.write(f"ERROR:HTTP {r.status}\n")
sys.stderr.flush()
sys.exit(1)
total = int(r.headers.get('content-length', 0))
if total <= 0:
sys.stderr.write("ERROR:File not found or empty\n")
sys.stderr.flush()
@@ -73,8 +81,7 @@ def cmd_download(args):
downloaded = 0
chunk_size = 1024 * 1024
with os.fdopen(tmp_fd, 'wb') as f:
while downloaded < total:
data = uf.read(min(chunk_size, total - downloaded))
for data in r.stream(chunk_size):
f.write(data)
downloaded += len(data)
sys.stderr.write(f"PROGRESS:{downloaded}:{total}\n")
@@ -91,6 +98,8 @@ def cmd_download(args):
except OSError:
pass
raise
finally:
r.release_conn()
except Exception as e:
sys.stderr.write(f"ERROR:{e}\n")