Skip to content

Commit

Permalink
gh-129005: Align FileIO.readall allocation
Browse files Browse the repository at this point in the history
Both the `_pyio` and `_io` (C) implementations of `FileIO.readall` now use a
pre-allocated buffer of length `bufsize`, fill it using `readinto`, and have
matching "expand buffer" logic.

On my machine this takes:

`./python -m test -M8g -uall test_largefile -m test_large_read -v`
from ~3.7 seconds to ~3.3 seconds
  • Loading branch information
cmaloney committed Jan 29, 2025
1 parent 41ad2bb commit f4a8de5
Showing 1 changed file with 18 additions and 8 deletions.
26 changes: 18 additions & 8 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1674,22 +1674,32 @@ def readall(self):
except OSError:
pass

result = bytearray()
result = bytearray(bufsize)
bytes_read = 0
while True:
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
if bytes_read >= bufsize:
# Parallels _io/fileio.c new_buffersize
if bufsize > 65536:
addend = bufsize >> 3
else:
addend = bufsize + 256
if addend < DEFAULT_BUFFER_SIZE:
addend = DEFAULT_BUFFER_SIZE
bufsize += addend
result[bytes_read:bufsize] = b'\0'
assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
n = bufsize - len(result)
try:
chunk = os.read(self._fd, n)
n = os.readinto(self._fd, memoryview(result)[bytes_read:])
except BlockingIOError:
if result:
if bytes_read > 0:
break
return None
if not chunk: # reached the end of the file
if n == 0: # reached the end of the file
break
result += chunk
bytes_read += n

del result[bytes_read:]
return bytes(result)

def readinto(self, buffer):
Expand Down

0 comments on commit f4a8de5

Please sign in to comment.