Created
January 29, 2024 16:28
-
-
Save sunipkm/a29a0122404be0670bc2f6767d89aa89 to your computer and use it in GitHub Desktop.
Read a file as text line-by-line from the end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # %% Imports | |
| from __future__ import annotations | |
| import os | |
| # %% | |
class FileBackwards:
    """Iterate over the lines of a file from the last line to the first.

    The file is opened in binary mode and read from its end in chunks of
    ``bufsize`` bytes. Lines are split on the newline byte and decoded as
    UTF-8 only once they are complete, so a multi-byte character that
    straddles a chunk boundary is never decoded in pieces. A single trailing
    newline at the end of the file does not produce an extra empty line.

    The underlying file is closed automatically when the iteration is
    exhausted. To release it earlier, call :meth:`close` or use the object
    as a context manager.

    Args:
        fname: Path of the file to read.
        bufsize: Number of bytes to read per chunk; must be positive.

    Raises:
        ValueError: If ``bufsize`` is not positive.
        OSError: If the file cannot be opened or its size determined.
    """

    def __init__(self, fname: str, bufsize: int = 8192):
        if bufsize <= 0:
            # A non-positive chunk size would never make progress.
            raise ValueError('bufsize must be a positive number of bytes')
        file = open(fname, 'rb')
        try:
            file.seek(0, os.SEEK_END)
            fsize = file.tell()
        except Exception:
            # Do not leak the handle if the file is not seekable.
            file.close()
            raise
        self._file = file
        self._fsize = fsize
        # Bytes at the start of the file that have not been read yet.
        self._fremain = fsize
        # Leading (possibly incomplete) line of the chunks read so far, as
        # bytes; None means "no pending segment".
        self._segment = None
        # Complete lines of the current chunk, in file order; consumed from
        # the end with pop().
        self._lines = []
        # Distance from the end of the file to the start of the last chunk.
        self._ofst = 0
        self._bufsize = bufsize

    def __iter__(self) -> 'FileBackwards':
        return self

    def __enter__(self) -> 'FileBackwards':
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the file and end the iteration; safe to call repeatedly."""
        self._file.close()
        self._fremain = 0
        self._lines = []
        self._segment = None

    def _read_chunk(self) -> None:
        """Read the next chunk towards the start of the file and split it."""
        is_first_chunk = self._fremain == self._fsize
        self._ofst = min(self._fsize, self._ofst + self._bufsize)
        self._file.seek(self._fsize - self._ofst)
        buffer = self._file.read(min(self._fremain, self._bufsize))
        # Always shrink by a full chunk so the loop terminates even if the
        # file was truncated underneath us and read() came back short.
        self._fremain = max(0, self._fremain - self._bufsize)
        # Drop the file's final newline (first chunk only) so it does not
        # show up as a spurious empty last line.
        if is_first_chunk and buffer.endswith(b'\n'):
            buffer = buffer[:-1]
        lines = buffer.split(b'\n')
        # The previous chunk's head segment is the tail of this chunk's
        # last line.
        if self._segment is not None:
            lines[-1] += self._segment
        # The first piece may continue into the next (earlier) chunk, so it
        # is held back as the new segment.
        self._segment = lines[0]
        self._lines = lines[1:]

    def __next__(self) -> str:
        # Refill only once every complete line of the previous chunk has
        # been handed out; keep reading while a chunk holds no complete line
        # (i.e. a single line spans several chunks).
        while not self._lines and self._fremain > 0:
            self._read_chunk()
        if self._lines:
            # Decode only complete lines, to avoid UTF-8 decode errors on
            # characters split across chunks.
            return self._lines.pop().decode()
        # Whole file consumed: what is left in the segment is the file's
        # first line. An empty file never sets a segment.
        segment, self._segment = self._segment, None
        self.close()
        if segment is None:
            raise StopIteration
        return segment.decode()
# %% Test
# Smoke test: echo the contents of ``test.txt`` to stdout, last line first.
test = FileBackwards('test.txt')
for line in iter(test):
    print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment