|
| 1 | +import os, sys |
| 2 | + |
| 3 | +# Implement a function that works like tail. |
| 4 | +# tail -n 10 file.txt |
| 5 | +# will return last 10 lines from the file. |
| 6 | + |
| 7 | +# Issues: |
| 8 | +# Q: do we have to worry about -f? |
| 9 | +# A: No. |
| 10 | +# Q: what do we do if tail count is greater than lines of file? |
| 11 | +# A: give entire file |
| 12 | +# Q: how do we handle if last line doesn't contain a \n? |
| 13 | +# A: treat it like it has \n |
| 14 | + |
| 15 | +# Algorithm idea: |
| 16 | +# Scan backwards from the end of the file, counting line endings: |
| 17 | +# find the i-th line ending from the end, where i goes from 0 to n |
| 18 | +# and n is the number of lines to tail. |
| 19 | +# Then seek to just past that line ending and print the remaining bytes. |
| 20 | +# |
| 21 | +# Time Complexity: |
| 22 | +# Let m be the size of the file and n size of the output |
| 23 | +# and k be the blocksize |
| 24 | +# Complexity is O(min(m,n)/k) since we may have to read and print the entire file |
| 25 | +# Space Complexity: |
| 26 | +# O(min(m, k)) |
| 27 | + |
def get_tail_start(path, count, blocksize):
    """Return the byte offset at which the last *count* lines of *path* begin.

    The file is scanned backwards in non-overlapping blocks of at most
    *blocksize* bytes, counting newline bytes until the start of the
    requested tail is located.

    path      -- name of the file to inspect.
    count     -- number of trailing lines wanted.  A value <= 0 selects
                 nothing, so the file size (end of file) is returned.
    blocksize -- maximum number of bytes read per step; must be positive.

    Returns 0 when the file holds *count* lines or fewer, so the whole
    file is the tail.  A final line without a terminating newline is
    counted as a complete line.

    Raises ValueError if *blocksize* is not positive.
    """
    if blocksize <= 0:
        raise ValueError("blocksize must be positive")

    file_size = os.stat(path).st_size
    if count <= 0 or file_size == 0:
        return file_size

    newline = b"\n"
    # One extra newline has to be found: the one that ends the line just
    # before the tail.  The adjustment for a missing final newline is made
    # below, once the last block has been read.
    remaining = count + 1
    block_end = file_size
    last_block = True

    # Binary mode keeps positions as true byte offsets and avoids decoding
    # problems when a block boundary falls inside a multi-byte character.
    with open(path, "rb") as fd:
        while block_end > 0:
            # Blocks never overlap, so no newline is counted twice and the
            # scan always terminates once offset 0 has been processed.
            block_start = max(0, block_end - blocksize)
            fd.seek(block_start)
            block = fd.read(block_end - block_start)

            if last_block:
                last_block = False
                if not block.endswith(newline):
                    # Treat an unterminated last line as if it ended in "\n".
                    remaining -= 1

            search_end = len(block)
            while True:
                found = block.rfind(newline, 0, search_end)
                if found == -1:
                    break
                remaining -= 1
                if remaining == 0:
                    # The tail starts right after this newline.
                    return block_start + found + 1
                search_end = found

            block_end = block_start

    # Fewer than *count* lines in the file: the tail is the entire file.
    return 0
| 54 | + |
| 55 | + |
def find_line_end(block, remaining):
    """Scan *block* backwards for up to *remaining* newline characters.

    Returns the two-element list [position, 0] when the last newline
    needed lies inside this block, where position is its index.  Returns
    [None, still_needed] when the block runs out of newlines first, where
    still_needed is the count left to find in earlier blocks.
    """
    search_limit = len(block)
    while True:
        if remaining == 0:
            return [search_limit, 0]
        newline_at = block.rfind("\n", 0, search_limit)
        if newline_at < 0:
            # No more newlines here; report how many are still owed.
            return [None, remaining]
        search_limit = newline_at
        remaining -= 1
| 67 | + |
def print_tail(path, pos, blocksize):
    """Copy the contents of *path* from byte offset *pos* to standard output.

    path      -- name of the file to read.
    pos       -- byte offset at which to start copying.
    blocksize -- maximum number of bytes read per step.

    The file is read in binary mode and the bytes are written unchanged
    to the file descriptor behind sys.stdout, so the output does not
    depend on the file's text encoding or newline convention.
    """
    # Push out anything already buffered by print() so that it cannot
    # appear after the bytes written directly to the descriptor below.
    sys.stdout.flush()
    out_fd = sys.stdout.fileno()
    with open(path, "rb") as fd:
        fd.seek(pos)
        while True:
            block = fd.read(blocksize)
            if not block:
                break
            # os.write may accept fewer bytes than offered; keep writing
            # until the whole block has been handed over.
            while block:
                written = os.write(out_fd, block)
                block = block[written:]
| 78 | + |
if __name__ == '__main__':
    # Demo driver: tail this script's own source file.
    # NOTE(review): the indentation of this block was reconstructed from a
    # flattened view of the file; the nesting level of the final separator
    # print is an assumption -- confirm against the original source.
    count = 5  # immediately superseded by the inner loop variable below
    path = __file__
    # path = './test.txt'
    # blocksize = 10000
    # pos = get_tail_start(path, count, blocksize)
    # print_tail(path, pos, blocksize)
    # print('-----')
    # Try several block sizes, and for each one tail counts 0 through 4.
    for blocksize in [5, 100, 100000]:
        for count in range(5):
            pos = get_tail_start(path, count, blocksize)
            # Header line showing the requested tail count.
            print('-------------------- %d' % count)
            print_tail(path, pos, blocksize)
        print('====================')
0 commit comments