Wednesday, March 12, 2025

Medium / Hard - Python file-based questions - 1

Python Coding Questions

Medium

Question: Write a Python function to check if a given port is open on localhost.

import socket

def is_port_open(port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.connect(('127.0.0.1', port))
            return True  # Port is open
        except ConnectionRefusedError:
            return False  # Port is closed or not listening

# Example usage:
if is_port_open(8080):
    print("Port 8080 is open")
else:
    print("Port 8080 is not open")
Should the function handle timeouts? Should it support checking ports on remote hosts?
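
One possible answer to both follow-ups is sketched below: it takes a host argument and a timeout, and relies on socket.create_connection, which raises OSError for refused connections, unreachable hosts, and timeouts alike. The host and timeout parameters and the 1-second default are illustrative choices, not requirements.

import socket

def is_port_open(host, port, timeout=1.0):
    # create_connection handles address resolution and raises OSError
    # (including ConnectionRefusedError and socket.timeout) on any failure.
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True  # Connection succeeded, port is open
    except OSError:
        return False  # Refused, unreachable, or timed out

# Example usage:
print(is_port_open("127.0.0.1", 8080))
print(is_port_open("example.com", 443))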

Question: Write a Python function to parse /proc/cpuinfo and return the number of CPU cores.

def get_cpu_cores():
    # Reads the "cpu cores" field, i.e. physical cores per CPU package
    # (first matching line), not the total logical CPU count.
    cores = 0
    with open("/proc/cpuinfo", "r") as f:
        for line in f:
            if line.startswith("cpu cores"):
                cores = int(line.split(":")[1].strip())
                break  # Stop once the core count is found
    return cores


print(f"Number of CPU cores: {get_cpu_cores()}")
How should the function handle cases where /proc/cpuinfo is not available or has an unexpected format? How to handle hyperthreading (logical vs. physical cores)?
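
A sketch of one way to answer both follow-ups: fall back to os.cpu_count() for the logical count when /proc/cpuinfo is unavailable, and count distinct (physical id, core id) pairs for the physical count. This assumes a typical Linux /proc/cpuinfo layout; the helper names are illustrative.

import os

def get_logical_cores():
    # Logical CPUs, including hyperthreads; works on any platform.
    return os.cpu_count() or 1

def get_physical_cores():
    try:
        physical = set()
        phys_id = core_id = None
        with open("/proc/cpuinfo") as f:
            for line in f:
                if line.startswith("physical id"):
                    phys_id = line.split(":")[1].strip()
                elif line.startswith("core id"):
                    core_id = line.split(":")[1].strip()
                    physical.add((phys_id, core_id))  # one entry per real core
        return len(physical) or get_logical_cores()  # fall back if fields missing
    except OSError:
        # /proc/cpuinfo not available (e.g. non-Linux): use the logical count.
        return get_logical_cores()

print(f"Logical: {get_logical_cores()}, physical: {get_physical_cores()}")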

Question: Write a Python script that efficiently finds all files larger than 1GB in a given directory (and its subdirectories).

import os

def find_large_files(directory, size_threshold=1073741824):  # 1GB in bytes
    large_files = []
    for root, _, files in os.walk(directory):  # os.walk for recursive directory traversal
        for file in files:
            filepath = os.path.join(root, file)
            try:
                size = os.path.getsize(filepath)
                if size > size_threshold:
                    large_files.append(filepath)
            except OSError:  # Handle permission errors, broken symlinks, etc.
                print(f"Error accessing: {filepath}")  # Report and keep scanning
    return large_files

# Example Usage
large_files = find_large_files("/") # Search root directory, be careful!
for file in large_files:
    print(file)
How can we optimize this for very large directory structures? How to handle errors when accessing files (permissions, etc.)?
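
One possible optimization (Python 3.5+) is to traverse with os.scandir, which reuses the metadata fetched while listing each directory instead of issuing a separate os.path.getsize call per file, and reports per-entry errors without aborting the scan. This is a sketch under those assumptions, not a drop-in replacement; the recursive structure is one of several reasonable designs.

import os

def find_large_files_scandir(directory, size_threshold=1024**3):  # 1GB
    large_files = []
    try:
        entries = os.scandir(directory)
    except OSError as e:
        print(f"Error accessing: {directory} ({e})")
        return large_files
    with entries:
        for entry in entries:
            try:
                if entry.is_dir(follow_symlinks=False):
                    # Recurse into subdirectories.
                    large_files.extend(find_large_files_scandir(entry.path, size_threshold))
                elif entry.is_file(follow_symlinks=False) and entry.stat().st_size > size_threshold:
                    large_files.append(entry.path)
            except OSError as e:
                print(f"Error accessing: {entry.path} ({e})")  # keep scanning
    return large_files

# Example usage:
for path in find_large_files_scandir("/var/log"):
    print(path)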

Hard

Question: Write a Python function to implement a basic tail -f functionality. It should continuously monitor a given file and print any new lines appended to it. (Hint: Consider using os.stat() and file seeking)

import os
import time

def tail_f(filepath):
    try:
        with open(filepath, 'r') as f:
            st_results = os.stat(filepath)
            current_size = st_results.st_size
            f.seek(current_size)

            while True:
                where = f.tell()
                line = f.readline()
                if not line:
                    try:  # Handle potential OSError during stat
                        new_size = os.stat(filepath).st_size
                        if new_size < current_size:  # File truncated
                            f.seek(0)  # Go to the beginning of the file
                            current_size = new_size # Reset the size
                        else: # No truncation, possibly new content
                            time.sleep(0.1) # Only wait if no truncation
                            f.seek(where)  # Rewind for next readline

                    except OSError: # Handle errors if file is removed etc.
                        print(f"Error accessing file: {filepath}")
                        time.sleep(0.1) # Prevent excessive retries
                        continue  # Continue to the next iteration of the loop
                    
                else:
                    print(line, end="")
                    current_size = f.tell()  # Update current size

    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return
    except KeyboardInterrupt:
        print("\nExiting tail.")
        return


# Example Usage
tail_f("/var/log/syslog") # Test with a log file, or create a test file.
How can this be made more robust (e.g., handle file rotation)? How can we optimize for performance if the file is very actively being written to? How to handle cases where the file is deleted and recreated during monitoring? How to gracefully handle errors like permission denied or if the file is moved or renamed?
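
One way to handle rotation and delete-and-recreate, sketched below under the assumption of a POSIX filesystem where st_ino identifies a file: when no new data arrives, compare the inode of the open handle with the inode currently at the path and reopen if they differ; a truncation check similar to the original is kept as well. The function name and polling interval are illustrative.

import os
import time

def tail_f_rotating(filepath, poll_interval=0.5):
    f = None
    inode = None
    first_open = True
    try:
        while True:
            if f is None:
                try:
                    f = open(filepath, "r")
                except (FileNotFoundError, PermissionError):
                    time.sleep(poll_interval)  # wait for the file to (re)appear
                    continue
                inode = os.fstat(f.fileno()).st_ino
                if first_open:
                    f.seek(0, os.SEEK_END)  # skip existing content, like tail -f
                    first_open = False
            line = f.readline()
            if line:
                print(line, end="")
                continue
            # No new data: detect rotation (new inode at the path) or truncation.
            try:
                st = os.stat(filepath)
                if st.st_ino != inode:
                    f.close()
                    f = None  # path now points at a different file; reopen it
                    continue
                if st.st_size < f.tell():
                    f.seek(0)  # file was truncated in place; start over
                    continue
            except FileNotFoundError:
                pass  # file briefly missing mid-rotation; keep polling
            time.sleep(poll_interval)
    except KeyboardInterrupt:
        print("\nExiting tail.")
    finally:
        if f:
            f.close()

# Example usage:
# tail_f_rotating("/var/log/syslog")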
