Python File Handling and I/O Operations: A Practical Guide


Python File Handling and I/O Operations: A Practical Guide

File handling is a fundamental aspect of programming, allowing you to persist data, process large datasets, and interact with the file system. This guide covers Python's file handling capabilities and includes practical examples and a complete project.

Basic File Operations

Reading Files

Python offers several ways to read files:

# Reading entire file
# An explicit encoding keeps the result the same on every machine; without it
# Python falls back to a locale-dependent default.
with open('example.txt', 'r', encoding='utf-8') as file:
    content = file.read()
    print(content)

# Reading line by line
# Iterating the file object yields one line at a time, so the whole file is
# never held in memory.
with open('example.txt', 'r', encoding='utf-8') as file:
    for line in file:
        print(line.strip())

# Reading specific number of characters
with open('example.txt', 'r', encoding='utf-8') as file:
    chunk = file.read(100)  # Read first 100 characters

Writing Files

# Writing to a file
# 'w' truncates an existing file; the explicit encoding makes the bytes on
# disk independent of the platform's locale.
with open('output.txt', 'w', encoding='utf-8') as file:
    file.write('Hello, World!\n')
    file.write('This is a new line.')

# Appending to a file
# Use the same encoding as the original write so the file stays consistent.
with open('output.txt', 'a', encoding='utf-8') as file:
    file.write('\nAppending new content')

File Modes

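  • 'r': Read (default); the file must already exist
  • 'w': Write; creates the file, or truncates it if it already exists
  • 'a': Append; creates the file if it does not exist
  • 'x': Exclusive creation; fails with FileExistsError if the file already exists
  • 'b': Binary mode; combined with another mode, e.g. 'rb' or 'wb'
  • '+': Read and write; combined with another mode, e.g. 'r+' or 'w+'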

Working with Different File Types

CSV Files

import csv

# Writing CSV
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'San Francisco'],
]

# newline='' hands control of line endings to the csv module.
with open('people.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerows(data)

# Reading CSV
# newline='' matters on read as well: it lets the csv module interpret
# newlines embedded inside quoted fields correctly.
with open('people.csv', 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)  # every field comes back as a string, e.g. '25'

JSON Files

import json

# Writing JSON
data = {
    'name': 'Alice',
    'age': 25,
    'city': 'New York',
    'hobbies': ['reading', 'hiking'],
}

# JSON files are exchanged as UTF-8, so state the encoding explicitly instead
# of relying on the platform default.
with open('data.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

# Reading JSON
with open('data.json', 'r', encoding='utf-8') as file:
    loaded_data = json.load(file)
    print(loaded_data)

Binary Files

# Writing binary data
data = b'\x48\x65\x6c\x6c\x6f'  # the five bytes that spell "Hello"
with open('binary.bin', mode='wb') as sink:
    sink.write(data)

# Reading binary data
# Binary mode returns a bytes object; no text decoding takes place.
with open('binary.bin', mode='rb') as source:
    content = source.read()
print(content)  # b'Hello'

File System Operations

import os
import shutil

# Check if file exists
if os.path.exists('file.txt'):
    print('File exists')

# Create directory
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs('new_directory', exist_ok=True)

# List directory contents
files = os.listdir('.')
print(files)

# Copy file
shutil.copy('source.txt', 'destination.txt')

# Move file
# shutil.move also works when source and destination are on different
# filesystems, where a plain os.rename fails.
shutil.move('old_name.txt', 'new_name.txt')

# Delete file
os.remove('file_to_delete.txt')

Error Handling in File Operations

# Handle the most specific failures first, then fall back to the general case.
try:
    with open('nonexistent.txt', 'r', encoding='utf-8') as file:
        content = file.read()
except FileNotFoundError:
    print('File not found')
except PermissionError:
    print('Permission denied')
except OSError as e:
    # OSError is the base class of the two exceptions above; IOError is only
    # a legacy alias for it in Python 3.
    print(f'An I/O error occurred: {e}')

Project: File Management Utility

Let's create a practical file management utility that can organize files by extension, search for files, and perform basic file operations.

import os
import shutil
from datetime import datetime

class FileManager:
    """Utility for organizing, searching, inspecting, and backing up files.

    ``organize_by_extension`` and ``search_files`` operate on ``root_dir``;
    ``get_file_info`` and ``create_backup`` use the path they are given as-is.
    """

    def __init__(self, root_dir):
        """Remember the directory to manage.

        Args:
            root_dir: Directory used by ``organize_by_extension`` and
                ``search_files``. It is not validated here.
        """
        self.root_dir = root_dir

    @staticmethod
    def _unique_destination(dir_path, filename):
        """Return a path for ``filename`` inside ``dir_path`` that is unused.

        If ``filename`` is already taken, a numeric suffix is inserted before
        the extension (``report.pdf`` -> ``report_1.pdf``, ``report_2.pdf``,
        ...) until a free name is found.
        """
        candidate = os.path.join(dir_path, filename)
        if not os.path.exists(candidate):
            return candidate

        stem, suffix = os.path.splitext(filename)
        counter = 1
        while True:
            candidate = os.path.join(dir_path, f"{stem}_{counter}{suffix}")
            if not os.path.exists(candidate):
                return candidate
            counter += 1

    def organize_by_extension(self):
        """Move each file directly inside ``root_dir`` into a per-extension folder.

        The folder is named after the lower-cased extension without its dot
        (``Report.PDF`` goes to ``pdf/``); files without an extension go to
        ``no_extension/``. Subdirectories are left untouched. A file is never
        moved on top of an existing one: when the target name is taken, a
        numeric suffix is added instead.
        """
        for filename in os.listdir(self.root_dir):
            src = os.path.join(self.root_dir, filename)
            if not os.path.isfile(src):
                continue

            # splitext keeps the leading dot; [1:] drops it.
            ext = os.path.splitext(filename)[1][1:].lower() or 'no_extension'

            dir_path = os.path.join(self.root_dir, ext)
            os.makedirs(dir_path, exist_ok=True)

            # Pick a free name so an earlier file with the same name is not
            # silently replaced (or the move rejected, depending on platform).
            shutil.move(src, self._unique_destination(dir_path, filename))

    def search_files(self, pattern):
        """Find files under ``root_dir`` whose name contains ``pattern``.

        The match is a case-insensitive substring test on the file name only
        (not on the directory part), and the whole tree is searched.

        Args:
            pattern: Text to look for in file names.

        Returns:
            A list of paths, each built by joining the directory being walked
            with the matching file name.
        """
        needle = pattern.lower()  # lower-case once, not once per file
        results = []
        for root, _, files in os.walk(self.root_dir):
            for filename in files:
                if needle in filename.lower():
                    results.append(os.path.join(root, filename))
        return results

    def get_file_info(self, filepath):
        """Return basic metadata for ``filepath``.

        Args:
            filepath: Path of the file or directory to inspect.

        Returns:
            ``None`` if the path cannot be stat'ed (for example because it
            does not exist); otherwise a dict with the keys ``name``,
            ``size`` (bytes), ``created``, ``modified`` (both ``datetime``
            objects in local time) and ``is_file``.

            ``created`` uses the file's birth time where the platform's stat
            result exposes ``st_birthtime``. Elsewhere it falls back to
            ``st_ctime``, which on Unix is the time of the last metadata
            change rather than a true creation time.
        """
        # Stat once and handle failure, rather than checking for existence
        # first: the file could disappear between the check and the stat.
        try:
            stats = os.stat(filepath)
        except (OSError, ValueError):
            return None

        created_ts = getattr(stats, 'st_birthtime', stats.st_ctime)
        return {
            'name': os.path.basename(filepath),
            'size': stats.st_size,
            'created': datetime.fromtimestamp(created_ts),
            'modified': datetime.fromtimestamp(stats.st_mtime),
            'is_file': os.path.isfile(filepath),
        }

    def create_backup(self, filepath):
        """Copy ``filepath`` to ``<filepath>.backup`` next to the original.

        ``shutil.copy2`` is used so that file metadata such as timestamps is
        copied along with the content where possible. An existing backup at
        the same path is replaced.

        Args:
            filepath: Path of the file to back up.

        Returns:
            The path of the backup that was written.

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")

        backup_name = f"{filepath}.backup"
        shutil.copy2(filepath, backup_name)
        return backup_name

# Example usage
if __name__ == '__main__':
    # Point the manager at the directory to work on
    file_manager = FileManager('my_files')

    # Sort the directory's files into one folder per extension
    file_manager.organize_by_extension()

    # Look up files whose names contain '.pdf'
    matches = file_manager.search_files('.pdf')
    print("PDF files found:", matches)

    # Show metadata for the first match, if there is one
    if matches:
        print("File info:", file_manager.get_file_info(matches[0]))

    # Back up a file, reporting a missing source instead of crashing
    try:
        backup_path = file_manager.create_backup('important.txt')
    except FileNotFoundError:
        print("File not found")
    else:
        print(f"Backup created at: {backup_path}")

Best Practices

  1. Always Use Context Managers

    • Use the with statement (a context manager) so files are closed even when an error occurs
    • Prevents resource leaks and file corruption
  2. Error Handling

    • Always handle potential file operation errors
    • Use specific exception types for better error handling
  3. File Paths

    • Use os.path or pathlib for cross-platform compatibility
    • Always validate file paths before operations
  4. Large Files

    • Read large files in chunks to manage memory
    • Use generators for efficient processing
  5. Binary vs Text Mode

    • Use binary mode ('rb', 'wb') for non-text files
    • Specify encoding when dealing with text files

Common Pitfalls to Avoid

  1. Not closing files properly
  2. Hardcoding file paths
  3. Not handling encoding issues
  4. Ignoring file permissions
  5. Not validating file existence before operations

Conclusion

File handling is a critical skill in Python programming. By mastering these concepts, you can:

  • Efficiently manage and process files
  • Build robust file management systems
  • Handle different file formats effectively
  • Create backup and organization utilities

Practice these concepts by working on real file management tasks and exploring different file formats and operations.


Further Reading