⚡ Tối Ưu Hiệu Suất Python - Performance Tips
Tóm tắt: Hướng dẫn tối ưu hiệu suất Python từ cơ bản đến nâng cao, bao gồm profiling, benchmarking và các kỹ thuật optimization thực chiến.
🎯 Tại Sao Cần Tối Ưu Performance?
Hãy tưởng tượng bạn đang chạy marathon 🏃‍♂️. Nếu bạn mang theo ba lô nặng 20kg, giày không vừa và chạy sai technique... bạn sẽ rất chậm và mệt!
Code cũng vậy! Performance optimization như "training" để code chạy nhanh hơn, ít tốn tài nguyên hơn. Điều quan trọng: "Premature optimization is the root of all evil" - chỉ optimize khi thật sự cần thiết!
📊 1. PROFILING - ĐO LƯỜNG PERFORMANCE
Basic Timing
import time
import timeit
from functools import wraps
def timing_decorator(func):
    """Decorator that prints a function's wall-clock execution time.

    Uses time.perf_counter around each call and passes the wrapped
    function's return value through unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        started = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"{func.__name__} took {elapsed:.4f} seconds")
        return outcome
    return wrapper
@timing_decorator
def slow_calculation():
    """Deliberately slow workload used to demo the timing decorator."""
    # Accumulate the squares of 0..999_999 one term at a time.
    accumulator = 0
    for value in range(1000000):
        accumulator += value ** 2
    return accumulator
# Use timeit for micro-benchmarks
def compare_list_creation(size=1000, number=1000):
    """Benchmark three ways of building a doubled-integer list.

    Args:
        size: Number of elements in each generated list.
        number: How many times timeit repeats each builder.

    Prints the timing of a for-loop, a list comprehension and map().
    """
    # Method 1: explicit for loop with append
    def create_with_loop():
        result = []
        for i in range(size):
            result.append(i * 2)
        return result
    # Method 2: list comprehension
    def create_with_comprehension():
        return [i * 2 for i in range(size)]
    # Method 3: map with a lambda
    def create_with_map():
        return list(map(lambda x: x * 2, range(size)))
    # Benchmark each builder the same number of times
    loop_time = timeit.timeit(create_with_loop, number=number)
    comp_time = timeit.timeit(create_with_comprehension, number=number)
    map_time = timeit.timeit(create_with_map, number=number)
    print(f"For loop: {loop_time:.4f}s")
    print(f"List comprehension: {comp_time:.4f}s")
    print(f"Map: {map_time:.4f}s")
# NOTE(review): demo call commented out so a heavy benchmark does not run
# as a side effect at import time, matching the other commented demo calls.
# compare_list_creation()
Professional Profiling
import cProfile
import pstats
from pstats import SortKey
def profile_function(func, *args, **kwargs):
    """Profile a single call of *func* and print its slowest entries.

    Returns whatever *func* returns, unchanged.
    """
    prof = cProfile.Profile()
    prof.enable()
    # Execute the target call while the profiler is recording.
    outcome = func(*args, **kwargs)
    prof.disable()
    # Summarise: sort by internal time and show the top 10 entries.
    report = pstats.Stats(prof)
    report.sort_stats(SortKey.TIME)
    print("=== TOP 10 SLOWEST FUNCTIONS ===")
    report.print_stats(10)
    return outcome
# Example of a complex function to profile
def complex_student_processing(n_students=10000, seed=None):
    """Generate fake student records and grade them.

    Args:
        n_students: Number of synthetic students to create.
        seed: Optional seed for reproducible random scores.

    Returns:
        List of dicts with keys 'id', 'name', 'average', 'grade', where
        grade is 'A' (avg >= 8), 'B' (avg >= 6) or 'C' otherwise.
    """
    # Dedicated RNG so a caller can get reproducible data via `seed`.
    rng = random.Random(seed)
    # Build the raw dataset: each student has 10 integer scores in [0, 10].
    students = [
        {
            'id': f'SV{i:05d}',
            'name': f'Student {i}',
            'scores': [rng.randint(0, 10) for _ in range(10)],
        }
        for i in range(n_students)
    ]
    # Derive the average score and a letter grade for each student.
    processed = []
    for student in students:
        avg_score = sum(student['scores']) / len(student['scores'])
        grade = 'A' if avg_score >= 8 else 'B' if avg_score >= 6 else 'C'
        processed.append({
            'id': student['id'],
            'name': student['name'],
            'average': avg_score,
            'grade': grade,
        })
    return processed
# Profile function
import random
# result = profile_function(complex_student_processing)
Memory Profiling
import tracemalloc
import psutil
import os
def memory_profiler(func):
    """Decorator that reports memory usage of a single call.

    Prints the process RSS before/after the call (via psutil) plus the
    peak allocation traced by tracemalloc, then returns the wrapped
    function's result unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Start allocation tracing for this call.
        tracemalloc.start()
        process = psutil.Process(os.getpid())
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB
        try:
            result = func(*args, **kwargs)
        finally:
            # Always stop tracing: the original left tracemalloc running
            # for the rest of the process if func raised.
            final_memory = process.memory_info().rss / 1024 / 1024  # MB
            _current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
        print(f"{func.__name__} Memory Usage:")
        print(f" Initial: {initial_memory:.2f} MB")
        print(f" Final: {final_memory:.2f} MB")
        print(f" Difference: {final_memory - initial_memory:.2f} MB")
        print(f" Peak traced: {peak / 1024 / 1024:.2f} MB")
        return result
    return wrapper
@memory_profiler
def memory_intensive_task():
    """Allocate large containers to exercise the memory profiler."""
    # A million integers in a list...
    big_list = list(range(1000000))
    # ...plus a 100k-entry dict of formatted strings.
    big_dict = {key: f"value_{key}" for key in range(100000)}
    # Touch both structures so the allocations are actually used.
    return sum(big_list) + len(big_dict)
# memory_intensive_task()
🚀 2. DATA STRUCTURES OPTIMIZATION
List vs Set vs Dict Performance
import random
import time
def benchmark_data_structures(size=100000, lookups=1000):
    """Compare membership-test speed of list vs set vs dict.

    Args:
        size: Number of items placed in each container.
        lookups: How many membership tests are timed per container.
    """
    # Shuffled data so list scans are not trivially ordered.
    test_data = list(range(size))
    random.shuffle(test_data)
    # Same contents in each container type.
    test_list = test_data.copy()
    test_set = set(test_data)
    test_dict = {item: True for item in test_data}
    # Random subset of existing items to look up.
    search_items = random.sample(test_data, lookups)
    # List membership - linear scan, O(n) per lookup
    start = time.perf_counter()
    for item in search_items:
        _ = item in test_list
    list_time = time.perf_counter() - start
    # Set membership - hash lookup, O(1) per lookup
    start = time.perf_counter()
    for item in search_items:
        _ = item in test_set
    set_time = time.perf_counter() - start
    # Dict membership - hash lookup, O(1) per lookup
    start = time.perf_counter()
    for item in search_items:
        _ = item in test_dict
    dict_time = time.perf_counter() - start
    print(f"Membership Testing ({lookups} lookups):")
    print(f"List (O(n)): {list_time:.4f}s")
    print(f"Set (O(1)): {set_time:.4f}s")
    print(f"Dict (O(1)): {dict_time:.4f}s")
    print(f"Set is {list_time/set_time:.1f}x faster than list")
# benchmark_data_structures()
def optimize_student_lookup(n_students=10000, queries=1000):
    """Demonstrate dict lookup vs a linear list scan for student records.

    Args:
        n_students: Size of the synthetic student roster.
        queries: Number of random lookups to benchmark.
    """
    # ❌ Slow approach - list of dicts, scanned linearly per lookup
    students_list = [
        {'id': f'SV{i:05d}', 'name': f'Student {i}', 'grade': random.randint(0, 10)}
        for i in range(n_students)
    ]
    def find_student_slow(student_id):
        for student in students_list:
            if student['id'] == student_id:
                return student
        return None
    # ✅ Fast approach - dict keyed by id, O(1) per lookup
    students_dict = {
        f'SV{i:05d}': {'name': f'Student {i}', 'grade': random.randint(0, 10)}
        for i in range(n_students)
    }
    def find_student_fast(student_id):
        return students_dict.get(student_id)
    # Benchmark both strategies with the same random ids.
    test_ids = [f'SV{random.randint(0, n_students - 1):05d}' for _ in range(queries)]
    start = time.perf_counter()
    for student_id in test_ids:
        find_student_slow(student_id)
    slow_time = time.perf_counter() - start
    start = time.perf_counter()
    for student_id in test_ids:
        find_student_fast(student_id)
    fast_time = time.perf_counter() - start
    print(f"Slow lookup: {slow_time:.4f}s")
    print(f"Fast lookup: {fast_time:.4f}s")
    print(f"Speedup: {slow_time/fast_time:.1f}x")
# optimize_student_lookup()
Collections Module Optimizations
from collections import deque, defaultdict, Counter, namedtuple
import time
def compare_list_vs_deque(n=100000):
    """Compare prepending n items to a list vs a deque.

    list.insert(0, x) shifts every existing element (O(n) per insert,
    O(n^2) total) while deque.appendleft is O(1).

    Args:
        n: Number of elements to prepend to each container.
    """
    # List - every insert at index 0 shifts all existing elements
    test_list = []
    start = time.perf_counter()
    for i in range(n):
        test_list.insert(0, i)  # O(n) operation
    list_time = time.perf_counter() - start
    # Deque - constant-time appends at either end
    test_deque = deque()
    start = time.perf_counter()
    for i in range(n):
        test_deque.appendleft(i)  # O(1) operation
    deque_time = time.perf_counter() - start
    print(f"List insert at beginning: {list_time:.4f}s")
    print(f"Deque appendleft: {deque_time:.4f}s")
    print(f"Deque is {list_time/deque_time:.1f}x faster")
def use_defaultdict_efficiently(repeat=10000):
    """Show defaultdict(list) vs manual key-presence checks for grouping.

    Args:
        repeat: How many times the 4-record sample is replayed.
    """
    grades_data = [('SV001', 8), ('SV002', 9), ('SV001', 7), ('SV003', 6)]
    # ❌ Slow approach - check-then-insert on a plain dict
    student_grades = {}
    start = time.perf_counter()
    for student_id, grade in grades_data * repeat:
        if student_id not in student_grades:
            student_grades[student_id] = []
        student_grades[student_id].append(grade)
    slow_time = time.perf_counter() - start
    # ✅ Fast approach - defaultdict creates the list on first access
    # (defaultdict is already imported at the top of this file)
    student_grades_fast = defaultdict(list)
    start = time.perf_counter()
    for student_id, grade in grades_data * repeat:
        student_grades_fast[student_id].append(grade)  # No key checking needed
    fast_time = time.perf_counter() - start
    print(f"Manual key checking: {slow_time:.4f}s")
    print(f"defaultdict: {fast_time:.4f}s")
    print(f"Speedup: {slow_time/fast_time:.1f}x")
def use_counter_for_statistics(n=100000):
    """Count letter grades manually vs with collections.Counter.

    Args:
        n: Number of random grades to tally.
    """
    # Sample data - random letter grades for many students
    all_grades = [random.choice(['A', 'B', 'C', 'D', 'F']) for _ in range(n)]
    # ❌ Manual counting with a per-item if/else
    start = time.perf_counter()
    grade_counts = {}
    for grade in all_grades:
        if grade in grade_counts:
            grade_counts[grade] += 1
        else:
            grade_counts[grade] = 1
    manual_time = time.perf_counter() - start
    # ✅ Counter performs the same tally in C
    start = time.perf_counter()
    grade_counter = Counter(all_grades)
    counter_time = time.perf_counter() - start
    print(f"Manual counting: {manual_time:.4f}s")
    print(f"Counter: {counter_time:.4f}s")
    print(f"Most common grades: {grade_counter.most_common(3)}")
# compare_list_vs_deque()
# use_defaultdict_efficiently()
# use_counter_for_statistics()
🔄 3. ALGORITHM OPTIMIZATION
Loop Optimization
def optimize_loops(size=100000, number=10):
    """Compare three ways to sum the squares of 0..size-1.

    Args:
        size: Upper bound of the number range being squared.
        number: timeit repetition count for each variant.
    """
    # ❌ Slow: pow() function call on every iteration
    def slow_sum_of_squares(numbers):
        total = 0
        for num in numbers:
            total += pow(num, 2)
        return total
    # ✅ Fast: plain multiplication, no per-item function call
    def fast_sum_of_squares(numbers):
        total = 0
        for num in numbers:
            total += num * num
        return total
    # ✅ Fastest: generator expression + built-in sum (loops in C)
    def fastest_sum_of_squares(numbers):
        return sum(num * num for num in numbers)
    # Benchmark the three variants against the same data.
    test_numbers = list(range(size))
    slow_time = timeit.timeit(lambda: slow_sum_of_squares(test_numbers), number=number)
    fast_time = timeit.timeit(lambda: fast_sum_of_squares(test_numbers), number=number)
    fastest_time = timeit.timeit(lambda: fastest_sum_of_squares(test_numbers), number=number)
    print(f"Slow (with pow): {slow_time:.4f}s")
    print(f"Fast (direct): {fast_time:.4f}s")
    print(f"Fastest (comprehension): {fastest_time:.4f}s")
def optimize_string_operations(size=10000, number=10):
    """Compare string concatenation in a loop against str.join.

    Args:
        size: Number of items joined into the output string.
        number: timeit repetition count for each variant.
    """
    # ❌ Slow: += allocates a brand-new string on every iteration
    def slow_string_building(items):
        result = ""
        for item in items:
            result += str(item) + ", "
        return result.rstrip(", ")
    # ✅ Fast: join assembles the whole string in one pass
    def fast_string_building(items):
        return ", ".join(str(item) for item in items)
    # ✅ Fast: join combined with f-string formatting
    def fast_string_formatting(items):
        return ", ".join(f"Item: {item}" for item in items)
    # Benchmark the three variants against the same data.
    test_items = list(range(size))
    slow_time = timeit.timeit(lambda: slow_string_building(test_items), number=number)
    fast_time = timeit.timeit(lambda: fast_string_building(test_items), number=number)
    format_time = timeit.timeit(lambda: fast_string_formatting(test_items), number=number)
    print(f"Slow concatenation: {slow_time:.4f}s")
    print(f"Fast join: {fast_time:.4f}s")
    print(f"Fast formatting: {format_time:.4f}s")
# optimize_loops()
# optimize_string_operations()
Caching và Memoization
from functools import lru_cache, cache
import time
# ❌ Slow: Recursive without memoization
def fibonacci_slow(n):
    """Naive recursive Fibonacci - exponential O(2^n) time."""
    # Base cases: fib(0) = 0, fib(1) = 1.
    if n < 2:
        return n
    return fibonacci_slow(n - 2) + fibonacci_slow(n - 1)
# ✅ Fast: With memoization
@lru_cache(maxsize=None)
def fibonacci_fast(n):
    """Memoized recursive Fibonacci - linear O(n) time."""
    # Each distinct n is computed once; repeats come from the cache.
    return n if n <= 1 else fibonacci_fast(n - 1) + fibonacci_fast(n - 2)
# ✅ Fastest: Iterative approach
def fibonacci_iterative(n):
    """Iterative Fibonacci - O(n) time, O(1) extra space."""
    if n <= 1:
        return n
    # Slide a (previous, current) pair forward n-1 times.
    prev, curr = 0, 1
    for _ in range(n - 1):
        prev, curr = curr, prev + curr
    return curr
def benchmark_fibonacci(n=35):
    """Time the three Fibonacci implementations for the same index.

    Args:
        n: Fibonacci index to compute (keep modest - the naive
           recursive version is exponential in n).
    """
    # Naive recursive version - O(2^n)
    start = time.perf_counter()
    result_slow = fibonacci_slow(n)
    slow_time = time.perf_counter() - start
    # Memoized version - first call populates the cache
    start = time.perf_counter()
    fibonacci_fast(n)
    fast_time = time.perf_counter() - start
    # Iterative version - O(n) time, O(1) space
    start = time.perf_counter()
    fibonacci_iterative(n)
    iter_time = time.perf_counter() - start
    print(f"Fibonacci({n}) = {result_slow}")
    print(f"Recursive: {slow_time:.4f}s")
    print(f"Memoized: {fast_time:.4f}s")
    print(f"Iterative: {iter_time:.4f}s")
    print(f"Memoized is {slow_time/fast_time:.0f}x faster than recursive")
# benchmark_fibonacci()
# Custom caching for complex objects
class StudentGradeCache:
    """Memoizes GPA calculations keyed by (student_id, grades-hash)."""

    def __init__(self):
        # Maps (student_id, hash of sorted grade items) -> cached GPA.
        self._cache = {}
        self._hits = 0
        self._misses = 0

    def get_student_gpa(self, student_id, grades):
        """Return the GPA for *grades*, computing it at most once per key."""
        # Folding a hash of the sorted grade items into the key means a
        # changed grades dict produces a fresh cache entry.
        grades_hash = hash(tuple(sorted(grades.items())))
        key = (student_id, grades_hash)
        cached = self._cache.get(key)
        if cached is not None:
            self._hits += 1
            return cached
        # Cache miss: run the (simulated) expensive calculation.
        self._misses += 1
        gpa = self._calculate_gpa(grades)
        self._cache[key] = gpa
        return gpa

    def _calculate_gpa(self, grades):
        """Simulate an expensive GPA computation (sleeps 10 ms)."""
        time.sleep(0.01)  # Simulate slow calculation
        if not grades:
            return 0.0
        return sum(grades.values()) / len(grades)

    def get_cache_stats(self):
        """Return hit/miss counts and the hit rate as a dict."""
        total = self._hits + self._misses
        return {
            'hits': self._hits,
            'misses': self._misses,
            'hit_rate': self._hits / total if total > 0 else 0,
        }
# Test caching
# NOTE(review): renamed from `cache` - the old name shadowed
# functools.cache imported earlier in this file.
grade_cache = StudentGradeCache()
test_grades = {'Math': 8.5, 'Science': 9.0, 'English': 7.5}
# First call - cache miss
gpa1 = grade_cache.get_student_gpa('SV001', test_grades)
# Second call - cache hit
gpa2 = grade_cache.get_student_gpa('SV001', test_grades)
print(f"Cache stats: {grade_cache.get_cache_stats()}")