📊 Cấu Trúc Dữ Liệu - Data Structures
Tóm tắt: Hướng dẫn chi tiết về các cấu trúc dữ liệu trong Python, từ cơ bản như list, dict đến nâng cao như heap, tree. Mỗi structure được giải thích bằng ví dụ thực tế và code demo.
🎯 Tại Sao Cần Hiểu Data Structures?
Hãy tưởng tượng data structures như các loại "container" khác nhau! 📦
- List như tủ có ngăn đánh số - truy cập theo thứ tự
- Dictionary như từ điển - tìm theo key
- Set như hộp không trùng lặp - mỗi item chỉ có 1
- Stack như chồng đĩa - vào sau, ra trước
- Queue như hàng đợi - vào trước, ra trước
Chọn đúng structure = code nhanh và hiệu quả! 🚀
📋 1. BUILT-IN DATA STRUCTURES
List (Danh sách)
Đặc điểm: Có thứ tự, có thể thay đổi, cho phép trùng lặp
class ListDemo:
"""Demo các operations với List."""
def __init__(self):
self.demo_list = [1, 2, 3, 4, 5]
self.mixed_list = ["Python", 3.14, True, [1, 2], {"key": "value"}]
def basic_operations(self):
"""Các thao tác cơ bản với list."""
print("=== LIST BASIC OPERATIONS ===")
# Access - O(1)
print(f"First element: {self.demo_list[0]}")
print(f"Last element: {self.demo_list[-1]}")
print(f"Slice [1:4]: {self.demo_list[1:4]}")
# Modify - O(1)
original = self.demo_list.copy()
self.demo_list[0] = 10
print(f"After modify index 0: {self.demo_list}")
self.demo_list = original # Reset
# Add elements
self.demo_list.append(6) # O(1) - add to end
print(f"After append(6): {self.demo_list}")
self.demo_list.insert(0, 0) # O(n) - add to beginning
print(f"After insert(0, 0): {self.demo_list}")
# Remove elements
removed = self.demo_list.pop() # O(1) - remove from end
print(f"Popped element: {removed}, List: {self.demo_list}")
self.demo_list.remove(0) # O(n) - remove by value
print(f"After remove(0): {self.demo_list}")
def advanced_operations(self):
"""Thao tác nâng cao với list."""
print(f"\n=== LIST ADVANCED OPERATIONS ===")
numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5]
# Sorting
print(f"Original: {numbers}")
print(f"Sorted: {sorted(numbers)}") # Không thay đổi original
numbers_copy = numbers.copy()
numbers_copy.sort() # Thay đổi original
print(f"After .sort(): {numbers_copy}")
# List comprehension
squares = [x**2 for x in numbers]
print(f"Squares: {squares}")
even_numbers = [x for x in numbers if x % 2 == 0]
print(f"Even numbers: {even_numbers}")
# Statistical operations
print(f"Count of 1: {numbers.count(1)}")
print(f"Index of 4: {numbers.index(4)}")
print(f"Min: {min(numbers)}, Max: {max(numbers)}")
print(f"Sum: {sum(numbers)}, Length: {len(numbers)}")
def performance_analysis(self):
"""Phân tích performance của list operations."""
print(f"\n=== LIST PERFORMANCE ===")
import time
# Test với list lớn
large_list = list(range(100000))
# Access by index - O(1)
start = time.perf_counter()
_ = large_list[50000]
access_time = time.perf_counter() - start
# Append - O(1)
start = time.perf_counter()
large_list.append(100000)
append_time = time.perf_counter() - start
# Insert at beginning - O(n)
start = time.perf_counter()
large_list.insert(0, -1)
insert_time = time.perf_counter() - start
# Search - O(n)
start = time.perf_counter()
_ = 50000 in large_list
search_time = time.perf_counter() - start
print(f"Access by index: {access_time:.8f}s")
print(f"Append: {append_time:.8f}s")
print(f"Insert at beginning: {insert_time:.8f}s")
print(f"Search: {search_time:.8f}s")
# Demo List
list_demo = ListDemo()
list_demo.basic_operations()
list_demo.advanced_operations()
list_demo.performance_analysis()
Dictionary (Từ điển)
Đặc điểm: Key-value pairs, không có thứ tự (Python 3.7+ insertion order preserved), có thể thay đổi
class DictDemo:
"""Demo các operations với Dictionary."""
def __init__(self):
self.student = {
"name": "Nguyễn Văn An",
"age": 20,
"grades": {"math": 8.5, "physics": 9.0, "chemistry": 7.5},
"is_active": True
}
def basic_operations(self):
"""Thao tác cơ bản với dict."""
print("=== DICT BASIC OPERATIONS ===")
# Access - O(1) average
print(f"Name: {self.student['name']}")
print(f"Age: {self.student.get('age', 'Unknown')}")
print(f"Grade in math: {self.student['grades']['math']}")
# Add/Update - O(1) average
self.student['email'] = "[email protected]"
self.student['age'] = 21 # Update existing
print(f"After updates: {self.student}")
# Delete - O(1) average
removed_email = self.student.pop('email')
print(f"Removed email: {removed_email}")
del self.student['is_active']
print(f"After delete is_active: {list(self.student.keys())}")
def iteration_methods(self):
"""Các cách duyệt dictionary."""
print(f"\n=== DICT ITERATION ===")
grades = {"math": 8.5, "physics": 9.0, "chemistry": 7.5, "biology": 8.8}
# Iterate keys
print("Keys:")
for subject in grades.keys():
print(f" {subject}")
# Iterate values
print("Values:")
for grade in grades.values():
print(f" {grade}")
# Iterate key-value pairs
print("Key-Value pairs:")
for subject, grade in grades.items():
print(f" {subject}: {grade}")
# Dict comprehension
high_grades = {k: v for k, v in grades.items() if v >= 8.5}
print(f"High grades: {high_grades}")
# Transform values
letter_grades = {k: self._grade_to_letter(v) for k, v in grades.items()}
print(f"Letter grades: {letter_grades}")
def _grade_to_letter(self, grade):
"""Convert numeric grade to letter."""
if grade >= 9: return 'A'
elif grade >= 8: return 'B'
elif grade >= 7: return 'C'
elif grade >= 6: return 'D'
else: return 'F'
def advanced_dict_methods(self):
"""Methods nâng cao của dictionary."""
print(f"\n=== ADVANCED DICT METHODS ===")
# Merge dictionaries
dict1 = {"a": 1, "b": 2}
dict2 = {"b": 3, "c": 4}
# Method 1: update()
merged1 = dict1.copy()
merged1.update(dict2)
print(f"Using update(): {merged1}")
# Method 2: ** unpacking (Python 3.5+)
merged2 = {**dict1, **dict2}
print(f"Using unpacking: {merged2}")
# Method 3: | operator (Python 3.9+)
try:
merged3 = dict1 | dict2
print(f"Using | operator: {merged3}")
except TypeError:
print("| operator not available (Python < 3.9)")
# setdefault() - set if not exists
config = {"host": "localhost", "port": 8080}
config.setdefault("timeout", 30) # Sets only if 'timeout' doesn't exist
config.setdefault("port", 3000) # Won't change existing 'port'
print(f"Config with defaults: {config}")
# fromkeys() - create dict from keys
subjects = ["math", "physics", "chemistry"]
empty_grades = dict.fromkeys(subjects, 0)
print(f"Empty grades: {empty_grades}")
# Demo Dictionary
dict_demo = DictDemo()
dict_demo.basic_operations()
dict_demo.iteration_methods()
dict_demo.advanced_dict_methods()
Set (Tập hợp)
Đặc điểm: Không có thứ tự, không trùng lặp, có thể thay đổi
class SetDemo:
"""Demo các operations với Set."""
def __init__(self):
self.set1 = {1, 2, 3, 4, 5}
self.set2 = {4, 5, 6, 7, 8}
self.fruits = {"apple", "banana", "orange", "apple"} # Duplicate ignored
def basic_operations(self):
"""Thao tác cơ bản với set."""
print("=== SET BASIC OPERATIONS ===")
print(f"Set1: {self.set1}")
print(f"Set2: {self.set2}")
print(f"Fruits (duplicates removed): {self.fruits}")
# Add/Remove - O(1) average
self.fruits.add("grape")
print(f"After add grape: {self.fruits}")
self.fruits.discard("banana") # Won't raise error if not found
print(f"After discard banana: {self.fruits}")
try:
self.fruits.remove("kiwi") # Will raise error if not found
except KeyError:
print("Kiwi not found (using remove())")
# Membership test - O(1) average
print(f"'apple' in fruits: {'apple' in self.fruits}")
print(f"'kiwi' in fruits: {'kiwi' in self.fruits}")
def set_mathematics(self):
"""Các phép toán tập hợp."""
print(f"\n=== SET MATHEMATICS ===")
print(f"Set1: {self.set1}")
print(f"Set2: {self.set2}")
# Union - Hợp
union = self.set1 | self.set2
union_method = self.set1.union(self.set2)
print(f"Union (|): {union}")
print(f"Union (method): {union_method}")
# Intersection - Giao
intersection = self.set1 & self.set2
intersection_method = self.set1.intersection(self.set2)
print(f"Intersection (&): {intersection}")
print(f"Intersection (method): {intersection_method}")
# Difference - Hiệu
difference = self.set1 - self.set2
difference_method = self.set1.difference(self.set2)
print(f"Difference (-): {difference}")
print(f"Difference (method): {difference_method}")
# Symmetric Difference - Hiệu đối xứng
sym_diff = self.set1 ^ self.set2
sym_diff_method = self.set1.symmetric_difference(self.set2)
print(f"Symmetric Difference (^): {sym_diff}")
print(f"Symmetric Difference (method): {sym_diff_method}")
# Subset/Superset
small_set = {2, 3}
print(f"Is {small_set} subset of {self.set1}? {small_set.issubset(self.set1)}")
print(f"Is {self.set1} superset of {small_set}? {self.set1.issuperset(small_set)}")
# Disjoint
disjoint_set = {10, 11, 12}
print(f"Are {self.set1} and {disjoint_set} disjoint? {self.set1.isdisjoint(disjoint_set)}")
def practical_examples(self):
"""Ví dụ thực tế sử dụng set."""
print(f"\n=== PRACTICAL SET EXAMPLES ===")
# 1. Remove duplicates from list
numbers_with_dups = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5]
unique_numbers = list(set(numbers_with_dups))
print(f"Remove duplicates: {numbers_with_dups} -> {unique_numbers}")
# 2. Find common elements
class1_students = {"An", "Bình", "Cường", "Dung"}
class2_students = {"Bình", "Dung", "Em", "Phương"}
common_students = class1_students & class2_students
print(f"Students in both classes: {common_students}")
# 3. Find unique skills
job_requirements = {"Python", "SQL", "Git", "Docker"}
candidate_skills = {"Python", "Java", "Git", "Linux"}
missing_skills = job_requirements - candidate_skills
extra_skills = candidate_skills - job_requirements
print(f"Missing skills: {missing_skills}")
print(f"Extra skills: {extra_skills}")
# 4. Fast membership testing
large_set = set(range(100000))
large_list = list(range(100000))
import time
# Test set membership
start = time.perf_counter()
result = 99999 in large_set
set_time = time.perf_counter() - start
# Test list membership
start = time.perf_counter()
result = 99999 in large_list
list_time = time.perf_counter() - start
print(f"Set membership: {set_time:.8f}s")
print(f"List membership: {list_time:.8f}s")
print(f"Set is {list_time/set_time:.0f}x faster for membership testing")
# Demo Set
set_demo = SetDemo()
set_demo.basic_operations()
set_demo.set_mathematics()
set_demo.practical_examples()
Tuple (Bộ)
Đặc điểm: Có thứ tự, không thể thay đổi, cho phép trùng lặp
class TupleDemo:
"""Demo các operations với Tuple."""
def __init__(self):
self.point = (3, 4) # 2D point
self.rgb_color = (255, 128, 0) # RGB color
self.student_record = ("SV001", "Nguyễn Văn An", 20, 8.5)
def basic_operations(self):
"""Thao tác cơ bản với tuple."""
print("=== TUPLE BASIC OPERATIONS ===")
# Access - O(1)
print(f"Point: {self.point}")
print(f"X coordinate: {self.point[0]}")
print(f"Y coordinate: {self.point[1]}")
# Unpacking
x, y = self.point
print(f"Unpacked coordinates: x={x}, y={y}")
# Multiple assignment
student_id, name, age, grade = self.student_record
print(f"Student: {name} (ID: {student_id}, Age: {age}, Grade: {grade})")
# Slicing
print(f"RGB color: {self.rgb_color}")
print(f"Red component: {self.rgb_color[0]}")
print(f"RGB slice [0:2]: {self.rgb_color[0:2]}")
# Count and index
numbers = (1, 2, 3, 2, 4, 2, 5)
print(f"Numbers: {numbers}")
print(f"Count of 2: {numbers.count(2)}")
print(f"Index of first 3: {numbers.index(3)}")
def tuple_as_keys(self):
"""Sử dụng tuple làm keys trong dictionary."""
print(f"\n=== TUPLE AS DICTIONARY KEYS ===")
# Geographic coordinates as keys
city_coordinates = {
(21.0285, 105.8542): "Hà Nội",
(10.8231, 106.6297): "TP.HCM",
(16.0544, 108.2022): "Đà Nẵng",
(10.0452, 105.7469): "Cần Thơ"
}
# Access by coordinate
hanoi_coord = (21.0285, 105.8542)
print(f"City at {hanoi_coord}: {city_coordinates[hanoi_coord]}")
# Student grades by (subject, semester)
grades = {
("Math", "2023-1"): 8.5,
("Math", "2023-2"): 9.0,
("Physics", "2023-1"): 8.0,
("Physics", "2023-2"): 8.5
}
print(f"Math grade in 2023-1: {grades[('Math', '2023-1')]}")
# Iterate through coordinate-based dict
print("All cities:")
for (lat, lon), city in city_coordinates.items():
print(f" {city}: ({lat}, {lon})")
def named_tuples(self):
"""Sử dụng namedtuple để tạo tuple có tên."""
print(f"\n=== NAMED TUPLES ===")
from collections import namedtuple
# Define named tuple types
Point = namedtuple('Point', ['x', 'y'])
Student = namedtuple('Student', ['id', 'name', 'age', 'grade'])
# Create instances
p1 = Point(3, 4)
p2 = Point(x=5, y=12) # Can use keyword arguments
student = Student('SV001', 'Nguyễn Văn An', 20, 8.5)
# Access by name (more readable)
print(f"Point 1: ({p1.x}, {p1.y})")
print(f"Point 2: ({p2.x}, {p2.y})")
print(f"Student: {student.name}, Grade: {student.grade}")
# Still works as regular tuple
print(f"Point 1[0]: {p1[0]}")
print(f"Point 1 + Point 2: ({p1.x + p2.x}, {p1.y + p2.y})")
# Named tuple methods
print(f"Student fields: {student._fields}")
print(f"Student as dict: {student._asdict()}")
# Create new instance with some fields changed
older_student = student._replace(age=21)
print(f"Older student: {older_student}")
def performance_comparison(self):
"""So sánh performance tuple vs list."""
print(f"\n=== TUPLE VS LIST PERFORMANCE ===")
import time
import sys
# Memory usage
tuple_data = tuple(range(1000))
list_data = list(range(1000))
print(f"Tuple memory: {sys.getsizeof(tuple_data)} bytes")
print(f"List memory: {sys.getsizeof(list_data)} bytes")
print(f"Memory difference: {sys.getsizeof(list_data) - sys.getsizeof(tuple_data)} bytes")
# Creation time
start = time.perf_counter()
for _ in range(10000):
t = tuple(range(100))
tuple_creation_time = time.perf_counter() - start
start = time.perf_counter()
for _ in range(10000):
l = list(range(100))
list_creation_time = time.perf_counter() - start
print(f"Tuple creation: {tuple_creation_time:.4f}s")
print(f"List creation: {list_creation_time:.4f}s")
print(f"Tuple is {list_creation_time/tuple_creation_time:.1f}x faster to create")
# Demo Tuple
tuple_demo = TupleDemo()
tuple_demo.basic_operations()
tuple_demo.tuple_as_keys()
tuple_demo.named_tuples()
tuple_demo.performance_comparison()