Generator Functions in Python
Generator functions use yield instead of return to produce values lazily. They're memory-efficient for large sequences and create iterator objects that generate values on demand.
Contents
Basic generator functions
Use yield instead of return to create a generator function.
def count_up_to(n):
    """Yield 1, 2, 3, ... for as long as the counter is <= n.

    Args:
        n: Inclusive upper bound. Nothing is yielded when n is below 1.

    Yields:
        The current counter value, starting at 1 and increasing by 1.
    """
    count = 1
    while count <= n:
        # Execution pauses here until the consumer asks for the next value.
        yield count
        count += 1
# The for loop requests values one at a time until the generator finishes.
for num in count_up_to(5):
    print(num)
>>> 1
>>> 2
>>> 3
>>> 4
>>> 5
Calling a generator function returns a generator object, not the values directly.
def simple_generator():
    """Yield the fixed values 1, 2 and 3, one per request."""
    yield 1
    yield 2
    yield 3
gen = simple_generator()
print(type(gen))
>>> <class 'generator'>
print(list(gen))
>>> [1, 2, 3]
How generators work
Generators maintain their state between calls. Execution pauses at yield and resumes when the next value is requested.
def number_generator():
    """Yield 1, 2 and 3, printing a trace message around each step.

    The prints show where execution resumes: each message appears only
    when the consumer requests the next value, not when the generator
    object is created.

    Yields:
        1, then 2, then 3.
    """
    print("Starting generator")
    yield 1
    print("After first yield")
    yield 2
    print("After second yield")
    yield 3
    # Reached only if the consumer asks for a value after the third one.
    print("Generator finished")
gen = number_generator()
print("Created generator")
>>> Created generator
print(next(gen))
>>> Starting generator
>>> 1
print(next(gen))
>>> After first yield
>>> 2
print(next(gen))
>>> After second yield
>>> 3
When a generator is exhausted, calling next() on it raises StopIteration. A for loop handles this exception automatically and simply ends.
def simple_gen():
    """Yield 1 and then 2; any further request finds the generator exhausted."""
    yield 1
    yield 2
gen = simple_gen()
print(next(gen))
>>> 1
print(next(gen))
>>> 2
# print(next(gen)) # Raises StopIteration
Generator expressions
Generator expressions provide a concise way to create generators, similar to list comprehensions.
# Generator expression
squares = (x ** 2 for x in range(5))
print(type(squares))
>>> <class 'generator'>
print(list(squares))
>>> [0, 1, 4, 9, 16]
Generator expressions are more memory-efficient than list comprehensions for large datasets.
# List comprehension: creates entire list in memory
squares_list = [x ** 2 for x in range(1000000)]
# Generator expression: creates generator, doesn't store values
squares_gen = (x ** 2 for x in range(1000000))
print(type(squares_list))
>>> <class 'list'>
print(type(squares_gen))
>>> <class 'generator'>
Memory efficiency
Generators produce values on demand, making them ideal for large datasets or infinite sequences.
def read_large_file(filename):
    """Lazily yield the lines of a text file, stripped of surrounding whitespace.

    Args:
        filename: Path handed to open() in text read mode.

    Yields:
        Each line of the file in order, after str.strip() has removed
        leading and trailing whitespace (including the newline).
    """
    # The with block lives inside the generator, so the file is opened on
    # the first request and stays open while lines are still being consumed.
    with open(filename, 'r') as file:
        for line in file:
            yield line.strip()
# This doesn't load the entire file into memory
# for line in read_large_file('large_file.txt'):
#     process(line)
Generators are more memory-efficient than building lists.
def fibonacci_list(n):
    """Return the first n Fibonacci numbers as a fully built list.

    Args:
        n: How many numbers to produce; 0 or less gives an empty list.

    Returns:
        A list starting 0, 1, 1, 2, ... holding all n values at once.
    """
    result = []
    a, b = 0, 1
    for _ in range(n):
        result.append(a)
        # Advance the pair: the next term is the sum of the current two.
        a, b = b, a + b
    return result
def fibonacci_generator(n):
    """Yield the first n Fibonacci numbers one at a time.

    Args:
        n: How many numbers to produce; 0 or less yields nothing.

    Yields:
        0, 1, 1, 2, ... with only the current pair kept in local state.
    """
    a, b = 0, 1
    for _ in range(n):
        yield a
        # Advance the pair: the next term is the sum of the current two.
        a, b = b, a + b
# List version stores all values
fib_list = fibonacci_list(1000)
# Generator version produces values on demand
fib_gen = fibonacci_generator(1000)
print(next(fib_gen))
>>> 0
Infinite generators
Generators can produce infinite sequences since values are generated on demand.
def infinite_counter():
    """Yield 0, 1, 2, ... without end.

    The loop never terminates on its own, so the consumer decides how
    many values to take.
    """
    count = 0
    while True:
        yield count
        count += 1
counter = infinite_counter()
print([next(counter) for _ in range(5)])
>>> [0, 1, 2, 3, 4]
You can create infinite sequences for mathematical series.
def fibonacci_infinite():
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, ... without end.

    The loop never terminates on its own, so the consumer decides how
    many values to take.
    """
    a, b = 0, 1
    while True:
        yield a
        # Advance the pair: the next term is the sum of the current two.
        a, b = b, a + b
fib = fibonacci_infinite()
print([next(fib) for _ in range(10)])
>>> [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
Sending values to generators
Generators can receive values using the send() method, enabling two-way communication.
def accumulator():
    """Keep a running total of the values passed in through send().

    The generator must be primed with next() before the first send();
    that first request yields the initial total of 0.

    Yields:
        The total after adding the most recently sent value.

    Receiving None (for example from a plain next() call after priming)
    leaves the loop, which ends the generator.
    """
    total = 0
    while True:
        # yield hands back the current total and evaluates to whatever the
        # caller sends in next.
        value = yield total
        if value is None:
            break
        total += value
acc = accumulator()
next(acc) # Prime the generator
print(acc.send(5))
>>> 5
print(acc.send(3))
>>> 8
print(acc.send(2))
>>> 10
This pattern is useful for coroutines and stateful processing.
def running_average():
    """Report the mean of all values passed in through send() so far.

    The generator must be primed with next() before the first send();
    that first request yields 0 because no values have been received yet.

    Yields:
        total / count once at least one value has arrived, otherwise 0.

    Receiving None (for example from a plain next() call after priming)
    leaves the loop, which ends the generator.
    """
    total = 0
    count = 0
    while True:
        # The parentheses only make the grouping explicit: the whole
        # conditional expression is the yielded value. The count check
        # avoids dividing by zero before any value has been sent.
        value = yield (total / count if count > 0 else 0)
        if value is None:
            break
        total += value
        count += 1
avg = running_average()
next(avg)
print(avg.send(10))
>>> 10.0
print(avg.send(20))
>>> 15.0
print(avg.send(30))
>>> 20.0
Generator pipelines
Generators can be chained together to create efficient data processing pipelines.
def numbers():
    """Yield the integers 0 through 9; the source stage of the pipeline."""
    for i in range(10):
        yield i
def squares(seq):
    """Yield the square of each item taken from seq.

    Args:
        seq: Any iterable of values that support ``** 2``. It is consumed
            one item at a time, so it may itself be a generator.

    Yields:
        ``num ** 2`` for each item, in the original order.
    """
    for num in seq:
        yield num ** 2
def evens(seq):
    """Yield only the items of seq that are divisible by 2.

    Args:
        seq: Any iterable of values that support ``% 2``. It is consumed
            one item at a time, so it may itself be a generator.

    Yields:
        Each item for which ``num % 2 == 0``, in the original order.
    """
    for num in seq:
        if num % 2 == 0:
            yield num
# Chain generators together
pipeline = evens(squares(numbers()))
print(list(pipeline))
>>> [0, 4, 16, 36, 64]
This approach processes data lazily, only computing what's needed.
def read_lines(filename):
    """Lazily yield the lines of a text file, stripped of surrounding whitespace.

    Args:
        filename: Path handed to open() in text read mode.

    Yields:
        Each line of the file in order, after str.strip() has removed
        leading and trailing whitespace (including the newline).
    """
    # The with block lives inside the generator, so the file stays open
    # while lines are still being consumed.
    with open(filename, 'r') as f:
        for line in f:
            yield line.strip()
def filter_empty(lines):
    """Yield only the truthy items of lines, dropping empty strings.

    Args:
        lines: Any iterable of strings, consumed one item at a time.

    Yields:
        Each item that is non-empty, in the original order.
    """
    for line in lines:
        if line:
            yield line
def uppercase(lines):
    """Yield each item of lines converted with str.upper().

    Args:
        lines: Any iterable of strings, consumed one item at a time.

    Yields:
        The upper-cased form of each item, in the original order.
    """
    for line in lines:
        yield line.upper()
# Process file line by line without loading entire file
# pipeline = uppercase(filter_empty(read_lines('file.txt')))
# for line in pipeline:
#     print(line)
You can use generator expressions for simple pipelines.
numbers = range(10)
# Pipeline: filter evens, square, filter > 10
result = (x ** 2 for x in numbers if x % 2 == 0)
result = (x for x in result if x > 10)
print(list(result))
>>> [16, 36, 64]