@@ -194,10 +194,10 @@ def set_topic_partitions(self, *topics):
             elif isinstance(arg, tuple):
                 topic = kafka_bytestring(arg[0])
                 partition = arg[1]
+                self._consume_topic_partition(topic, partition)
                 if len(arg) == 3:
                     offset = arg[2]
                     self._offsets.fetch[(topic, partition)] = offset
-                self._consume_topic_partition(topic, partition)

             # { topic: partitions, ... } dict
             elif isinstance(arg, dict):
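Note on the reordering above: `_consume_topic_partition` now runs before any fetch offset is stored, so a (topic, partition) pair that fails registration can no longer leave a stale entry behind in `_offsets.fetch`. A minimal stand-in sketch of the difference (it assumes `_consume_topic_partition` validates the pair and raises on bad input, which this patch does not show):

class MiniConsumer:
    def __init__(self):
        self.fetch_offsets = {}

    def _consume_topic_partition(self, topic, partition):
        # assumed behavior: validate the pair, raising on bad input
        if partition < 0:
            raise ValueError('bad partition')

    def set_old(self, topic, partition, offset):
        self.fetch_offsets[(topic, partition)] = offset   # stored first...
        self._consume_topic_partition(topic, partition)   # ...then may raise

    def set_new(self, topic, partition, offset):
        self._consume_topic_partition(topic, partition)   # validate first
        self.fetch_offsets[(topic, partition)] = offset

c = MiniConsumer()
try:
    c.set_old(b'my-topic', -1, 1234)
except ValueError:
    pass
print(c.fetch_offsets)  # {(b'my-topic', -1): 1234} -- stale entry under the old order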
@@ -224,7 +224,7 @@ def set_topic_partitions(self, *topics):
                         topic = kafka_bytestring(key[0])
                         partition = key[1]
                         self._consume_topic_partition(topic, partition)
-                        self._offsets.fetch[key] = value
+                        self._offsets.fetch[(topic, partition)] = value

             else:
                 raise KafkaConfigurationError('Unknown topic type (%s)' % type(arg))
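The fix above keys the offsets dict by the normalized `(topic, partition)` tuple rather than the raw user-supplied key. This matters on Python 3, where a text topic name and its bytes form hash to different dict keys. A rough illustration, using a minimal stand-in for `kafka_bytestring` (its exact implementation is assumed here):

def kafka_bytestring(s):
    # minimal stand-in for the helper used in the patch (assumed behavior)
    return s.encode('utf-8') if isinstance(s, str) else s

raw = {('my-topic', 0): 42}                           # old: raw user key
normalized = {(kafka_bytestring('my-topic'), 0): 42}  # new: normalized bytes key

lookup = (b'my-topic', 0)    # internal lookups use bytes topics
print(lookup in raw)         # False on Python 3
print(lookup in normalized)  # True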
@@ -312,16 +312,16 @@ def fetch_messages(self):
         max_wait_time = self._config['fetch_wait_max_ms']
         min_bytes = self._config['fetch_min_bytes']

-        # Get current fetch offsets
-        offsets = self._offsets.fetch
-        if not offsets:
-            if not self._topics:
-                raise KafkaConfigurationError('No topics or partitions configured')
+        if not self._topics:
+            raise KafkaConfigurationError('No topics or partitions configured')
+
+        if not self._offsets.fetch:
             raise KafkaConfigurationError('No fetch offsets found when calling fetch_messages')

-        fetches = []
-        for topic_partition, offset in six.iteritems(offsets):
-            fetches.append(FetchRequest(topic_partition[0], topic_partition[1], offset, max_bytes))
+        fetches = [FetchRequest(topic, partition,
+                                self._offsets.fetch[(topic, partition)],
+                                max_bytes)
+                   for (topic, partition) in self._topics]

         # client.send_fetch_request will collect topic/partition requests by leader
         # and send each group as a single FetchRequest to the correct broker
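Building the requests from `self._topics`, rather than iterating whatever happens to be in the offsets dict, appears to guarantee that every configured partition gets a FetchRequest (and a configured partition with no recorded offset would now fail loudly with a KeyError rather than being skipped). A self-contained sketch of the new pattern, with a namedtuple stand-in for FetchRequest whose field order is assumed from the call site above:

from collections import namedtuple

# stand-in for kafka-python's FetchRequest; field order assumed from the patch
FetchRequest = namedtuple('FetchRequest', 'topic partition offset max_bytes')

topics = [(b'logs', 0), (b'logs', 1)]
fetch_offsets = {(b'logs', 0): 10, (b'logs', 1): 7}
max_bytes = 1024 * 1024

fetches = [FetchRequest(topic, partition,
                        fetch_offsets[(topic, partition)],
                        max_bytes)
           for (topic, partition) in topics]
print(fetches)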
@@ -336,49 +336,53 @@ def fetch_messages(self):
             return

         for resp in responses:
-            topic_partition = (resp.topic, resp.partition)
+            topic = kafka_bytestring(resp.topic)
+            partition = resp.partition
             try:
                 check_error(resp)
             except OffsetOutOfRangeError:
-                logger.warning('OffsetOutOfRange: topic %s, partition %d, offset %d '
-                               '(Highwatermark: %d)',
-                               resp.topic, resp.partition,
-                               offsets[topic_partition], resp.highwaterMark)
+                logger.warning('OffsetOutOfRange: topic %s, partition %d, '
+                               'offset %d (Highwatermark: %d)',
+                               topic, partition,
+                               self._offsets.fetch[(topic, partition)],
+                               resp.highwaterMark)
                 # Reset offset
-                self._offsets.fetch[topic_partition] = self._reset_partition_offset(topic_partition)
+                self._offsets.fetch[(topic, partition)] = (
+                    self._reset_partition_offset((topic, partition))
+                )
                 continue

             except NotLeaderForPartitionError:
                 logger.warning("NotLeaderForPartitionError for %s - %d. "
                                "Metadata may be out of date",
-                               resp.topic, resp.partition)
+                               topic, partition)
                 self._refresh_metadata_on_error()
                 continue

             except RequestTimedOutError:
                 logger.warning("RequestTimedOutError for %s - %d",
-                               resp.topic, resp.partition)
+                               topic, partition)
                 continue

             # Track server highwater mark
-            self._offsets.highwater[topic_partition] = resp.highwaterMark
+            self._offsets.highwater[(topic, partition)] = resp.highwaterMark

             # Yield each message
             # Kafka-python could raise an exception during iteration
             # we are not catching -- user will need to address
             for (offset, message) in resp.messages:
                 # deserializer_class could raise an exception here
-                msg = KafkaMessage(resp.topic,
-                                   resp.partition,
-                                   offset, message.key,
-                                   self._config['deserializer_class'](message.value))
-
-                if offset < self._offsets.fetch[topic_partition]:
-                    logger.debug('Skipping message %s because its offset is less than the consumer offset',
-                                 msg)
+                val = self._config['deserializer_class'](message.value)
+                msg = KafkaMessage(topic, partition, offset, message.key, val)
+
+                # in some cases the server will return earlier messages
+                # than we requested. skip them per kafka spec
+                if offset < self._offsets.fetch[(topic, partition)]:
+                    logger.debug('message offset less than fetched offset '
+                                 'skipping: %s', msg)
                     continue
                 # Only increment fetch offset if we safely got the message and deserialized
-                self._offsets.fetch[topic_partition] = offset + 1
+                self._offsets.fetch[(topic, partition)] = offset + 1

                 # Then yield to user
                 yield msg
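The tail of the loop above is the offset bookkeeping: a message whose offset is below the current fetch offset is dropped (the broker may return messages earlier than the offset requested), and the fetch offset only advances after the message has been deserialized successfully, so a deserializer error leaves the offset where it was and the message would be re-fetched. A broker-free sketch of that flow:

fetch_offset = 5
messages = [(4, b'old'), (5, b'a'), (6, b'b')]  # (offset, value) pairs

for offset, value in messages:
    if offset < fetch_offset:
        continue               # earlier than requested; skip per the patch
    # a deserializer error here would stop before the offset advances,
    # so this message would be fetched again on the next call
    print(offset, value.decode())
    fetch_offset = offset + 1  # advance only after a safe deserialize

print(fetch_offset)  # 7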