@@ -197,22 +197,54 @@ class AzureBlobImportStorageBase(AzureBlobStorageMixin, ImportStorage):
# Lifetime of generated presigned URLs, in minutes (see help_text).
presign_ttl = models.PositiveSmallIntegerField(
    _('presign_ttl'), default=1, help_text='Presigned URLs TTL (in minutes)'
)
# Toggles how iter_objects lists the container: a truthy value selects the
# recursive listing, while False or NULL selects the single-level listing.
# The column is nullable and declares both a Python-side default and a
# database-side default of False.
recursive_scan = models.BooleanField(
    _('recursive scan'),
    default=False,
    db_default=False,
    null=True,
    help_text=_('Perform recursive scan over the container content'),
)
200207
def iter_objects(self):
    """Yield the blobs of the configured container that pass the filters.

    The configured prefix is normalized to end with exactly one trailing
    ``/`` (an unset prefix stays empty). Depending on ``recursive_scan``:

    * truthy -- ``container.list_blobs`` is used, which returns every blob
      whose name starts with the prefix;
    * falsy (``False`` or ``None``) -- ``container.walk_blobs`` is used with
      ``/`` as delimiter, so only one level is listed. Entries that do not
      expose both ``name`` and ``size`` are treated as directory prefixes
      and skipped.

    In both modes the placeholder blob named exactly like the prefix folder
    is skipped, and when ``regex_filter`` is set only blobs whose full name
    matches it (``re.match``, i.e. anchored at the start) are yielded.

    Yields:
        The blob items exactly as returned by the container client.
    """
    container = self.get_container()
    prefix = (str(self.prefix).rstrip('/') + '/') if self.prefix else ''
    regex = re.compile(str(self.regex_filter)) if self.regex_filter else None
    # Name of the zero-content "folder" blob that represents the prefix itself.
    folder_placeholder = prefix.rstrip('/') + '/'
    recursive = bool(self.recursive_scan)

    if recursive:
        # Flat listing: every blob below the prefix, at any depth.
        items = container.list_blobs(name_starts_with=prefix)
    else:
        # Hierarchical listing: the delimiter collapses deeper levels into
        # directory-prefix entries. An empty prefix is passed as None.
        items = container.walk_blobs(name_starts_with=prefix or None, delimiter='/')

    for item in items:
        # Only the hierarchical listing can return directory prefixes; they
        # are recognized by lacking the attributes a blob has.
        if not recursive and not (hasattr(item, 'name') and hasattr(item, 'size')):
            logger.debug('Skipping directory prefix: %s', item.name)
            continue
        # skip folder placeholders
        if item.name == folder_placeholder:
            continue
        # check regex pattern filter
        if regex and not regex.match(item.name):
            logger.debug('%s is skipped by regex filter', item.name)
            continue
        yield item
216248
217249 def iter_keys (self ):
218250 for obj in self .iter_objects ():
0 commit comments