2727class FileInfo :
2828 """
2929 Information about a single file within an artifact.
30+
31+ For regular files, only relative_path, size_bytes, and optionally checksum are set.
32+ For symlinks, is_symlink is True and symlink_target contains the link target.
33+ For broken symlinks, is_broken is True.
3034 """
3135
3236 relative_path : str
3337 size_bytes : int
3438 checksum : str | None = None
39+ is_symlink : bool | None = None
40+ symlink_target : str | None = None
41+ is_broken : bool | None = None
3542
3643 def __post_init__ (self ):
3744 """Validate file info data"""
3845 if not self .relative_path :
3946 raise ValueError ("relative_path cannot be empty" )
4047 if self .size_bytes < 0 :
4148 raise ValueError ("size_bytes must be non-negative" )
49+ if self .is_symlink and not self .symlink_target :
50+ raise ValueError ("symlink_target must be provided when is_symlink is True" )
4251
4352 @property
4453 def size_mb (self ) -> float :
@@ -124,7 +133,6 @@ def measure_package(
124133 package_path : str ,
125134 gate_name : str ,
126135 build_job_name : str ,
127- max_files : int = 20000 ,
128136 generate_checksums : bool = True ,
129137 debug : bool = False ,
130138 ) -> InPlaceArtifactReport :
@@ -153,7 +161,7 @@ def measure_package(
153161 gate_config = create_quality_gate_config (gate_name , self .config [gate_name ])
154162
155163 measurement , file_inventory = self ._extract_and_analyze_package (
156- ctx , package_path , gate_config , max_files , generate_checksums , debug
164+ ctx , package_path , gate_config , generate_checksums , debug
157165 )
158166
159167 return InPlaceArtifactReport (
@@ -177,7 +185,6 @@ def _extract_and_analyze_package(
177185 ctx : Context ,
178186 package_path : str ,
179187 config : QualityGateConfig ,
180- max_files : int = 10000 ,
181188 generate_checksums : bool = True ,
182189 debug : bool = False ,
183190 ) -> tuple [ArtifactMeasurement , list [FileInfo ]]:
@@ -188,7 +195,6 @@ def _extract_and_analyze_package(
188195 ctx: Invoke context for running commands
189196 package_path: Path to the package file
190197 config: Quality gate configuration
191- max_files: Maximum number of files to process in inventory
192198 generate_checksums: Whether to generate checksums for files
193199 debug: Enable debug logging
194200
@@ -213,7 +219,7 @@ def _extract_and_analyze_package(
213219 artifact_path = package_path , on_wire_size = wire_size , on_disk_size = disk_size
214220 )
215221
216- file_inventory = self ._walk_extracted_files (extract_dir , max_files , generate_checksums , debug )
222+ file_inventory = self ._walk_extracted_files (extract_dir , generate_checksums , debug )
217223
218224 if debug :
219225 print ("✅ Single extraction completed:" )
@@ -226,15 +232,12 @@ def _extract_and_analyze_package(
226232 except Exception as e :
227233 raise RuntimeError (f"Failed to extract and analyze package { package_path } : { e } " ) from e
228234
229- def _walk_extracted_files (
230- self , extract_dir : str , max_files : int , generate_checksums : bool , debug : bool
231- ) -> list [FileInfo ]:
235+ def _walk_extracted_files (self , extract_dir : str , generate_checksums : bool , debug : bool ) -> list [FileInfo ]:
232236 """
233237 Walk through extracted files and create file inventory.
234238
235239 Args:
236240 extract_dir: Directory containing extracted package files
237- max_files: Maximum number of files to process
238241 generate_checksums: Whether to generate checksums for files
239242 debug: Enable debug logging
240243
@@ -260,19 +263,55 @@ def _walk_extracted_files(
260263 total_size = 0
261264
262265 for file_path in extract_path .rglob ('*' ):
263- if file_path .is_file ():
264- # Respect max_files limit
265- if files_processed >= max_files :
266- if debug :
267- print (f"⚠️ Reached max files limit ({ max_files } ), stopping inventory" )
268- break
269-
270- try :
271- relative_path = str (file_path .relative_to (extract_path ))
272- file_stat = file_path .stat ()
266+ # Skip directories
267+ if file_path .is_dir ():
268+ continue
269+
270+ try :
271+ relative_path = str (file_path .relative_to (extract_path ))
272+
273+ if file_path .is_symlink ():
274+ try :
275+ symlink_target = os .readlink (file_path )
276+ logical_size = len (symlink_target )
277+ is_broken = False
278+
279+ try :
280+ resolved_target = file_path .resolve (strict = True )
281+ if resolved_target .is_relative_to (extract_path ):
282+ symlink_target_rel = str (resolved_target .relative_to (extract_path ))
283+ else :
284+ symlink_target_rel = symlink_target
285+ except (OSError , RuntimeError ):
286+ symlink_target_rel = symlink_target
287+ is_broken = True
288+
289+ file_inventory .append (
290+ FileInfo (
291+ relative_path = relative_path ,
292+ size_bytes = logical_size ,
293+ checksum = None ,
294+ is_symlink = True ,
295+ symlink_target = symlink_target_rel ,
296+ is_broken = is_broken if is_broken else None ,
297+ )
298+ )
299+
300+ if debug and files_processed % 1000 == 0 :
301+ broken_marker = " [BROKEN]" if is_broken else ""
302+ print (f"🔗 Symlink: { relative_path } -> { symlink_target_rel } { broken_marker } " )
303+
304+ except OSError as e :
305+ if debug :
306+ print (f"⚠️ Could not read symlink { file_path } : { e } " )
307+ continue
308+
309+ elif file_path .is_file ():
310+ # Regular file - use lstat to not follow symlinks
311+ file_stat = file_path .lstat ()
273312 size_bytes = file_stat .st_size
274313
275- checksum = self ._generate_checksum (file_path )
314+ checksum = self ._generate_checksum (file_path ) if generate_checksums else None
276315
277316 file_inventory .append (
278317 FileInfo (
@@ -282,16 +321,17 @@ def _walk_extracted_files(
282321 )
283322 )
284323
285- files_processed += 1
286324 total_size += size_bytes
287325
288326 if debug and files_processed % 1000 == 0 :
289327 print (f"📋 Processed { files_processed } files..." )
290328
291- except (OSError , PermissionError ) as e :
292- if debug :
293- print (f"⚠️ Skipping file { file_path } : { e } " )
294- continue
329+ files_processed += 1
330+
331+ except (OSError , PermissionError ) as e :
332+ if debug :
333+ print (f"⚠️ Skipping file { file_path } : { e } " )
334+ continue
295335
296336 # Sort by size (descending) for easier analysis
297337 file_inventory .sort (key = lambda f : f .size_bytes , reverse = True )
@@ -349,14 +389,7 @@ def save_report_to_yaml(self, report: InPlaceArtifactReport, output_path: str) -
349389 "arch" : report .arch ,
350390 "os" : report .os ,
351391 "build_job_name" : report .build_job_name ,
352- "file_inventory" : [
353- {
354- "relative_path" : file_info .relative_path ,
355- "size_bytes" : file_info .size_bytes ,
356- "checksum" : file_info .checksum ,
357- }
358- for file_info in report .file_inventory
359- ],
392+ "file_inventory" : [self ._serialize_file_info (file_info ) for file_info in report .file_inventory ],
360393 }
361394
362395 with open (output_path , 'w' ) as f :
@@ -365,6 +398,32 @@ def save_report_to_yaml(self, report: InPlaceArtifactReport, output_path: str) -
365398 except Exception as e :
366399 raise RuntimeError (f"Failed to save report to { output_path } : { e } " ) from e
367400
401+ def _serialize_file_info (self , file_info : FileInfo ) -> dict [str , Any ]:
402+ """
403+ Serialize a FileInfo object to a dictionary, excluding None/False fields for regular files.
404+
405+ Args:
406+ file_info: The FileInfo object to serialize
407+
408+ Returns:
409+ Dictionary with only relevant fields
410+ """
411+ result = {
412+ "relative_path" : file_info .relative_path ,
413+ "size_bytes" : file_info .size_bytes ,
414+ }
415+
416+ if file_info .checksum is not None :
417+ result ["checksum" ] = file_info .checksum
418+
419+ if file_info .is_symlink :
420+ result ["is_symlink" ] = True
421+ result ["symlink_target" ] = file_info .symlink_target
422+ if file_info .is_broken :
423+ result ["is_broken" ] = True
424+
425+ return result
426+
368427
369428def measure_package_local (
370429 ctx ,
@@ -373,7 +432,6 @@ def measure_package_local(
373432 config_path = "test/static/static_quality_gates.yml" ,
374433 output_path = None ,
375434 build_job_name = "local_test" ,
376- max_files = 20000 ,
377435 no_checksums = False ,
378436 debug = False ,
379437):
@@ -389,7 +447,6 @@ def measure_package_local(
389447 config_path: Path to quality gates configuration (default: test/static/static_quality_gates.yml)
390448 output_path: Path to save the measurement report (default: {gate_name}_report.yml)
391449 build_job_name: Simulated build job name (default: local_test)
392- max_files: Maximum number of files to process in inventory (default: 20000)
393450 no_checksums: Skip checksum generation for faster processing (default: false)
394451 debug: Enable debug logging for troubleshooting (default: false)
395452
@@ -437,7 +494,6 @@ def measure_package_local(
437494 package_path = package_path ,
438495 gate_name = gate_name ,
439496 build_job_name = build_job_name ,
440- max_files = max_files ,
441497 generate_checksums = not no_checksums ,
442498 debug = debug ,
443499 )
0 commit comments