Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions kt-kernel/scripts/convert_cpu_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,11 +330,12 @@ def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]) -> Dict[
"""
raise NotImplementedError("Subclasses must implement _convert_layer_experts")

def convert(self):
def convert(self, resume_layer: int = 0):
"""Convert all expert layers using subclass-specific logic."""
print("Starting conversion...")
print(f"Input: {self.input_path}")
print(f"Output: {self.output_path}")
print(f"Resuming from layer: {resume_layer}")

# Create output directory
os.makedirs(self.output_path, exist_ok=True)
Expand All @@ -355,6 +356,9 @@ def convert(self):

# Process layers with memory cleanup
for i, (layer_idx, expert_ids) in enumerate(sorted(expert_layers.items())):
if layer_idx < resume_layer:
print(f"Skipping layer {layer_idx} (resume_layer={resume_layer})")
continue
print(f"Processing layer {layer_idx} ({i+1}/{len(expert_layers)})...")

layer_tensors = self._convert_layer_experts(layer_idx, expert_ids)
Expand Down Expand Up @@ -840,6 +844,12 @@ def main():
default=False,
help="Keep layer folders without merging to safetensor files (default: False)",
)
parser.add_argument(
"--resume-layer",
type=int,
default=0,
help="Resume conversion starting at this layer index (default: 0)",
)

args = parser.parse_args()

Expand Down Expand Up @@ -893,7 +903,7 @@ def main():
)

# Run conversion
converter.convert()
converter.convert(resume_layer=args.resume_layer)

# Cleanup
converter.close()
Expand Down