14 changes: 14 additions & 0 deletions kt-kernel/scripts/README.md
@@ -107,6 +107,20 @@ output_dir/
- Need to process very large models on memory-constrained systems
- Want to preserve intermediate layer-wise quantized weights

### Resume Layer

If a memory-constrained system still cannot complete quantization even with low memory mode enabled via `--no-merge-safetensor`, restart the script with the `--resume-layer` argument to specify the layer from which conversion should continue; layers with a lower index are skipped. In the example below, layers 0-11 are skipped and conversion resumes at layer 12.

```bash
python scripts/convert_cpu_weights.py \
    --input-path /path/to/model \
    --input-type bf16 \
    --output /path/to/output \
    --quant-method int4 \
    --no-merge-safetensor \
    --resume-layer 12
```

## Examples

### Example 1: Quantize DeepSeek-V3.1 (FP8 → INT4)
21 changes: 18 additions & 3 deletions kt-kernel/scripts/convert_cpu_weights.py
@@ -330,11 +330,18 @@ def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]) -> Dict[
"""
raise NotImplementedError("Subclasses must implement _convert_layer_experts")

def convert(self):
"""Convert all expert layers using subclass-specific logic."""
def convert(self, resume_layer: int = 0):
"""Convert all expert layers using subclass-specific logic.

Args:
resume_layer (int, optional): The layer index to resume conversion from.
Layers with an index lower than this will be skipped. Defaults to 0.
"""
print("Starting conversion...")
print(f"Input: {self.input_path}")
print(f"Output: {self.output_path}")
if resume_layer > 0:
print(f"Resuming from layer: {resume_layer}")

# Create output directory
os.makedirs(self.output_path, exist_ok=True)
@@ -355,6 +362,8 @@ def convert(self):

# Process layers with memory cleanup
for i, (layer_idx, expert_ids) in enumerate(sorted(expert_layers.items())):
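# Skip layers before the resume point (already converted in a previous run)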
if layer_idx < resume_layer:
continue
print(f"Processing layer {layer_idx} ({i+1}/{len(expert_layers)})...")

layer_tensors = self._convert_layer_experts(layer_idx, expert_ids)
@@ -840,6 +849,12 @@ def main():
default=False,
help="Keep layer folders without merging to safetensor files (default: False)",
)
parser.add_argument(
"--resume-layer",
type=int,
default=0,
help="Resume conversion starting at this layer index (default: 0)",
)

args = parser.parse_args()

@@ -893,7 +908,7 @@ def main():
)

# Run conversion
converter.convert()
converter.convert(resume_layer=args.resume_layer)

# Cleanup
converter.close()