Skip to content

Commit 4cb7995

Browse files
committed
Use generic workflow.
1 parent df8db90 commit 4cb7995

File tree

7 files changed

+889
-787
lines changed

7 files changed

+889
-787
lines changed

src/ess/nmx/_executable_helper.py

Lines changed: 2 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
from types import UnionType
1111
from typing import Literal, TypeGuard, TypeVar, Union, get_args, get_origin
1212

13-
from pydantic import BaseModel, Field
13+
from pydantic import BaseModel
1414
from pydantic.fields import FieldInfo
1515
from pydantic_core import PydanticUndefined
1616

17-
from .types import Compression
17+
from .configurations import InputConfig, OutputConfig, ReductionConfig, WorkflowConfig
1818

1919

2020
def _validate_annotation(annotation) -> TypeGuard[type]:
@@ -140,148 +140,6 @@ def add_args_from_pydantic_model(
140140
return parser
141141

142142

143-
class InputConfig(BaseModel):
144-
# Add title of the basemodel
145-
model_config = {"title": "Input Configuration"}
146-
# File IO
147-
input_file: list[str] = Field(
148-
title="Input File",
149-
description="Path to the input file. If multiple file paths are given,"
150-
" the output(histogram) will be merged(summed) "
151-
"and will not save individual outputs per input file. ",
152-
)
153-
swmr: bool = Field(
154-
title="SWMR Mode",
155-
description="Open the input file in SWMR mode",
156-
default=False,
157-
)
158-
# Detector selection
159-
detector_ids: list[int] = Field(
160-
title="Detector IDs",
161-
description="Detector indices to process",
162-
default=[0, 1, 2],
163-
)
164-
# Chunking options
165-
iter_chunk: bool = Field(
166-
title="Iterate in Chunks",
167-
description="Whether to process the input file in chunks "
168-
" based on the hdf5 dataset chunk size. "
169-
"It is ignored if hdf5 dataset is not chunked. "
170-
"If True, it overrides chunk-size-pulse and chunk-size-events options.",
171-
default=False,
172-
)
173-
chunk_size_pulse: int = Field(
174-
title="Chunk Size Pulse",
175-
description="Number of pulses to process in each chunk. "
176-
"If 0 or negative, process all pulses at once.",
177-
default=0,
178-
)
179-
chunk_size_events: int = Field(
180-
title="Chunk Size Events",
181-
description="Number of events to process in each chunk. "
182-
"If 0 or negative, process all events at once."
183-
"If both chunk-size-pulse and chunk-size-events are set, "
184-
"chunk-size-pulse is preferred.",
185-
default=0,
186-
)
187-
188-
189-
class TimeBinUnit(enum.StrEnum):
190-
ms = 'ms'
191-
us = 'us'
192-
ns = 'ns'
193-
194-
195-
class TimeBinCoordinate(enum.StrEnum):
196-
event_time_offset = 'event_time_offset'
197-
time_of_flight = 'time_of_flight'
198-
199-
200-
class WorkflowConfig(BaseModel):
201-
# Add title of the basemodel
202-
model_config = {"title": "Workflow Configuration"}
203-
time_bin_coordinate: TimeBinCoordinate = Field(
204-
title="Time Bin Coordinate",
205-
description="Coordinate to bin the time data.",
206-
default=TimeBinCoordinate.event_time_offset,
207-
)
208-
nbins: int = Field(
209-
title="Number of Time Bins",
210-
description="Number of Time bins",
211-
default=50,
212-
)
213-
min_time_bin: int | None = Field(
214-
title="Minimum Time Bin",
215-
description="Minimum time edge of [time_bin_coordinate] in [time_bin_unit].",
216-
default=None,
217-
)
218-
max_time_bin: int | None = Field(
219-
title="Maximum Time Bin",
220-
description="Maximum time edge of [time_bin_coordinate] in [time_bin_unit].",
221-
default=None,
222-
)
223-
time_bin_unit: TimeBinUnit = Field(
224-
title="Unit of Time Bins",
225-
description="Unit of time bins.",
226-
default=TimeBinUnit.ms,
227-
)
228-
tof_lookup_table_file_path: str | None = Field(
229-
title="TOF Lookup Table File Path",
230-
description="Path to the TOF lookup table file. "
231-
"If None, the lookup table will be computed on-the-fly.",
232-
default=None,
233-
)
234-
tof_simulation_min_wavelength: float = Field(
235-
title="TOF Simulation Minimum Wavelength",
236-
description="Minimum wavelength for TOF simulation in Angstrom.",
237-
default=1.8,
238-
)
239-
tof_simulation_max_wavelength: float = Field(
240-
title="TOF Simulation Maximum Wavelength",
241-
description="Maximum wavelength for TOF simulation in Angstrom.",
242-
default=3.6,
243-
)
244-
tof_simulation_seed: int = Field(
245-
title="TOF Simulation Seed",
246-
description="Random seed for TOF simulation.",
247-
default=42, # No reason.
248-
)
249-
250-
251-
class OutputConfig(BaseModel):
252-
# Add title of the basemodel
253-
model_config = {"title": "Output Configuration"}
254-
# Log verbosity
255-
verbose: bool = Field(
256-
title="Verbose Logging",
257-
description="Increase output verbosity.",
258-
default=False,
259-
)
260-
# File output
261-
output_file: str = Field(
262-
title="Output File",
263-
description="Path to the output file.",
264-
default="scipp_output.h5",
265-
)
266-
compression: Compression = Field(
267-
title="Compression",
268-
description="Compress option of reduced output file.",
269-
default=Compression.BITSHUFFLE_LZ4,
270-
)
271-
272-
273-
class ReductionConfig(BaseModel):
274-
"""Container for all reduction configurations."""
275-
276-
inputs: InputConfig
277-
workflow: WorkflowConfig = Field(default_factory=WorkflowConfig)
278-
output: OutputConfig = Field(default_factory=OutputConfig)
279-
280-
@property
281-
def _children(self) -> list[BaseModel]:
282-
return [self.inputs, self.workflow, self.output]
283-
284-
285143
T = TypeVar('T', bound=BaseModel)
286144

287145

src/ess/nmx/configurations.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
3+
import enum
4+
5+
from pydantic import BaseModel, Field
6+
7+
from .types import Compression
8+
9+
10+
class InputConfig(BaseModel):
    # Options describing which input file(s) are read, which detectors are
    # selected and how the data is iterated in chunks.
    # Documented with comments instead of a docstring: pydantic would expose
    # a class docstring as the model's schema description.

    # Add title of the basemodel
    model_config = {"title": "Input Configuration"}

    # File IO
    # Required field: no default is declared.
    input_file: list[str] = Field(
        title="Input File",
        description="Path to the input file. If multiple file paths are given,"
        " the output(histogram) will be merged(summed) "
        "and will not save individual outputs per input file. ",
    )
    swmr: bool = Field(
        title="SWMR Mode",
        description="Open the input file in SWMR mode",
        default=False,
    )

    # Detector selection
    detector_ids: list[int] = Field(
        title="Detector IDs",
        description="Detector indices to process",
        default=[0, 1, 2],
    )

    # Chunking options
    iter_chunk: bool = Field(
        title="Iterate in Chunks",
        description="Whether to process the input file in chunks "
        " based on the hdf5 dataset chunk size. "
        "It is ignored if hdf5 dataset is not chunked. "
        "If True, it overrides chunk-size-pulse and chunk-size-events options.",
        default=False,
    )
    chunk_size_pulse: int = Field(
        title="Chunk Size Pulse",
        description="Number of pulses to process in each chunk. "
        "If 0 or negative, process all pulses at once.",
        default=0,
    )
    chunk_size_events: int = Field(
        title="Chunk Size Events",
        # The sentences are joined by implicit string concatenation, so each
        # fragment except the last must end with a space. The second fragment
        # previously did not, which rendered as "at once.If both".
        description="Number of events to process in each chunk. "
        "If 0 or negative, process all events at once. "
        "If both chunk-size-pulse and chunk-size-events are set, "
        "chunk-size-pulse is preferred.",
        default=0,
    )
54+
55+
56+
class TimeBinUnit(enum.StrEnum):
57+
ms = 'ms'
58+
us = 'us'
59+
ns = 'ns'
60+
61+
62+
class TimeBinCoordinate(enum.StrEnum):
63+
event_time_offset = 'event_time_offset'
64+
time_of_flight = 'time_of_flight'
65+
66+
67+
class WorkflowConfig(BaseModel):
    # Options for time binning and for the TOF lookup table / simulation.
    # Every field declares a default.

    # Add title of the basemodel
    model_config = {"title": "Workflow Configuration"}

    # --- Time binning ---
    time_bin_coordinate: TimeBinCoordinate = Field(
        default=TimeBinCoordinate.event_time_offset,
        title="Time Bin Coordinate",
        description="Coordinate to bin the time data.",
    )
    nbins: int = Field(
        default=50,
        title="Number of Time Bins",
        description="Number of Time bins",
    )
    min_time_bin: int | None = Field(
        default=None,
        title="Minimum Time Bin",
        description="Minimum time edge of [time_bin_coordinate] in [time_bin_unit].",
    )
    max_time_bin: int | None = Field(
        default=None,
        title="Maximum Time Bin",
        description="Maximum time edge of [time_bin_coordinate] in [time_bin_unit].",
    )
    time_bin_unit: TimeBinUnit = Field(
        default=TimeBinUnit.ms,
        title="Unit of Time Bins",
        description="Unit of time bins.",
    )

    # --- TOF lookup table / simulation ---
    tof_lookup_table_file_path: str | None = Field(
        default=None,
        title="TOF Lookup Table File Path",
        description=(
            "Path to the TOF lookup table file. "
            "If None, the lookup table will be computed on-the-fly."
        ),
    )
    tof_simulation_min_wavelength: float = Field(
        default=1.8,
        title="TOF Simulation Minimum Wavelength",
        description="Minimum wavelength for TOF simulation in Angstrom.",
    )
    tof_simulation_max_wavelength: float = Field(
        default=3.6,
        title="TOF Simulation Maximum Wavelength",
        description="Maximum wavelength for TOF simulation in Angstrom.",
    )
    tof_simulation_seed: int = Field(
        default=42,  # No reason.
        title="TOF Simulation Seed",
        description="Random seed for TOF simulation.",
    )
116+
117+
118+
class OutputConfig(BaseModel):
    # Options for logging verbosity and for the reduced output file.
    # Every field declares a default.

    # Add title of the basemodel
    model_config = {"title": "Output Configuration"}

    # Log verbosity
    verbose: bool = Field(
        default=False,
        title="Verbose Logging",
        description="Increase output verbosity.",
    )

    # File output
    output_file: str = Field(
        default="scipp_output.h5",
        title="Output File",
        description="Path to the output file.",
    )
    compression: Compression = Field(
        default=Compression.BITSHUFFLE_LZ4,
        title="Compression",
        description="Compress option of reduced output file.",
    )
138+
139+
140+
class ReductionConfig(BaseModel):
    """Container for all reduction configurations."""

    # ``inputs`` has no default and must be provided; ``workflow`` and
    # ``output`` are built from their own defaults when omitted.
    inputs: InputConfig
    workflow: WorkflowConfig = Field(default_factory=WorkflowConfig)
    output: OutputConfig = Field(default_factory=OutputConfig)

    @property
    def _children(self) -> list[BaseModel]:
        """Return the input, workflow and output sections, in that order."""
        section_names = ("inputs", "workflow", "output")
        return [getattr(self, section) for section in section_names]

0 commit comments

Comments
 (0)