|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +from scipy.signal import find_peaks |
| 6 | + |
# Names exported by ``from <module> import *``.
__all__ = ["find_cosmic_rays", "remove_cosmic_rays", "group_spectra_points"]
| 8 | + |
| 9 | + |
def find_cosmic_rays(
    spectra: np.ndarray, ignore_region: tuple[int, int] = (200, 400), **kwargs
) -> np.ndarray:
    """
    Find the indices of cosmic rays.

    Each spectrum is searched with ``scipy.signal.find_peaks``; every peak
    that lies outside ``ignore_region`` is reported as a cosmic ray.

    Parameters
    ----------
    spectra : (N, n_pixels) or (n_pixels,) array-like
        The spectra to search through. A single 1-D spectrum is treated as
        one row.
    ignore_region : (int, int)
        A pixel range in which peaks are not reported. The bounds are
        exclusive: a peak exactly on either bound is still reported. The two
        values may be given in either order.
    **kwargs :
        Passed to scipy.signal.find_peaks. ``threshold`` defaults to 75 and
        ``prominence`` defaults to 100 when not supplied.

    Returns
    -------
    idx : (M, 2) np.ndarray of int
        The indices of which spectra+pixels have detected cosmic rays.
        The first column contains which spectra, the second which pixel.
        When nothing is detected the result has shape ``(0, 2)``.
    """
    spectra = np.atleast_2d(spectra)
    min_ignore = min(ignore_region)
    max_ignore = max(ignore_region)
    threshold = kwargs.pop("threshold", 75)
    prominence = kwargs.pop("prominence", 100)

    idx = []
    for s, spec in enumerate(spectra):
        peaks, _ = find_peaks(
            spec, threshold=threshold, prominence=prominence, **kwargs
        )
        # Keep only peaks that are not strictly inside the ignored region.
        idx.extend((s, p) for p in peaks if not min_ignore < p < max_ignore)

    # An empty list would otherwise become a float array of shape (0,);
    # force an integer (M, 2) array so callers can always slice columns.
    return np.asarray(idx, dtype=np.intp).reshape(-1, 2)
| 46 | + |
| 47 | + |
def remove_cosmic_rays(df: pd.DataFrame, plot: bool = False, **kwargs) -> pd.DataFrame:
    """
    Process a dataframe by removing all spectra with detected cosmic rays.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with spectra components as the first 1340 columns
    plot : bool
        Whether to generate a plot showing which spectra were removed.
        matplotlib is imported only when this is True.
    **kwargs
        Passed to find_cosmic_rays (and from there to
        scipy.signal.find_peaks).

    Returns
    -------
    pd.DataFrame
        The spectra dataframe with spectra with cosmic rays removed.
    """
    spectra = df.iloc[:, :1340].values

    # Normalise to an integer (M, 2) array so the column slicing below is
    # valid even when nothing was detected.
    cosmic_idx = (
        np.asarray(find_cosmic_rays(spectra, **kwargs)).reshape(-1, 2).astype(int)
    )
    # Only the first column holds spectrum (row) indices. The second column
    # holds pixel indices and must not be used to decide which rows to drop.
    keep_idx = np.setdiff1d(np.arange(spectra.shape[0]), cosmic_idx[:, 0])
    if plot:
        import matplotlib.pyplot as plt

        unique_cosmic, offset_idx = np.unique(cosmic_idx[:, 0], return_inverse=True)
        # Shift each removed spectrum vertically so they do not overlap.
        offsets = np.arange(unique_cosmic.shape[0]) * 100
        _, axs = plt.subplots(1, 2, figsize=(10, 6))
        axs[0].set_title("Post Removal")
        if keep_idx.size:
            axs[0].plot(spectra[keep_idx].T, alpha=0.75)

        if unique_cosmic.size:
            axs[1].plot(spectra[unique_cosmic].T + offsets)
            axs[1].plot(
                cosmic_idx[:, 1],
                spectra[cosmic_idx[:, 0], cosmic_idx[:, 1]] + offsets[offset_idx],
                "rx",
                markersize=10,
                label="detected cosmic rays",
                mew=5,
            )
            axs[1].legend()

        axs[1].set_title("Post removal - with offset")
    return df.iloc[keep_idx]
| 92 | + |
| 93 | + |
def group_spectra_points(df, multiplier: int) -> pd.DataFrame:
    """
    Add which point each spectra is from to the multiindex.

    Rows are grouped by index level 1 (position) and, inside each position,
    by index level 0. Within every such group each consecutive run of
    ``multiplier`` rows is one point; points are numbered 0, 1, 2, ... and
    shifted by an offset that is updated after each position.

    Parameters
    ----------
    df : pd.DataFrame
        The raman dataframe. Should be organized as T, P, type.
        A ``pt`` column is added to (or overwritten on) this dataframe in
        place.
    multiplier : int
        How many subspectra per point

    Returns
    -------
    pd.DataFrame
        the original dataframe with pt_label appended to multiindex

    Raises
    ------
    ValueError
        If a (T, P) group has a number of rows that is not a multiple of
        ``multiplier``.
    """
    # Work with positional row numbers so labels are assigned to exactly the
    # rows of each group, independent of index sorting or duplicate keys.
    row_numbers = pd.Series(np.arange(len(df)), index=df.index)
    labels = np.zeros(len(df), dtype=int)

    offset = 0
    for pos, pos_rows in row_numbers.groupby(level=1):
        for t, tp_rows in pos_rows.groupby(level=0):
            rows = tp_rows.to_numpy()
            n_pts, leftover = divmod(len(rows), multiplier)
            if leftover:
                raise ValueError(
                    f"group (T={t!r}, P={pos!r}) has {len(rows)} rows, "
                    f"which is not a multiple of multiplier={multiplier}"
                )
            labels[rows] = np.repeat(np.arange(n_pts), multiplier) + offset
        # The offset is taken from the rows of this position (index level 1).
        # NOTE(review): using max (not max + 1) means the next position's
        # first label equals this position's last label; kept as-is to
        # preserve the existing numbering - confirm whether that is intended.
        offset = labels[pos_rows.to_numpy()].max()

    df["pt"] = labels
    return df.set_index("pt", append=True)
0 commit comments