Skip to content

Commit 0e0b817

Browse files
MSD-966: ignore zero-seq-length records for pull_data_for_accuracy
1 parent 3a680aa commit 0e0b817

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/mostlyai/qa/sampling.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,10 @@ def pull_data_for_accuracy(
111 111   df = pd.merge(df, df_nxt, on=key, how="left")
112 112   df = df.drop(columns=[key])
113 113
114     - # fill count columns
    114 + # remove records with sequence length equal to 0
115 115   count_column = f"{TGT_COLUMN_PREFIX}{COUNT_COLUMN}"
116 116   df[count_column] = df[count_column].fillna(0).astype("Int64")
    117 + df = df.loc[df[count_column] > 0].reset_index(drop=True)
117 118
118 119   if setup is None:
119 120       setup = "1:1" if (df[count_column] == 1).all() else "1:N"

0 commit comments

Comments
 (0)