Skip to content

Commit 979e114

Browse files
wfviningcwhanse
andauthored
Geometric clipping (#93)
* Test fixtures for new geometric clipping fixture Set up to pass pdc0 for PVWatts inverter model to generate data with clipping. 1-minute timestamp spacing can be downsampled for tests at lower frequencies. * Test that data with/without clipping is correctly identified * Register the pdc0_inverter pytest.mark pytest warns on unregistered marks to prevent typos, register this mark so we can use it safely. * Parametrize tests by data frequency Test at 1, 15, 30, and 60 minute frequencies. * Test that the correct data is flagged as clipped Also expand parametrization to include 5 minute data. * Down-sample data with frequency less than 10 minutes A different method is used to calculate the clipping threshold when data is down-sampled. * Test with "simulated" midday cloudy period * Use larger default window for tracking systems * Test passing a larger window results in no clipping detected * Test tracking=True parameter * Don't test correctness at 1-minute frequency Because some not-clipped data is very close to the clipping threshold at 1-minute timestamp spacing, creating a test that exactly captures the expected output is unreasonable. We have tests that ensures clipping is detected at that frequency if and only if it is present. * Add features.clipping.geometric to api.rst * Allow some values that are not clipped for high-frequency data Values that are very near the clipping level cannot be reliably distinguished from clipped data. * Use cythonized kernel functions Substantial performance improvements by using .transform('max') instead of .transform(lambda xs: data[xs.index][xs].max()). Some minor additional work is required to select the correct data before applying .transform() * Add test with irregular and missing data * Raise a ValueError if ac_power is not sorted because we are rolling over the integer indices, not time windows the data must be sorted. * Adjust min/max threshold to be below/above true min/max The previous approach was to round to 8 decimal places; however, this fails when both min and max are rounded up to the same value. The solution implemented here is slightly more complex, but ensures that the thresholds are adjusted in the correct direction (maximum increases if it is less than the true minimum and minimum decreases if it is greater than the true maximum). Co-authored-by: Cliff Hansen <[email protected]>
1 parent 2f7e687 commit 979e114

File tree

4 files changed

+339
-0
lines changed

4 files changed

+339
-0
lines changed

docs/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ Functions for identifying inverter clipping
188188

189189
features.clipping.levels
190190
features.clipping.threshold
191+
features.clipping.geometric
191192

192193
Clearsky
193194
--------

pvanalytics/features/clipping.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,207 @@ def threshold(ac_power, slope_max=0.0035, power_min=0.75,
227227
freq=freq
228228
)
229229
return ac_power >= threshold
230+
231+
232+
def _freq_minutes(index, freq):
233+
"""Return the frequency in minutes for `freq`. If `freq` is None
234+
then use the frequency inferred from `index`."""
235+
if freq is None:
236+
freq = pd.infer_freq(index)
237+
if freq is None:
238+
raise ValueError("cannot infer frequency")
239+
return util.freq_to_timedelta(freq).seconds / 60
240+
241+
242+
def _apply_daily_mask(mask, data, transformation):
243+
"""Apply `f` to the data selected by `mask` on each day.
244+
245+
Parameters
246+
----------
247+
mask : Series
248+
Boolean Series with same index as `data`
249+
data : Series
250+
Series with the data.
251+
transformation : str or function
252+
Any value that can be passed to ``Series.resample().transform()``.
253+
254+
Returns
255+
-------
256+
Series
257+
Series with same index as `mask` and values assigned by applying
258+
transformation to data in ``data[mask]`` on each day.
259+
"""
260+
data = data.copy()
261+
data[~mask] = np.nan
262+
return data.resample('D').transform(transformation)
263+
264+
265+
def _threshold_mean(mask, data):
266+
"""Return daily thresholds based on mean and standard deviation.
267+
268+
Parameters
269+
----------
270+
mask : Series
271+
Boolean series.
272+
data : Series
273+
Data with same index as `mask`.
274+
275+
Returns
276+
-------
277+
minimum : Series
278+
`data` transformed to the mean of ``data[mask]`` minus 2 times
279+
the standard deviation of ``data[mask]`` on each day.
280+
maximum : Series
281+
`data` transformed to the mean of ``data[mask]`` plus 2 times
282+
the standard deviation of ``data[mask]`` on each day.
283+
"""
284+
daily_mean = _apply_daily_mask(mask, data, 'mean')
285+
daily_std = _apply_daily_mask(mask, data, 'std')
286+
daily_clipped_max = daily_mean + 2 * daily_std
287+
daily_clipped_min = daily_mean - 2 * daily_std
288+
# In cases where the standard deviation is 0 (i.e. all the data is
289+
# identical) it is possible for the mean to be above the daily maximum
290+
# by a very small amount due to floating point rounding errors. To ensure
291+
# that rounding errors do not affect the final outcome we lower the daily
292+
# clipping minimum if it is greater than the maximum for that day and
293+
# raise the daily clipping maximum if it is less than the minimum for
294+
# that day.
295+
daily_min, daily_max = _threshold_minmax(mask, data)
296+
min_above_max = daily_clipped_min > daily_max
297+
max_below_min = daily_clipped_max < daily_min
298+
daily_clipped_min[min_above_max] = daily_max[min_above_max]
299+
daily_clipped_max[max_below_min] = daily_min[max_below_min]
300+
return daily_clipped_min, daily_clipped_max
301+
302+
303+
def _threshold_minmax(mask, data):
304+
"""Return daily thresholds based on min and max.
305+
306+
Parameters
307+
----------
308+
mask : Series
309+
Boolean series
310+
data : Series
311+
Data with same index as `mask`.
312+
313+
Returns
314+
-------
315+
minimum : Series
316+
`data` transformed to have the minimum value from ``data[mask]``
317+
on each day.
318+
maximum : Series
319+
`data` transformed to have the maximum value from ``data[mask]``
320+
on each day.
321+
"""
322+
daily_max = _apply_daily_mask(mask, data, 'max')
323+
daily_min = _apply_daily_mask(mask, data, 'min')
324+
return daily_min, daily_max
325+
326+
327+
def _rolling_low_slope(ac_power, window, slope_max):
328+
"""Return True for timestamps where the data has slope less
329+
than `slope_min` over a rolling window of length `window."""
330+
# Reverse the series to do a forward looking (left-labeled)
331+
# rolling max/min.
332+
rolling_max = ac_power[::-1].rolling(
333+
window=window).max().reindex_like(ac_power)
334+
rolling_min = ac_power[::-1].rolling(
335+
window=window).min().reindex_like(ac_power)
336+
# calculate an upper bound on the derivative
337+
derivative_max = ((rolling_max - rolling_min)
338+
/ ((rolling_max + rolling_min) / 2) * 100)
339+
clipped = derivative_max < slope_max
340+
clipped_windows = clipped.copy()
341+
# flag all points in a window that has clipping
342+
for i in range(0, window):
343+
clipped_windows |= clipped.shift(i)
344+
return clipped_windows
345+
346+
347+
def geometric(ac_power, window=None, slope_max=0.2, freq=None,
348+
tracking=False):
349+
"""Identify clipping based on a the shape of the `ac_power`
350+
curve on each day.
351+
352+
Each day is checked for periods where the slope of `ac_power`
353+
is small. The power values in these periods are used to calculate
354+
a minimum and a maximum clipped power level for that day. Any
355+
power values that are within this range are flagged as
356+
clipped. The methodology for computing the thresholds varies
357+
depending on the frequency of `ac_power`. For high frequency
358+
data (less than 10 minute timestamp spacing) the minimum
359+
clipped power is the mean of the low-slope period(s) on that
360+
day minus 2 times the standard deviation in the same period(s).
361+
For lower frequency data the absolute minimum and maximum of
362+
the low slope period(s) on each day are used.
363+
364+
If the frequency of `ac_power` is less than ten minutes, then
365+
`ac_power` is down-sampled to 15 minutes and the mean value in
366+
each 15-minute period is used to reduce noise inherent in
367+
high frequency data.
368+
369+
Parameters
370+
----------
371+
ac_power : Series
372+
AC power data.
373+
window : int, optional
374+
Size of the rolling window used to identify low-slope
375+
periods. If not specified and `tracking` is False then
376+
`window=3` is used. If not specified and `tracking` is
377+
True then `window=5` is used.
378+
slope_max : float, default 0.2
379+
Maximum difference in maximum and minimum power for a
380+
window to be flagged as clipped. Units are percent of
381+
average power in the interval.
382+
freq : str, optional
383+
Frequency of `ac_power`. If not specified then
384+
:py:func:`pandas.infer_freq` is used.
385+
tracking : bool, default False
386+
If True then a larger default `window` is used. If `window`
387+
is specified then `tracking` has no affect.
388+
389+
Returns
390+
-------
391+
Series
392+
Boolean Series with True for values that appear to be clipped.
393+
394+
Raises
395+
------
396+
ValueError
397+
If the index of `ac_power` is not sorted.
398+
399+
Notes
400+
-----
401+
Based on code from the PVFleets QA project.
402+
"""
403+
if not ac_power.index.is_monotonic_increasing:
404+
raise ValueError("Index must be monotonically increasing.")
405+
ac_power_original = ac_power.copy()
406+
ac_power = ac_power_original
407+
try:
408+
freq_minutes = _freq_minutes(ac_power.index, freq)
409+
except ValueError:
410+
raise ValueError("Cannot infer frequency of `ac_power`. "
411+
"Please resample or pass `freq`.")
412+
if freq_minutes < 10:
413+
ac_power = ac_power.resample('15T').mean()
414+
if window is None and tracking and freq_minutes < 30:
415+
window = 5
416+
else:
417+
window = window or 3
418+
# remove low power times to eliminate night.
419+
daily_min = ac_power.resample('D').transform('max') * 0.1
420+
ac_power.loc[ac_power < daily_min] = np.nan
421+
clipped = _rolling_low_slope(ac_power, window, slope_max)
422+
if not ac_power.index.equals(ac_power_original.index):
423+
# data was down-sampled.
424+
daily_clipped_min, daily_clipped_max = _threshold_mean(
425+
clipped.reindex_like(ac_power_original, method='ffill'),
426+
ac_power_original
427+
)
428+
else:
429+
daily_clipped_min, daily_clipped_max = _threshold_minmax(
430+
clipped, ac_power_original
431+
)
432+
return ((ac_power_original >= daily_clipped_min)
433+
& (ac_power_original <= daily_clipped_max))

pvanalytics/tests/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ def pytest_addoption(parser):
1414

1515
def pytest_configure(config):
1616
config.addinivalue_line("markers", "slow: mark test as slow to run")
17+
config.addinivalue_line("markers", "pdc0_inverter: pass inverter"
18+
"DC input limit to fixture that"
19+
"models AC power using PVWatts")
1720

1821

1922
def pytest_collection_modifyitems(config, items):

pvanalytics/tests/features/test_clipping.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from pandas.util.testing import assert_series_equal
44
import numpy as np
55
import pandas as pd
6+
from pvlib import irradiance, temperature, pvsystem, inverter
7+
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
68
from pvanalytics.features import clipping
79

810

@@ -242,3 +244,132 @@ def test_threshold_no_clipping_four_days(quadratic):
242244
clipped = clipping.threshold(power)
243245

244246
assert not clipped.any()
247+
248+
249+
@pytest.fixture(scope='module')
250+
def july():
251+
return pd.date_range(start='7/1/2020', end='8/1/2020', freq='T')
252+
253+
254+
@pytest.fixture(scope='module')
255+
def clearsky_july(july, albuquerque):
256+
return albuquerque.get_clearsky(
257+
july,
258+
model='simplified_solis'
259+
)
260+
261+
262+
@pytest.fixture(scope='module')
263+
def solarposition_july(july, albuquerque):
264+
return albuquerque.get_solarposition(july)
265+
266+
267+
@pytest.fixture
268+
def power_pvwatts(request, clearsky_july, solarposition_july):
269+
pdc0 = 100
270+
pdc0_inverter = 110
271+
tilt = 30
272+
azimuth = 180
273+
pdc0_marker = request.node.get_closest_marker("pdc0_inverter")
274+
if pdc0_marker is not None:
275+
pdc0_inverter = pdc0_marker.args[0]
276+
poa = irradiance.get_total_irradiance(
277+
tilt, azimuth,
278+
solarposition_july['zenith'], solarposition_july['azimuth'],
279+
**clearsky_july
280+
)
281+
cell_temp = temperature.sapm_cell(
282+
poa['poa_global'], 25, 0,
283+
**TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']
284+
)
285+
dc = pvsystem.pvwatts_dc(poa['poa_global'], cell_temp, pdc0, -0.004)
286+
return inverter.pvwatts(dc, pdc0_inverter)
287+
288+
289+
@pytest.mark.parametrize('freq', ['T', '5T', '15T', '30T', 'H'])
290+
def test_geometric_no_clipping(power_pvwatts, freq):
291+
clipped = clipping.geometric(power_pvwatts.resample(freq).asfreq())
292+
assert not clipped.any()
293+
294+
295+
@pytest.mark.pdc0_inverter(60)
296+
@pytest.mark.parametrize('freq', ['T', '5T', '15T', '30T', 'H'])
297+
def test_geometric_clipping(power_pvwatts, freq):
298+
clipped = clipping.geometric(power_pvwatts.resample(freq).asfreq())
299+
assert clipped.any()
300+
301+
302+
@pytest.mark.pdc0_inverter(65)
303+
@pytest.mark.parametrize('freq', ['5T', '15T', '30T', 'H'])
304+
def test_geometric_clipping_correct(power_pvwatts, freq):
305+
power = power_pvwatts.resample(freq).asfreq()
306+
clipped = clipping.geometric(power)
307+
expected = power == power.max()
308+
if freq == '5T':
309+
assert np.allclose(power[clipped], power.max(), atol=0.5)
310+
else:
311+
assert_series_equal(clipped, expected)
312+
313+
314+
@pytest.mark.pdc0_inverter(65)
315+
def test_geometric_clipping_midday_clouds(power_pvwatts):
316+
power = power_pvwatts.resample('15T').asfreq()
317+
power.loc[power.between_time(
318+
start_time='17:30', end_time='19:30',
319+
include_start=True, include_end=True
320+
).index] = list(range(30, 39)) * 31
321+
clipped = clipping.geometric(power)
322+
expected = power == power.max()
323+
assert_series_equal(clipped, expected)
324+
325+
326+
@pytest.mark.pdc0_inverter(80)
327+
def test_geometric_clipping_window(power_pvwatts):
328+
power = power_pvwatts.resample('15T').asfreq()
329+
clipped = clipping.geometric(power)
330+
assert clipped.any()
331+
clipped_window = clipping.geometric(power, window=24)
332+
assert not clipped_window.any()
333+
334+
335+
@pytest.mark.pdc0_inverter(89)
336+
def test_geometric_clipping_tracking(power_pvwatts):
337+
power = power_pvwatts.resample('15T').asfreq()
338+
clipped = clipping.geometric(power)
339+
assert clipped.any()
340+
clipped = clipping.geometric(power, tracking=True)
341+
assert not clipped.any()
342+
343+
344+
@pytest.mark.pdc0_inverter(80)
345+
def test_geometric_clipping_window_overrides_tracking(power_pvwatts):
346+
power = power_pvwatts.resample('15T').asfreq()
347+
clipped = clipping.geometric(power, tracking=True)
348+
assert clipped.any()
349+
clipped_override = clipping.geometric(power, tracking=True, window=24)
350+
assert not clipped_override.any()
351+
352+
353+
@pytest.mark.parametrize('freq', ['5T', '15T'])
354+
def test_geometric_clipping_missing_data(freq, power_pvwatts):
355+
power = power_pvwatts.resample(freq).asfreq()
356+
power.loc[power.between_time('09:00', '10:30').index] = np.nan
357+
power.loc[power.between_time('12:15', '12:45').index] = np.nan
358+
power.dropna(inplace=True)
359+
with pytest.raises(ValueError,
360+
match="Cannot infer frequency of `ac_power`. "
361+
"Please resample or pass `freq`."):
362+
clipping.geometric(power)
363+
assert not clipping.geometric(power, freq=freq).any()
364+
365+
366+
def test_geometric_index_not_sorted():
367+
power = pd.Series(
368+
[1, 2, 3],
369+
index=pd.DatetimeIndex(
370+
['20200201 0700', '20200201 0630', '20200201 0730']
371+
)
372+
)
373+
with pytest.raises(ValueError,
374+
match=r"Index must be monotonically increasing\."):
375+
clipping.geometric(power, freq='30T')

0 commit comments

Comments
 (0)