Skip to content

Commit bb6559a

Browse files
authored
Changes in the mayFreeIoBlobs criterion. (#1126)
First of all, it was rewritten as a function to simplify understanding. But this is not the main change. The main one is that the `!IsRecurrentMode() || IsLastSequencePos()` subcriterion is combined with the others using `and` instead of `or`. As a result, the inputs of an incomplete recurrent layer will not be dropped until the recurrent processing completes. Found when monitoring allocations: each call of `runOnce` inside the recurrent loop was allocating an output blob. Signed-off-by: slon872 <[email protected]>
1 parent 763026f commit bb6559a

File tree

2 files changed

+46
-16
lines changed

2 files changed

+46
-16
lines changed

NeoML/include/NeoML/Dnn/Dnn.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ struct CDnnLayerLink final {
125125

126126
//------------------------------------------------------------------------------------------------------------
127127

128-
// CBaseLayer is the base class for all layers with which the network can function.
129-
// Each layer has a string name that should be unique in the network. Each layer may have
128+
// CBaseLayer is the base class for all layers with which the network can function.
129+
// Each layer has a string name that should be unique in the network. Each layer may have
130130
// one or several inputs and one or several outputs.
131131
class NEOML_API CBaseLayer : public virtual IObject {
132132
public:
@@ -138,7 +138,7 @@ class NEOML_API CBaseLayer : public virtual IObject {
138138
// The current network (described by a CDnn class) to which the layer belongs
139139
// While a layer is connected to a network, you may not change its basic configuration,
140140
// such as its name, the list of inputs, the size of a convolution window, etc.
141-
// While a layer belongs to a network, only the settings like input blob size
141+
// While a layer belongs to a network, only the settings like input blob size
142142
// or function coefficients (for example, during training) may be changed
143143
const CDnn* GetDnn() const { return dnn; }
144144
CDnn* GetDnn() { return dnn; }
@@ -181,7 +181,7 @@ class NEOML_API CBaseLayer : public virtual IObject {
181181
void EnableLearning();
182182
bool IsLearningEnabled() const { return isLearningEnabled; }
183183

184-
// Base learning rate (the learning strategy may change
184+
// Base learning rate (the learning strategy may change
185185
// the relative learning rates inside the network, but the base rate stays the same)
186186
float GetBaseLearningRate() const { return baseLearningRate; }
187187
void SetBaseLearningRate( float rate ) { baseLearningRate = rate; }
@@ -198,11 +198,11 @@ class NEOML_API CBaseLayer : public virtual IObject {
198198

199199
// Begins processing a new sequence
200200
// The method is overloaded for the composite layer and the backward link layer
201-
virtual void RestartSequence() {}
201+
virtual void RestartSequence() {}
202202

203203
void Serialize(CArchive& archive) override;
204204

205-
// Indicates that backpropagation should be performed for the layer
205+
// Indicates that backpropagation should be performed for the layer
206206
// even if there are no trainable layers before it
207207
bool GetBackwardForced() const { return isBackwardForced; }
208208
void SetBackwardForced(bool forced);
@@ -432,6 +432,7 @@ class NEOML_API CBaseLayer : public virtual IObject {
432432
void reshape();
433433
void setInputDesc(int i);
434434
void runOnce();
435+
bool mayFreeIoBlobs() const;
435436
void recheckBackwardNeeded();
436437
void backwardRunAndLearnOnce();
437438
void transferDiffBlob( CDnnBlob* diffBlob, int outputNum );

NeoML/src/Dnn/BaseLayer.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ void CBaseLayer::reshape()
405405
CArray<CBlobDesc> prevInputDescs;
406406
inputDescs.MoveTo( prevInputDescs );
407407
inputDescs.SetSize(inputs.Size());
408-
408+
409409
// Call the input layers reshape recursively, reset the input blobs
410410
for( int i = 0; i < GetInputCount(); ++i ) {
411411
GetInputLayer(i)->reshape();
@@ -420,7 +420,7 @@ void CBaseLayer::reshape()
420420

421421
if(!forcedReshape) {
422422
for(int i = 0; i < inputBlobs.Size(); i++) {
423-
forcedReshape = forcedReshape
423+
forcedReshape = forcedReshape
424424
|| !inputDescs[i].HasEqualDimensions(prevInputDescs[i]);
425425
}
426426
}
@@ -529,11 +529,7 @@ void CBaseLayer::runOnce()
529529
inputBlobs[i] = prevLayerOutput;
530530
}
531531

532-
const bool mayFreeIoBlobs = GetDnn()->isReuseMemoryMode
533-
&& ( !GetDnn()->isBackwardPerformed || !GetDnn()->IsRecurrentMode() || GetDnn()->IsLastSequencePos()
534-
|| ( ( blobsNeededForBackward & TInputBlobs ) == 0 && ( !isInPlace || ( blobsNeededForBackward & TOutputBlobs ) == 0 ) ) );
535-
536-
if( mayFreeIoBlobs ) {
532+
if( mayFreeIoBlobs() ) {
537533
for( int i = 0; i < inputBlobs.Size(); ++i ) {
538534
CBaseLayer* inputLayer = GetInputLayer( i );
539535
const int outputNumber = inputs[i].OutputNumber;
@@ -564,6 +560,39 @@ void CBaseLayer::runOnce()
564560
}
565561
}
566562

563+
// Checks if output blobs of input layers can be discarded.
564+
bool CBaseLayer::mayFreeIoBlobs() const
565+
{
566+
assert( dnn != nullptr );
567+
568+
if( !dnn->isReuseMemoryMode ) {
569+
// Memory reuse turned off.
570+
return false;
571+
}
572+
573+
if( dnn->IsRecurrentMode() && !dnn->IsLastSequencePos() ) {
574+
// Recurrent layer processing is incomplete.
575+
return false;
576+
}
577+
578+
if( !dnn->isBackwardPerformed ) {
579+
// Inference mode, intermediate data is not required.
580+
return true;
581+
}
582+
583+
if( (blobsNeededForBackward & TInputBlobs) != 0 ) {
584+
// Input blobs are required for back propagation.
585+
return false;
586+
}
587+
588+
if( isInPlace && (blobsNeededForBackward & TOutputBlobs) != 0 ) {
589+
// Output blobs are required for back propagation and they are the same as input.
590+
return false;
591+
}
592+
593+
return true;
594+
}
595+
567596
// Recalculates the isBackwardNeeded flag; recursively checks the inputs
568597
void CBaseLayer::recheckBackwardNeeded()
569598
{
@@ -625,7 +654,7 @@ void CBaseLayer::backwardRunAndLearnOnce()
625654
}
626655
}
627656

628-
// Perform one step of error backward propagation:
657+
// Perform one step of error backward propagation:
629658
// calculate the input error from the output one
630659
BackwardOnce();
631660
}
@@ -647,7 +676,7 @@ void CBaseLayer::backwardRunAndLearnOnce()
647676
paramDiffBlobs.DeleteAll();
648677
}
649678
}
650-
679+
651680
outputDiffBlobs.DeleteAll();
652681

653682
if( IsBackwardPerformed() ) {
@@ -681,7 +710,7 @@ void CBaseLayer::backwardRunAndLearnOnce()
681710
}
682711

683712
// Handles the notification that output diff is ready for a given output
684-
// If that is the last output diff necessary for learning,
713+
// If that is the last output diff necessary for learning,
685714
// backpropagation and learning are started for this layer
686715
void CBaseLayer::transferDiffBlob( CDnnBlob* diffBlob, int outputNum )
687716
{

0 commit comments

Comments
 (0)