@@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
4747 [ NotNull ]
4848 private readonly Dnn DnnInstance = DnnService . Instance ;
4949
50- /// <summary>
51- /// Sets the cuDNN fields that will be used during future forward/backwards operations
52- /// </summary>
50+ // cuDNN fields setup
5351 private void SetupCuDnnInfo ( )
5452 {
5553 ConvolutionDescription . Set2D ( OperationInfo . VerticalPadding , OperationInfo . HorizontalPadding , OperationInfo . VerticalStride , OperationInfo . HorizontalStride , 1 , 1 , ( Alea . cuDNN . ConvolutionMode ) OperationInfo . Mode ) ;
@@ -74,71 +72,63 @@ public CuDnnConvolutionalLayer(
7472 #region Implementation
7573
7674 /// <inheritdoc/>
// Forward pass through cuDNN. The added (+) lines run ConvolutionForward on x and Weights into the
// device buffer z_gpu, add Biases to z_gpu with AddTensor, and copy z_gpu to the host tensor z.
// The output a is then either a duplicate of z (Identity activation) or the result of
// ActivationForward applied to z_gpu and copied to the host.
// Diff note: the removed (-) lines pinned Weights with `fixed` and reshaped it through
// Tensor.Reshape before the upload; the added lines pass Weights to AllocateDevice directly,
// so the method no longer needs the `unsafe` modifier.
77- public override unsafe void Forward ( in Tensor x , out Tensor z , out Tensor a )
75+ public override void Forward ( in Tensor x , out Tensor z , out Tensor a )
7876 {
79- fixed ( float * pw = Weights )
// z_gpu is allocated with x.Entities * OutputInfo.Size floats and stays alive for the whole method body.
77+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
8078 {
81- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
82- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
79+ // Tensors info setup
// Both descriptors are reconfigured on every call, with x.Entities as the first of the four size arguments.
80+ InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
81+ OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
82+
83+ // Forward convolution
// Ask for a forward algorithm (PREFER_FASTEST), then query the workspace size for that algorithm.
84+ DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
85+ DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
// The device copies of x and Weights and the workspace buffer are scoped to the convolution call only.
86+ using ( DeviceMemory < float >
87+ x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
88+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
89+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
8390 {
84- // Tensors info setup
85- InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
86- OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
87-
88- // Forward convolution
89- DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
90- DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
91- using ( DeviceMemory < float >
92- x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
93- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
94- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
95- {
96- DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
97- }
91+ DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
92+ }
9893
99- // Biases
100- using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
101- {
102- DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
103- }
104- z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
94+ // Biases
95+ using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
96+ {
97+ DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
98+ }
// z is copied out to the host here, before the activation step below touches z_gpu.
99+ z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
105100
106- // Activation
107- if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
108- else
109- {
110- DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
111- z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
112- }
101+ // Activation
// Identity activation: a is produced by duplicating z, without another device call.
102+ if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
103+ else
104+ {
// z_gpu.Ptr is passed for both pointer arguments — presumably an in-place activation (confirm).
105+ DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
106+ z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
113107 }
114108 }
115109 }
116110
117111 /// <inheritdoc/>
// Backward pass through cuDNN. The added (+) lines run ConvolutionBackwardData with Weights and
// delta_1 into delta_gpu (allocated with z.Size floats), then upload z, call ActivationBackward
// with z_gpu and delta_gpu, and copy z_gpu back into z.
// NOTE(review): z is an `in` parameter yet is the target of z_gpu.CopyTo(z) — presumably Tensor
// wraps a buffer that can still be written through; confirm against the Tensor type.
// NOTE(review): InputDescription/OutputDescription are not configured here; in the visible hunks
// they are only set in Forward, so this presumably relies on a prior Forward call with the same
// number of entities — confirm.
// Diff note: as in Forward, the removed (-) `fixed`/Tensor.Reshape handling of Weights is replaced
// by passing Weights to AllocateDevice directly, and the `unsafe` modifier is dropped.
118- public override unsafe void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
112+ public override void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
119113 {
120- fixed ( float * pw = Weights )
// delta_gpu stays alive for the whole method body; it is written by the convolution and then passed to ActivationBackward.
114+ using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
121115 {
122- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
116+ // Convolution
// Ask for a backward-data algorithm (PREFER_FASTEST), then query the workspace size for that algorithm.
123117 DnnInstance . GetConvolutionBackwardDataAlgorithm ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , ConvolutionBwdDataPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionBwdDataAlgo algorithm ) ;
124118 DnnInstance . GetConvolutionBackwardDataWorkspaceSize ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , algorithm , out IntPtr size ) ;
125- using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
// The device copies of delta_1 and Weights and the workspace buffer are scoped to the convolution call only.
119+ using ( DeviceMemory < float >
120+ delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
121+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
122+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
126123 {
127- // Backwards convolution
128- using ( DeviceMemory < float >
129- delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
130- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
131- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
132- {
133- DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
134- }
124+ DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
125+ }
135126
136- // Activation
137- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
138- {
139- DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
140- z_gpu . CopyTo ( z ) ;
141- }
127+ // Activation
// z is uploaded, passed to ActivationBackward together with delta_gpu, and the z_gpu buffer is then copied back into z.
128+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
129+ {
130+ DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
131+ z_gpu . CopyTo ( z ) ;
142132 }
143133 }
144134 }
@@ -159,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
159149 {
160150 DnnInstance . ConvolutionBackwardFilter ( 1 , InputDescription , a_gpu . Ptr , OutputDescription , delta_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , FilterDescription , w_gpu . Ptr ) ;
161151 }
162- w_gpu . CopyToHost ( Kernels , KernelInfo . Size , out dJdw ) ;
152+ w_gpu . CopyToHost ( 1 , Weights . Length , out dJdw ) ;
163153 }
164154
165155 // Bias
0 commit comments