Minor refactoring and bug fixes

Sergio0694 · Sergio0694 · commit 415ffe16fbb9 · 2018-02-01T20:24:58.000+01:00
diff --git a/NeuralNetwork.NET/APIs/NetworkManager.cs b/NeuralNetwork.NET/APIs/NetworkManager.cs
@@ -8,7 +8,6 @@
 using NeuralNetworkNET.APIs.Interfaces;
 using NeuralNetworkNET.APIs.Interfaces.Data;
 using NeuralNetworkNET.APIs.Results;
-using NeuralNetworkNET.APIs.Settings;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.Extensions;
 using NeuralNetworkNET.Networks.Graph;
@@ -60,6 +59,11 @@ public static INeuralNetwork NewGraph(TensorInfo input, [NotNull] Action<NodeBui
 
         #region Training APIs
 
+        /// <summary>
+        /// Gets whether or not a neural network is currently being trained
+        /// </summary>
+        public static bool TrainingInProgress { get; private set; }
+
         /// <summary>
         /// Trains a neural network with the given parameters
         /// </summary>
@@ -148,7 +152,7 @@ private static TrainingSessionResult TrainNetworkCore(
                 throw new ArgumentException("The input dataset doesn't match the number of input and output features for the current network", nameof(dataset));
 
             // Start the training
-            NetworkSettings.TrainingInProgress = NetworkSettings.TrainingInProgress
+            TrainingInProgress = TrainingInProgress
                 ? throw new InvalidOperationException("Can't train two networks at the same time") // This would cause problems with cuDNN
                 : true;
             TrainingSessionResult result = NetworkTrainer.TrainNetwork(
@@ -158,7 +162,7 @@ private static TrainingSessionResult TrainNetworkCore(
                 validationDataset as ValidationDataset,
                 testDataset as TestDataset,
                 token);
-            NetworkSettings.TrainingInProgress = false;
+            TrainingInProgress = false;
             return result;
         }
     }
diff --git a/NeuralNetwork.NET/APIs/Settings/NetworkSettings.cs b/NeuralNetwork.NET/APIs/Settings/NetworkSettings.cs
@@ -35,15 +35,5 @@ public static AccuracyTester AccuracyTester
             get => _AccuracyTester;
             set => _AccuracyTester = value ?? throw new ArgumentNullException(nameof(AccuracyTester), "The input delegate can't be null");
         }
-
-        /// <summary>
-        /// Gets whether or not a neural network is currently being trained
-        /// </summary>
-        public static bool TrainingInProgress { get; internal set; }
-
-        /// <summary>
-        /// Gets whether or not a neural network is currently processing the training samples through backpropagation (as opposed to evaluating them)
-        /// </summary>
-        internal static bool BackpropagationInProgress { get; set; }
     }
 }
diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs
@@ -95,8 +95,8 @@ private Tensor(IntPtr ptr, int entities, int length)
         /// <summary>
         /// Creates a new instance with the specified shape
         /// </summary>
-        /// <param name="n">The height of the matrix</param>
-        /// <param name="chw">The width of the matrix</param>
+        /// <param name="n">The height of the <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void New(int n, int chw, out Tensor tensor)
@@ -108,8 +108,8 @@ public static void New(int n, int chw, out Tensor tensor)
         /// <summary>
         /// Creates a new instance with the specified shape and initializes the allocated memory to 0s
         /// </summary>
-        /// <param name="n">The height of the matrix</param>
-        /// <param name="chw">The width of the matrix</param>
+        /// <param name="n">The height of the <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void NewZeroed(int n, int chw, out Tensor tensor)
@@ -124,8 +124,8 @@ public static unsafe void NewZeroed(int n, int chw, out Tensor tensor)
         /// Creates a new instance by wrapping the input pointer
         /// </summary>
         /// <param name="p">The target memory area</param>
-        /// <param name="n">The height of the final matrix</param>
-        /// <param name="chw">The width of the final matrix</param>
+        /// <param name="n">The height of the final <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the final <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void Reshape(float* p, int n, int chw, out Tensor tensor)
@@ -152,8 +152,8 @@ public static unsafe void Reshape(float* p, int n, int chw, out Tensor tensor)
         /// Creates a new instance by copying the contents at the given memory location and reshaping it to the desired size
         /// </summary>
         /// <param name="p">The target memory area to copy</param>
-        /// <param name="n">The height of the final matrix</param>
-        /// <param name="chw">The width of the final matrix</param>
+        /// <param name="n">The height of the final <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the final <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void From(float* p, int n, int chw, out Tensor tensor)
@@ -179,8 +179,8 @@ public static unsafe void From([NotNull] float[,] m, out Tensor tensor)
         /// Creates a new instance by copying the contents of the input vector and reshaping it to the desired size
         /// </summary>
         /// <param name="v">The input vector to copy</param>
-        /// <param name="n">The height of the final matrix</param>
-        /// <param name="chw">The width of the final matrix</param>
+        /// <param name="n">The height of the final <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the final <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void From([NotNull] float[] v, int n, int chw, out Tensor tensor)
@@ -197,8 +197,8 @@ public static unsafe void From([NotNull] float[] v, int n, int chw, out Tensor t
         /// <summary>
         /// Creates a new instance by wrapping the current memory area
         /// </summary>
-        /// <param name="n">The height of the final matrix</param>
-        /// <param name="chw">The width of the final matrix</param>
+        /// <param name="n">The height of the final <see cref="Tensor"/></param>
+        /// <param name="chw">The width of the final <see cref="Tensor"/></param>
         /// <param name="tensor">The resulting instance</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void Reshape(int n, int chw, out Tensor tensor)
@@ -223,19 +223,31 @@ public void Reshape(int n, int chw, out Tensor tensor)
         public bool MatchShape(int entities, int length) => Entities == entities && Length == length;
 
         /// <summary>
-        /// Overwrites the contents of the current matrix with the input matrix
+        /// Overwrites the contents of the current instance with the input <see cref="Tensor"/>
         /// </summary>
         /// <param name="tensor">The input <see cref="Tensor"/> to copy</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public unsafe void Overwrite(in Tensor tensor)
         {
-            if (tensor.Entities != Entities || tensor.Length != Length) throw new ArgumentException("The input matrix doesn't have the same size as the target");
+            if (tensor.Entities != Entities || tensor.Length != Length) throw new ArgumentException("The input tensor doesn't have the same size as the target");
             int bytes = sizeof(float) * Size;
             Buffer.MemoryCopy(tensor, this, bytes, bytes);
         }
 
         /// <summary>
-        /// Duplicates the current instance to an output <see cref="Tensor"/> matrix
+        /// Overwrites the contents of the current <see cref="Tensor"/> with the input array
+        /// </summary>
+        /// <param name="array">The input array to copy</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public unsafe void Overwrite([NotNull] float[] array)
+        {
+            if (array.Length != Size) throw new ArgumentException("The input array doesn't have the same size as the target");
+            int bytes = sizeof(float) * Size;
+            fixed (float* p = array) Buffer.MemoryCopy(p, this, bytes, bytes);
+        }
+
+        /// <summary>
+        /// Duplicates the current instance to an output <see cref="Tensor"/>
         /// </summary>
         /// <param name="tensor">The output tensor</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
diff --git a/NeuralNetwork.NET/Networks/Layers/Abstract/BatchNormalizationLayerBase.cs b/NeuralNetwork.NET/Networks/Layers/Abstract/BatchNormalizationLayerBase.cs
@@ -2,10 +2,10 @@
 using System.IO;
 using JetBrains.Annotations;
 using NeuralNetworkNET.APIs.Enums;
-using NeuralNetworkNET.APIs.Settings;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.Extensions;
 using NeuralNetworkNET.Networks.Layers.Initialization;
+using NeuralNetworkNET.SupervisedLearning.Optimization;
 using Newtonsoft.Json;
 
 namespace NeuralNetworkNET.Networks.Layers.Abstract
@@ -21,13 +21,13 @@ internal abstract class BatchNormalizationLayerBase : WeightedLayerBase
         /// The cached mu tensor
         /// </summary>
         [NotNull]
-        protected float[] Mu;
+        public float[] Mu { get; }
 
         /// <summary>
         /// The cached sigma^2 tensor
         /// </summary>
         [NotNull]
-        protected readonly float[] Sigma2;
+        public float[] Sigma2 { get; }
 
         // The current iteration number (for the Cumulative Moving Average)
         private int _Iteration;
@@ -60,6 +60,7 @@ protected BatchNormalizationLayerBase(in TensorInfo shape, NormalizationMode mod
                     break;
                 default: throw new ArgumentOutOfRangeException("Invalid batch normalization mode");
             }
+            Sigma2.AsSpan().Fill(1);
             NormalizationMode = mode;
         }
 
@@ -80,7 +81,7 @@ protected BatchNormalizationLayerBase(in TensorInfo shape, NormalizationMode mod
         /// <inheritdoc/>
         public override void Forward(in Tensor x, out Tensor z, out Tensor a)
         {
-            if (NetworkSettings.BackpropagationInProgress) ForwardTraining(1f / (1 + _Iteration++), x, out z, out a);
+            if (NetworkTrainer.BackpropagationInProgress) ForwardTraining(1f / (1 + _Iteration++), x, out z, out a);
             else ForwardInference(x, out z, out a);
         }
 
diff --git a/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs b/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs
@@ -5,7 +5,6 @@
 using NeuralNetworkNET.APIs.Enums;
 using NeuralNetworkNET.APIs.Interfaces;
 using NeuralNetworkNET.APIs.Results;
-using NeuralNetworkNET.APIs.Settings;
 using NeuralNetworkNET.Extensions;
 using NeuralNetworkNET.Networks.Implementations;
 using NeuralNetworkNET.Services;
@@ -83,6 +82,21 @@ public static TrainingSessionResult TrainNetwork(
             return Optimize(network, batches, epochs, dropout, optimizer, batchProgress, trainingProgress, validationDataset, testDataset, token);
         }
 
+        /// <summary>
+        /// Gets whether or not a neural network is currently processing the training samples through backpropagation (as opposed to evaluating them)
+        /// </summary>
+        public static bool BackpropagationInProgress
+        {
+            get;
+
+            // In DEBUG builds the setter is not private (it uses the property's own accessibility), so that the Unit tests can set the flag directly
+            #if DEBUG
+            set;
+            #else
+            private set;
+            #endif
+        }
+
         /// <summary>
         /// Trains the target <see cref="SequentialNetwork"/> using the input algorithm
         /// </summary>
@@ -123,18 +137,18 @@ TrainingSessionResult PrepareResult(TrainingStopReason reason, int loops)
                 miniBatches.CrossShuffle();
 
                 // Gradient descent over the current batches
-                NetworkSettings.BackpropagationInProgress = true;
+                BackpropagationInProgress = true;
                 for (int j = 0; j < miniBatches.BatchesCount; j++)
                 {
                     if (token.IsCancellationRequested)
                     {
-                        NetworkSettings.BackpropagationInProgress = true;
+                        BackpropagationInProgress = false;
                         return PrepareResult(TrainingStopReason.TrainingCanceled, i);
                     }
                     network.Backpropagate(miniBatches.Batches[j], dropout, updater);
                     batchMonitor?.NotifyCompletedBatch(miniBatches.Batches[j].X.GetLength(0));
                 }
-                NetworkSettings.BackpropagationInProgress = true;
+                BackpropagationInProgress = false;
                 batchMonitor?.Reset();
                 if (network.IsInNumericOverflow) return PrepareResult(TrainingStopReason.NumericOverflow, i);
 
diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
@@ -1,14 +1,15 @@
-﻿using JetBrains.Annotations;
+﻿using System;
+using JetBrains.Annotations;
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using NeuralNetworkNET.APIs.Enums;
-using NeuralNetworkNET.APIs.Settings;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.Extensions;
 using NeuralNetworkNET.Helpers;
 using NeuralNetworkNET.Networks.Layers.Abstract;
 using NeuralNetworkNET.Networks.Layers.Cpu;
 using NeuralNetworkNET.Networks.Layers.Cuda;
 using NeuralNetworkNET.Networks.Layers.Initialization;
+using NeuralNetworkNET.SupervisedLearning.Optimization;
 
 namespace NeuralNetworkNET.Cuda.Unit
 {
@@ -47,7 +48,7 @@ private static void TestForward(NetworkLayerBase cpu, NetworkLayerBase gpu, int
 
         private static void TestBackward(WeightedLayerBase cpu, WeightedLayerBase gpu, int samples)
         {
-            NetworkSettings.TrainingInProgress = true;
+            NetworkTrainer.BackpropagationInProgress = true;
             Tensor
                 x = CreateRandomTensor(samples, cpu.InputInfo.Size),
                 dy = CreateRandomTensor(samples, cpu.OutputInfo.Size);
@@ -61,12 +62,12 @@ private static void TestBackward(WeightedLayerBase cpu, WeightedLayerBase gpu, i
             Assert.IsTrue(dJdw_cpu.ContentEquals(dJdw_gpu, 1e-4f, 1e-5f));
             Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu, 1e-4f, 1e-5f)); // The cuDNN ConvolutionBackwardBias is not always as precise as the CPU version
             Tensor.Free(x, dy, dx1, dx2, z_cpu, a_cpu, z_gpu, a_gpu, dJdw_cpu, dJdb_cpu, dJdw_gpu, dJdb_gpu);
-            NetworkSettings.TrainingInProgress = false;
+            NetworkTrainer.BackpropagationInProgress = false;
         }
 
         private static unsafe void TestBackward(OutputLayerBase cpu, OutputLayerBase gpu, float[,] y)
         {
-            NetworkSettings.TrainingInProgress = true;
+            NetworkTrainer.BackpropagationInProgress = true;
             int n = y.GetLength(0);
             fixed (float* p = y)
             {
@@ -85,7 +86,7 @@ private static unsafe void TestBackward(OutputLayerBase cpu, OutputLayerBase gpu
                 Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu, 1e-4f, 1e-5f));
                 Tensor.Free(x, dy, dx1, dx2, z_cpu, a_cpu, z_gpu, a_gpu, dJdw_cpu, dJdw_gpu, dJdb_cpu, dJdb_gpu);
             }
-            NetworkSettings.TrainingInProgress = false;
+            NetworkTrainer.BackpropagationInProgress = false;
         }
 
         #endregion
@@ -164,36 +165,36 @@ public void ConvolutionBackward()
         [TestMethod]
         public void PerActivationBatchNormalizationForward()
         {
-            WeightedLayerBase
+            BatchNormalizationLayerBase
                 cpu = new BatchNormalizationLayer(TensorInfo.Linear(250), NormalizationMode.PerActivation, ActivationType.ReLU),
-                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, new float[250], new float[250], cpu.ActivationType);
+                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType);
             TestForward(cpu, gpu, 400);
         }
 
         [TestMethod]
         public void PerActivationBatchNormalizationBackward()
         {
-            WeightedLayerBase
+            BatchNormalizationLayerBase
                 cpu = new BatchNormalizationLayer(TensorInfo.Linear(250), NormalizationMode.PerActivation, ActivationType.ReLU),
-                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, new float[250], new float[250], cpu.ActivationType);
+                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType);
             TestBackward(cpu, gpu, 400);
         }
 
         [TestMethod]
         public void SpatialBatchNormalizationForward()
         {
-            WeightedLayerBase
+            BatchNormalizationLayerBase
                 cpu = new BatchNormalizationLayer(TensorInfo.Volume(12, 12, 13), NormalizationMode.Spatial, ActivationType.ReLU),
-                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, new float[13], new float[13], cpu.ActivationType);
+                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType);
             TestForward(cpu, gpu, 400);
         }
 
         [TestMethod]
         public void SpatialBatchNormalizationBackward()
         {
-            WeightedLayerBase
+            BatchNormalizationLayerBase
                 cpu = new BatchNormalizationLayer(TensorInfo.Volume(12, 12, 13), NormalizationMode.Spatial, ActivationType.ReLU),
-                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, new float[13], new float[13], cpu.ActivationType);
+                gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType);
             TestBackward(cpu, gpu, 400);
         }
 

Original file line number	Diff line number	Diff line change
`@@ -35,15 +35,5 @@ public static AccuracyTester AccuracyTester`
`35`	`35`	`get => _AccuracyTester;`
`36`	`36`	`set => _AccuracyTester = value ?? throw new ArgumentNullException(nameof(AccuracyTester), "The input delegate can't be null");`
`37`	`37`	`}`
`38`		`-`
`39`		`- /// <summary>`
`40`		`- /// Gets whether or not a neural network is currently being trained`
`41`		`- /// </summary>`
`42`		`- public static bool TrainingInProgress { get; internal set; }`
`43`		`-`
`44`		`- /// <summary>`
`45`		`- /// Gets whether or not a neural network is currently processing the training samples through backpropagation (as opposed to evaluating them)`
`46`		`- /// </summary>`
`47`		`- internal static bool BackpropagationInProgress { get; set; }`
`48`	`38`	`}`
`49`	`39`	`}`