diff --git a/experiments/SELDnet23_inference_on_rooms.md b/experiments/SELDnet23_inference_on_rooms.md new file mode 100644 index 0000000..5a74fbb --- /dev/null +++ b/experiments/SELDnet23_inference_on_rooms.md @@ -0,0 +1,467 @@ +A model trained with the instructions and data specified [here](https://github.com/sharathadavanne/seld-dcase2023) has the following performance metrics when used to run inference on SpatialScaper data: + +# FOA + +## BOMB SHELTER +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.51 [0.50, 0.52] +SED metrics: Error rate: 0.74 [0.72, 0.76], F-score: 31.8 [30.34, 33.32] +DOA metrics: Localization error: 22.9 [21.86 , 23.84], Localization Recall: 52.2 [50.68, 53.69] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.74 [0.72, 0.76] 0.39 [0.33, 0.45] 20.30 [18.64, 22.03] 0.61 [0.56, 0.68] 0.46 [0.43, 0.49] +1 0.74 [0.72, 0.76] 0.33 [0.27, 0.40] 20.00 [17.67, 22.33] 0.51 [0.44, 0.58] 0.50 [0.47, 0.53] +2 0.74 [0.72, 0.76] 0.27 [0.20, 0.34] 22.66 [20.26, 25.05] 0.52 [0.45, 0.60] 0.52 [0.48, 0.55] +3 0.74 [0.72, 0.76] 0.42 [0.36, 0.48] 22.20 [19.18, 25.18] 0.56 [0.50, 0.61] 0.47 [0.44, 0.50] +4 0.74 [0.72, 0.76] 0.37 [0.32, 0.43] 22.04 [18.07, 26.08] 0.62 [0.55, 0.69] 0.47 [0.43, 0.50] +5 0.74 [0.72, 0.76] 0.37 [0.32, 0.42] 23.11 [20.01, 25.92] 0.67 [0.61, 0.74] 0.46 [0.43, 0.48] +6 0.74 [0.72, 0.76] 0.33 [0.26, 0.39] 21.75 [19.27, 24.21] 0.53 [0.47, 0.59] 0.50 [0.47, 0.53] +7 0.74 [0.72, 0.76] 0.16 [0.11, 0.22] 23.24 [15.82, 30.52] 0.17 [0.12, 0.21] 0.63 [0.60, 0.66] +8 0.74 [0.72, 0.76] 0.26 [0.21, 0.32] 23.41 [20.89, 25.97] 0.39 [0.32, 0.46] 0.55 [0.52, 0.58] +9 0.74 [0.72, 0.76] 0.22 [0.18, 0.27] 31.59 [28.00, 34.78] 0.78 [0.73, 0.83] 0.48 [0.45, 0.50] +10 0.74 [0.72, 0.76] 0.34 [0.27, 0.41] 18.45 [16.76, 20.12] 0.37 [0.29, 0.43] 0.53 [0.50, 0.57] +11 0.74 [0.72, 0.76] 0.36 [0.29, 0.42] 26.27 [22.84, 30.06] 0.59 
[0.53, 0.65] 0.48 [0.46, 0.51] +12 0.74 [0.72, 0.76] 0.31 [0.24, 0.37] 22.22 [19.31, 25.09] 0.47 [0.40, 0.54] 0.52 [0.49, 0.55] + +## GYM +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.48 [0.47, 0.49] +SED metrics: Error rate: 0.71 [0.69, 0.73], F-score: 36.7 [34.98, 38.59] +DOA metrics: Localization error: 20.6 [19.73 , 21.41], Localization Recall: 54.6 [53.15, 56.11] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.71 [0.69, 0.73] 0.45 [0.40, 0.51] 19.36 [16.84, 21.63] 0.60 [0.53, 0.67] 0.44 [0.41, 0.47] +1 0.71 [0.69, 0.73] 0.34 [0.27, 0.41] 18.32 [15.84, 20.70] 0.50 [0.41, 0.59] 0.49 [0.45, 0.53] +2 0.71 [0.69, 0.73] 0.40 [0.34, 0.46] 20.28 [18.33, 22.22] 0.60 [0.52, 0.67] 0.46 [0.43, 0.49] +3 0.71 [0.69, 0.73] 0.45 [0.38, 0.51] 19.68 [17.43, 21.86] 0.61 [0.56, 0.68] 0.44 [0.41, 0.47] +4 0.71 [0.69, 0.73] 0.44 [0.39, 0.50] 17.86 [15.34, 20.33] 0.58 [0.52, 0.64] 0.45 [0.42, 0.47] +5 0.71 [0.69, 0.73] 0.39 [0.34, 0.44] 22.44 [18.78, 26.07] 0.71 [0.67, 0.76] 0.43 [0.41, 0.46] +6 0.71 [0.69, 0.73] 0.46 [0.39, 0.51] 16.91 [15.62, 18.82] 0.61 [0.54, 0.67] 0.44 [0.41, 0.47] +7 0.71 [0.69, 0.73] 0.14 [0.09, 0.19] 24.60 [19.98, 29.28] 0.17 [0.11, 0.23] 0.63 [0.61, 0.66] +8 0.71 [0.69, 0.73] 0.35 [0.29, 0.42] 16.65 [14.44, 18.87] 0.36 [0.30, 0.43] 0.52 [0.49, 0.55] +9 0.71 [0.69, 0.73] 0.24 [0.20, 0.28] 26.83 [23.28, 30.02] 0.78 [0.73, 0.83] 0.46 [0.44, 0.48] +10 0.71 [0.69, 0.73] 0.38 [0.31, 0.45] 17.15 [15.63, 18.70] 0.38 [0.31, 0.45] 0.51 [0.48, 0.55] +11 0.71 [0.69, 0.73] 0.34 [0.28, 0.41] 26.89 [22.34, 31.54] 0.58 [0.52, 0.64] 0.49 [0.46, 0.52] +12 0.71 [0.69, 0.73] 0.39 [0.33, 0.46] 20.52 [17.66, 23.30] 0.61 [0.55, 0.67] 0.46 [0.42, 0.49] + +## PB132 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.55 [0.54, 0.56] +SED 
metrics: Error rate: 0.80 [0.78, 0.82], F-score: 23.7 [21.98, 25.41] +DOA metrics: Localization error: 27.1 [25.96 , 28.14], Localization Recall: 52.1 [50.27, 53.97] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.80 [0.78, 0.82] 0.35 [0.28, 0.41] 23.23 [19.92, 26.55] 0.61 [0.54, 0.68] 0.49 [0.46, 0.53] +1 0.80 [0.78, 0.82] 0.23 [0.18, 0.28] 26.02 [22.64, 29.33] 0.48 [0.40, 0.55] 0.56 [0.53, 0.59] +2 0.80 [0.78, 0.82] 0.21 [0.16, 0.26] 28.42 [25.93, 30.89] 0.58 [0.51, 0.64] 0.54 [0.52, 0.57] +3 0.80 [0.78, 0.82] 0.28 [0.22, 0.34] 25.65 [21.22, 30.04] 0.54 [0.46, 0.61] 0.53 [0.50, 0.56] +4 0.80 [0.78, 0.82] 0.32 [0.27, 0.37] 22.64 [20.48, 24.81] 0.60 [0.54, 0.66] 0.50 [0.48, 0.53] +5 0.80 [0.78, 0.82] 0.27 [0.22, 0.32] 28.29 [24.65, 32.00] 0.67 [0.61, 0.72] 0.51 [0.48, 0.53] +6 0.80 [0.78, 0.82] 0.28 [0.22, 0.35] 23.86 [21.50, 26.19] 0.61 [0.55, 0.68] 0.51 [0.48, 0.54] +7 0.80 [0.78, 0.82] 0.07 [0.03, 0.12] 30.31 [24.17, 36.09] 0.15 [0.10, 0.21] 0.69 [0.66, 0.71] +8 0.80 [0.78, 0.82] 0.19 [0.13, 0.24] 27.96 [25.24, 30.56] 0.40 [0.33, 0.47] 0.59 [0.56, 0.62] +9 0.80 [0.78, 0.82] 0.14 [0.10, 0.17] 37.65 [32.63, 42.67] 0.75 [0.69, 0.80] 0.53 [0.51, 0.56] +10 0.80 [0.78, 0.82] 0.31 [0.25, 0.37] 19.86 [17.60, 22.12] 0.39 [0.32, 0.46] 0.55 [0.52, 0.58] +11 0.80 [0.78, 0.82] 0.20 [0.13, 0.26] 31.87 [27.22, 36.51] 0.52 [0.46, 0.59] 0.56 [0.53, 0.60] +12 0.80 [0.78, 0.82] 0.24 [0.17, 0.31] 26.15 [22.50, 29.73] 0.48 [0.40, 0.55] 0.56 [0.52, 0.59] + +## PC226 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.53 [0.52, 0.54] +SED metrics: Error rate: 0.76 [0.74, 0.78], F-score: 25.8 [24.17, 27.44] +DOA metrics: Localization error: 25.6 [24.55 , 26.74], Localization Recall: 52.3 [50.84, 53.85] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.76 [0.74, 0.78] 0.28 [0.21, 0.34] 24.78 [20.77, 28.91] 0.56 [0.49, 0.65] 0.52 
[0.48, 0.55] +1 0.76 [0.74, 0.78] 0.25 [0.19, 0.30] 22.95 [20.49, 25.41] 0.45 [0.36, 0.53] 0.55 [0.52, 0.58] +2 0.76 [0.74, 0.78] 0.23 [0.17, 0.29] 28.03 [24.94, 31.13] 0.60 [0.53, 0.67] 0.52 [0.49, 0.55] +3 0.76 [0.74, 0.78] 0.24 [0.18, 0.29] 28.29 [25.16, 31.37] 0.49 [0.43, 0.56] 0.55 [0.52, 0.57] +4 0.76 [0.74, 0.78] 0.33 [0.28, 0.37] 24.43 [21.84, 27.09] 0.63 [0.58, 0.67] 0.49 [0.47, 0.51] +5 0.76 [0.74, 0.78] 0.30 [0.25, 0.36] 23.81 [21.18, 26.38] 0.66 [0.60, 0.73] 0.48 [0.45, 0.51] +6 0.76 [0.74, 0.78] 0.38 [0.33, 0.43] 21.24 [19.55, 22.97] 0.64 [0.58, 0.70] 0.47 [0.44, 0.49] +7 0.76 [0.74, 0.78] 0.10 [0.04, 0.15] 25.82 [19.46, 31.95] 0.15 [0.08, 0.22] 0.67 [0.63, 0.70] +8 0.76 [0.74, 0.78] 0.28 [0.22, 0.34] 23.80 [19.94, 27.88] 0.36 [0.29, 0.43] 0.56 [0.53, 0.60] +9 0.76 [0.74, 0.78] 0.19 [0.15, 0.24] 30.61 [25.82, 35.39] 0.79 [0.73, 0.85] 0.49 [0.46, 0.51] +10 0.76 [0.74, 0.78] 0.27 [0.21, 0.33] 23.98 [16.87, 31.23] 0.34 [0.28, 0.40] 0.57 [0.54, 0.60] +11 0.76 [0.74, 0.78] 0.24 [0.19, 0.29] 30.68 [25.98, 35.30] 0.61 [0.55, 0.68] 0.52 [0.49, 0.55] +12 0.76 [0.74, 0.78] 0.28 [0.22, 0.33] 24.88 [22.27, 27.47] 0.52 [0.45, 0.59] 0.53 [0.50, 0.56] + +## SA203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.56 [0.55, 0.57] +SED metrics: Error rate: 0.81 [0.79, 0.83], F-score: 23.1 [21.36, 24.64] +DOA metrics: Localization error: 27.6 [26.46 , 28.81], Localization Recall: 50.6 [48.63, 52.24] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.81 [0.79, 0.83] 0.33 [0.26, 0.40] 24.52 [20.73, 28.08] 0.52 [0.46, 0.59] 0.52 [0.49, 0.56] +1 0.81 [0.79, 0.83] 0.15 [0.09, 0.21] 29.05 [25.14, 32.97] 0.43 [0.34, 0.51] 0.60 [0.57, 0.63] +2 0.81 [0.79, 0.83] 0.23 [0.17, 0.28] 27.60 [24.25, 30.86] 0.58 [0.50, 0.65] 0.54 [0.51, 0.57] +3 0.81 [0.79, 0.83] 0.32 [0.24, 0.38] 28.32 [24.66, 32.19] 0.63 [0.57, 0.69] 0.50 [0.48, 0.54] +4 0.81 [0.79, 0.83] 
0.29 [0.22, 0.35] 25.10 [22.36, 27.89] 0.59 [0.52, 0.67] 0.52 [0.49, 0.55] +5 0.81 [0.79, 0.83] 0.19 [0.15, 0.23] 33.87 [28.98, 38.82] 0.71 [0.64, 0.76] 0.52 [0.50, 0.55] +6 0.81 [0.79, 0.83] 0.32 [0.25, 0.39] 23.61 [21.42, 25.82] 0.60 [0.54, 0.66] 0.50 [0.47, 0.54] +7 0.81 [0.79, 0.83] 0.12 [0.07, 0.17] 22.46 [17.62, 27.27] 0.12 [0.08, 0.17] 0.67 [0.65, 0.70] +8 0.81 [0.79, 0.83] 0.15 [0.10, 0.21] 28.44 [23.85, 33.09] 0.26 [0.20, 0.32] 0.64 [0.61, 0.67] +9 0.81 [0.79, 0.83] 0.15 [0.12, 0.19] 31.64 [27.77, 35.44] 0.77 [0.69, 0.82] 0.52 [0.49, 0.54] +10 0.81 [0.79, 0.83] 0.15 [0.09, 0.20] 27.52 [24.06, 30.99] 0.28 [0.21, 0.34] 0.63 [0.61, 0.66] +11 0.81 [0.79, 0.83] 0.25 [0.20, 0.30] 33.51 [28.10, 38.91] 0.58 [0.52, 0.65] 0.54 [0.51, 0.57] +12 0.81 [0.79, 0.83] 0.35 [0.28, 0.41] 23.65 [19.31, 27.89] 0.50 [0.44, 0.57] 0.52 [0.49, 0.56] + +## SC203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.54 [0.53, 0.56] +SED metrics: Error rate: 0.81 [0.78, 0.83], F-score: 26.6 [25.02, 28.54] +DOA metrics: Localization error: 24.8 [23.75 , 25.83], Localization Recall: 50.4 [48.62, 52.52] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.81 [0.78, 0.83] 0.38 [0.31, 0.45] 22.04 [18.70, 25.34] 0.55 [0.48, 0.61] 0.50 [0.47, 0.54] +1 0.81 [0.78, 0.83] 0.31 [0.24, 0.37] 20.84 [18.18, 23.46] 0.50 [0.42, 0.58] 0.53 [0.49, 0.56] +2 0.81 [0.78, 0.83] 0.32 [0.26, 0.39] 22.03 [19.41, 24.68] 0.55 [0.49, 0.63] 0.51 [0.48, 0.54] +3 0.81 [0.78, 0.83] 0.30 [0.25, 0.39] 26.09 [22.68, 28.68] 0.55 [0.49, 0.62] 0.53 [0.49, 0.55] +4 0.81 [0.78, 0.83] 0.34 [0.30, 0.39] 23.63 [20.62, 26.63] 0.60 [0.55, 0.64] 0.50 [0.48, 0.52] +5 0.81 [0.78, 0.83] 0.24 [0.19, 0.30] 29.85 [24.44, 35.10] 0.64 [0.57, 0.72] 0.52 [0.49, 0.55] +6 0.81 [0.78, 0.83] 0.34 [0.28, 0.40] 21.35 [19.11, 23.56] 0.54 [0.47, 0.61] 0.51 [0.48, 0.54] +7 0.81 [0.78, 0.83] 0.13 [0.08, 0.18] 22.12 [17.87, 
26.32] 0.17 [0.12, 0.21] 0.66 [0.63, 0.69] +8 0.81 [0.78, 0.83] 0.23 [0.17, 0.29] 24.43 [20.03, 28.86] 0.35 [0.28, 0.42] 0.59 [0.56, 0.62] +9 0.81 [0.78, 0.83] 0.18 [0.13, 0.22] 28.42 [25.09, 31.73] 0.74 [0.69, 0.80] 0.51 [0.49, 0.53] +10 0.81 [0.78, 0.83] 0.17 [0.12, 0.23] 25.93 [22.35, 29.52] 0.28 [0.22, 0.35] 0.62 [0.60, 0.65] +11 0.81 [0.78, 0.83] 0.22 [0.17, 0.27] 33.38 [28.50, 38.36] 0.58 [0.51, 0.64] 0.55 [0.52, 0.58] +12 0.81 [0.78, 0.83] 0.29 [0.24, 0.36] 22.78 [19.55, 25.73] 0.50 [0.43, 0.58] 0.53 [0.50, 0.56] + +## SE203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.54 [0.53, 0.55] +SED metrics: Error rate: 0.79 [0.77, 0.82], F-score: 28.9 [27.25, 30.66] +DOA metrics: Localization error: 27.0 [25.53 , 28.30], Localization Recall: 49.2 [47.65, 50.88] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.79 [0.77, 0.82] 0.32 [0.26, 0.38] 23.77 [20.11, 27.43] 0.53 [0.47, 0.61] 0.52 [0.49, 0.55] +1 0.79 [0.77, 0.82] 0.34 [0.27, 0.41] 19.12 [16.25, 21.94] 0.48 [0.40, 0.55] 0.52 [0.49, 0.56] +2 0.79 [0.77, 0.82] 0.37 [0.30, 0.44] 20.14 [17.70, 22.56] 0.51 [0.45, 0.57] 0.51 [0.47, 0.54] +3 0.79 [0.77, 0.82] 0.39 [0.33, 0.45] 25.86 [21.72, 30.04] 0.58 [0.51, 0.64] 0.49 [0.46, 0.52] +4 0.79 [0.77, 0.82] 0.37 [0.31, 0.43] 25.25 [21.34, 29.06] 0.63 [0.57, 0.68] 0.48 [0.45, 0.52] +5 0.79 [0.77, 0.82] 0.19 [0.14, 0.23] 41.30 [35.52, 47.25] 0.58 [0.51, 0.65] 0.57 [0.54, 0.59] +6 0.79 [0.77, 0.82] 0.40 [0.35, 0.46] 20.27 [17.99, 22.41] 0.55 [0.49, 0.61] 0.49 [0.46, 0.52] +7 0.79 [0.77, 0.82] 0.13 [0.07, 0.18] 26.40 [16.03, 36.95] 0.17 [0.12, 0.22] 0.66 [0.63, 0.69] +8 0.79 [0.77, 0.82] 0.22 [0.16, 0.27] 28.30 [24.27, 32.24] 0.33 [0.27, 0.39] 0.60 [0.57, 0.63] +9 0.79 [0.77, 0.82] 0.15 [0.11, 0.18] 38.63 [33.68, 43.58] 0.75 [0.69, 0.80] 0.53 [0.51, 0.55] +10 0.79 [0.77, 0.82] 0.20 [0.16, 0.27] 28.80 [23.42, 33.19] 0.28 [0.22, 0.35] 0.62 
[0.58, 0.64] +11 0.79 [0.77, 0.82] 0.31 [0.24, 0.38] 32.74 [26.85, 38.52] 0.57 [0.50, 0.64] 0.52 [0.49, 0.56] +12 0.79 [0.77, 0.82] 0.38 [0.30, 0.44] 19.90 [15.85, 23.87] 0.44 [0.37, 0.51] 0.52 [0.49, 0.56] + +## TB103 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.56 [0.55, 0.57] +SED metrics: Error rate: 0.82 [0.80, 0.84], F-score: 21.1 [19.55, 22.85] +DOA metrics: Localization error: 28.9 [27.77 , 29.93], Localization Recall: 51.6 [49.91, 53.24] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.82 [0.80, 0.84] 0.24 [0.16, 0.31] 27.32 [24.06, 30.57] 0.59 [0.52, 0.66] 0.54 [0.51, 0.57] +1 0.82 [0.80, 0.84] 0.21 [0.15, 0.26] 27.89 [25.02, 30.80] 0.58 [0.50, 0.66] 0.55 [0.52, 0.58] +2 0.82 [0.80, 0.84] 0.26 [0.21, 0.32] 26.40 [23.47, 29.26] 0.56 [0.50, 0.62] 0.54 [0.51, 0.56] +3 0.82 [0.80, 0.84] 0.27 [0.22, 0.32] 27.51 [24.62, 30.39] 0.62 [0.56, 0.68] 0.52 [0.50, 0.54] +4 0.82 [0.80, 0.84] 0.29 [0.24, 0.35] 24.31 [21.99, 26.42] 0.64 [0.57, 0.71] 0.51 [0.48, 0.53] +5 0.82 [0.80, 0.84] 0.11 [0.07, 0.14] 39.32 [35.12, 44.09] 0.64 [0.57, 0.71] 0.57 [0.55, 0.60] +6 0.82 [0.80, 0.84] 0.23 [0.18, 0.28] 26.71 [23.41, 29.64] 0.51 [0.46, 0.56] 0.56 [0.53, 0.58] +7 0.82 [0.80, 0.84] 0.16 [0.09, 0.25] 21.16 [14.79, 26.96] 0.19 [0.13, 0.26] 0.65 [0.60, 0.69] +8 0.82 [0.80, 0.84] 0.15 [0.11, 0.20] 30.45 [27.19, 33.29] 0.32 [0.26, 0.38] 0.63 [0.60, 0.65] +9 0.82 [0.80, 0.84] 0.16 [0.12, 0.20] 36.45 [31.03, 41.89] 0.75 [0.71, 0.80] 0.53 [0.51, 0.55] +10 0.82 [0.80, 0.84] 0.11 [0.06, 0.15] 29.58 [25.96, 33.23] 0.27 [0.21, 0.33] 0.65 [0.63, 0.68] +11 0.82 [0.80, 0.84] 0.25 [0.18, 0.31] 31.86 [27.86, 36.63] 0.51 [0.46, 0.56] 0.56 [0.53, 0.59] +12 0.82 [0.80, 0.84] 0.31 [0.25, 0.36] 26.23 [21.28, 31.16] 0.52 [0.45, 0.59] 0.54 [0.50, 0.57] + +## TC352 +results on 133 scapes generated for this room using 
`SpatialScaper/experiments/data_generation_TAU_FOA.py` + +Test Loss +SELD score (early stopping metric): 0.52 [0.51, 0.53] +SED metrics: Error rate: 0.76 [0.74, 0.78], F-score: 29.4 [27.91, 30.90] +DOA metrics: Localization error: 24.4 [23.46 , 25.40], Localization Recall: 51.8 [50.17, 53.35] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.76 [0.74, 0.78] 0.34 [0.28, 0.40] 23.94 [20.74, 27.63] 0.56 [0.49, 0.62] 0.50 [0.47, 0.53] +1 0.76 [0.74, 0.78] 0.36 [0.29, 0.43] 20.94 [17.78, 23.91] 0.55 [0.47, 0.62] 0.49 [0.46, 0.53] +2 0.76 [0.74, 0.78] 0.24 [0.17, 0.30] 26.35 [22.69, 29.96] 0.54 [0.48, 0.60] 0.53 [0.50, 0.56] +3 0.76 [0.74, 0.78] 0.33 [0.27, 0.39] 23.11 [20.66, 25.55] 0.57 [0.50, 0.63] 0.50 [0.47, 0.53] +4 0.76 [0.74, 0.78] 0.42 [0.36, 0.47] 21.97 [18.20, 25.80] 0.67 [0.62, 0.73] 0.45 [0.42, 0.48] +5 0.76 [0.74, 0.78] 0.35 [0.30, 0.40] 25.57 [21.40, 29.86] 0.64 [0.58, 0.69] 0.48 [0.45, 0.50] +6 0.76 [0.74, 0.78] 0.40 [0.34, 0.46] 18.30 [16.58, 20.42] 0.59 [0.53, 0.66] 0.47 [0.44, 0.50] +7 0.76 [0.74, 0.78] 0.15 [0.09, 0.22] 20.64 [15.77, 25.25] 0.16 [0.11, 0.21] 0.64 [0.61, 0.67] +8 0.76 [0.74, 0.78] 0.26 [0.21, 0.31] 24.35 [21.28, 27.42] 0.38 [0.32, 0.44] 0.56 [0.54, 0.59] +9 0.76 [0.74, 0.78] 0.18 [0.14, 0.22] 32.39 [28.25, 36.51] 0.76 [0.71, 0.82] 0.50 [0.48, 0.52] +10 0.76 [0.74, 0.78] 0.28 [0.23, 0.34] 21.45 [18.41, 24.45] 0.36 [0.31, 0.42] 0.56 [0.53, 0.59] +11 0.76 [0.74, 0.78] 0.24 [0.20, 0.29] 30.83 [27.42, 34.18] 0.55 [0.50, 0.61] 0.53 [0.51, 0.56] +12 0.76 [0.74, 0.78] 0.26 [0.21, 0.32] 27.51 [22.88, 32.22] 0.39 [0.33, 0.46] 0.56 [0.53, 0.59] + + +# MIC + +## BOMB SHELTER +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.45 [0.44, 0.46] +SED metrics: Error rate: 0.66 [0.64, 0.67], F-score: 41.7 [40.16, 43.59] +DOA metrics: Localization error: 19.2 [18.42 , 20.17], Localization Recall: 54.9 [53.37, 56.84] 
+Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.66 [0.64, 0.67] 0.51 [0.45, 0.57] 19.02 [14.38, 23.81] 0.63 [0.57, 0.69] 0.41 [0.37, 0.44] +1 0.66 [0.64, 0.67] 0.40 [0.33, 0.48] 17.79 [14.56, 20.82] 0.56 [0.48, 0.63] 0.45 [0.41, 0.49] +2 0.66 [0.64, 0.67] 0.49 [0.44, 0.57] 16.22 [14.23, 18.11] 0.60 [0.53, 0.68] 0.41 [0.37, 0.44] +3 0.66 [0.64, 0.67] 0.41 [0.37, 0.46] 26.07 [22.76, 29.28] 0.64 [0.59, 0.70] 0.44 [0.41, 0.46] +4 0.66 [0.64, 0.67] 0.45 [0.39, 0.51] 17.86 [14.40, 21.36] 0.67 [0.61, 0.73] 0.41 [0.38, 0.44] +5 0.66 [0.64, 0.67] 0.53 [0.49, 0.58] 15.69 [13.65, 18.27] 0.75 [0.70, 0.80] 0.37 [0.34, 0.39] +6 0.66 [0.64, 0.67] 0.51 [0.46, 0.56] 16.56 [14.47, 18.68] 0.65 [0.58, 0.71] 0.40 [0.37, 0.43] +7 0.66 [0.64, 0.67] 0.21 [0.14, 0.28] 18.23 [12.86, 23.39] 0.21 [0.15, 0.28] 0.58 [0.55, 0.62] +8 0.66 [0.64, 0.67] 0.41 [0.35, 0.47] 18.72 [14.94, 22.44] 0.41 [0.35, 0.47] 0.48 [0.45, 0.51] +9 0.66 [0.64, 0.67] 0.35 [0.29, 0.40] 21.66 [18.19, 27.14] 0.70 [0.64, 0.76] 0.43 [0.40, 0.46] +10 0.66 [0.64, 0.67] 0.50 [0.43, 0.56] 15.55 [12.69, 18.38] 0.51 [0.44, 0.57] 0.43 [0.40, 0.47] +11 0.66 [0.64, 0.67] 0.28 [0.21, 0.34] 27.42 [22.82, 31.99] 0.40 [0.33, 0.46] 0.53 [0.50, 0.56] +12 0.66 [0.64, 0.67] 0.37 [0.29, 0.45] 19.10 [15.91, 22.13] 0.43 [0.35, 0.51] 0.49 [0.45, 0.53] + +## GYM +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.45 [0.44, 0.46] +SED metrics: Error rate: 0.65 [0.63, 0.67], F-score: 42.3 [40.50, 44.37] +DOA metrics: Localization error: 17.6 [16.66 , 18.39], Localization Recall: 52.9 [50.94, 54.75] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.65 [0.63, 0.67] 0.46 [0.38, 0.53] 17.11 [14.79, 19.38] 0.52 [0.43, 0.59] 0.44 [0.41, 0.48] +1 0.65 [0.63, 0.67] 0.40 [0.34, 0.46] 16.70 [14.23, 19.21] 0.55 [0.47, 0.62] 0.45 [0.42, 0.48] +2 0.65 [0.63, 0.67] 0.41 [0.35, 0.48] 18.26 [15.17, 21.47] 
0.52 [0.45, 0.58] 0.46 [0.42, 0.49] +3 0.65 [0.63, 0.67] 0.47 [0.41, 0.53] 21.97 [19.20, 24.74] 0.65 [0.58, 0.71] 0.41 [0.38, 0.45] +4 0.65 [0.63, 0.67] 0.45 [0.39, 0.52] 15.95 [12.74, 18.45] 0.54 [0.46, 0.63] 0.44 [0.40, 0.47] +5 0.65 [0.63, 0.67] 0.52 [0.47, 0.58] 13.95 [11.57, 16.11] 0.77 [0.73, 0.82] 0.36 [0.33, 0.38] +6 0.65 [0.63, 0.67] 0.51 [0.46, 0.58] 14.06 [12.09, 15.67] 0.61 [0.54, 0.68] 0.40 [0.37, 0.43] +7 0.65 [0.63, 0.67] 0.19 [0.13, 0.25] 16.63 [13.11, 20.11] 0.18 [0.12, 0.24] 0.59 [0.56, 0.62] +8 0.65 [0.63, 0.67] 0.49 [0.43, 0.56] 14.08 [12.32, 15.83] 0.44 [0.37, 0.51] 0.45 [0.42, 0.48] +9 0.65 [0.63, 0.67] 0.36 [0.31, 0.41] 20.01 [17.11, 23.85] 0.68 [0.63, 0.74] 0.43 [0.41, 0.45] +10 0.65 [0.63, 0.67] 0.50 [0.44, 0.56] 12.28 [10.86, 13.67] 0.47 [0.40, 0.53] 0.44 [0.41, 0.47] +11 0.65 [0.63, 0.67] 0.29 [0.22, 0.36] 30.17 [23.79, 36.02] 0.47 [0.41, 0.52] 0.52 [0.48, 0.55] +12 0.65 [0.63, 0.67] 0.43 [0.36, 0.51] 17.08 [14.12, 19.99] 0.49 [0.41, 0.56] 0.46 [0.42, 0.49] + +## PB132 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.46 [0.45, 0.47] +SED metrics: Error rate: 0.67 [0.65, 0.69], F-score: 38.5 [36.64, 40.37] +DOA metrics: Localization error: 20.7 [19.69 , 21.68], Localization Recall: 55.7 [54.12, 57.31] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.67 [0.65, 0.69] 0.39 [0.33, 0.45] 20.78 [16.42, 24.73] 0.60 [0.54, 0.67] 0.45 [0.42, 0.48] +1 0.67 [0.65, 0.69] 0.34 [0.29, 0.40] 19.92 [17.17, 22.54] 0.51 [0.43, 0.59] 0.48 [0.45, 0.51] +2 0.67 [0.65, 0.69] 0.38 [0.32, 0.44] 21.90 [19.18, 24.69] 0.58 [0.52, 0.63] 0.46 [0.43, 0.49] +3 0.67 [0.65, 0.69] 0.37 [0.31, 0.43] 24.42 [21.30, 27.34] 0.63 [0.58, 0.69] 0.45 [0.42, 0.48] +4 0.67 [0.65, 0.69] 0.49 [0.44, 0.54] 17.53 [14.76, 20.09] 0.64 [0.59, 0.70] 0.41 [0.38, 0.43] +5 0.67 [0.65, 0.69] 0.52 [0.47, 0.57] 14.03 [12.82, 15.23] 0.70 [0.65, 0.75] 0.38 [0.36, 
0.41] +6 0.67 [0.65, 0.69] 0.49 [0.43, 0.54] 15.74 [13.84, 17.71] 0.66 [0.59, 0.72] 0.40 [0.38, 0.43] +7 0.67 [0.65, 0.69] 0.15 [0.10, 0.21] 26.66 [21.49, 31.80] 0.25 [0.18, 0.31] 0.61 [0.57, 0.64] +8 0.67 [0.65, 0.69] 0.43 [0.37, 0.49] 17.34 [14.90, 19.63] 0.44 [0.38, 0.50] 0.47 [0.44, 0.51] +9 0.67 [0.65, 0.69] 0.31 [0.26, 0.37] 26.18 [21.60, 30.71] 0.75 [0.69, 0.81] 0.44 [0.41, 0.47] +10 0.67 [0.65, 0.69] 0.52 [0.46, 0.58] 14.79 [12.89, 16.86] 0.60 [0.53, 0.67] 0.41 [0.38, 0.44] +11 0.67 [0.65, 0.69] 0.31 [0.24, 0.38] 25.93 [21.81, 30.06] 0.46 [0.38, 0.53] 0.51 [0.48, 0.55] +12 0.67 [0.65, 0.69] 0.30 [0.23, 0.37] 23.98 [20.35, 27.96] 0.44 [0.38, 0.50] 0.52 [0.48, 0.55] + +## PC226 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.46 [0.45, 0.47] +SED metrics: Error rate: 0.67 [0.65, 0.69], F-score: 39.2 [37.51, 40.99] +DOA metrics: Localization error: 20.7 [19.68 , 21.70], Localization Recall: 56.2 [54.59, 57.76] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.67 [0.65, 0.69] 0.48 [0.42, 0.53] 22.76 [17.17, 28.42] 0.66 [0.60, 0.72] 0.42 [0.39, 0.44] +1 0.67 [0.65, 0.69] 0.36 [0.29, 0.42] 21.66 [16.93, 26.31] 0.50 [0.43, 0.57] 0.48 [0.45, 0.52] +2 0.67 [0.65, 0.69] 0.38 [0.31, 0.44] 20.73 [18.24, 23.26] 0.56 [0.49, 0.62] 0.46 [0.43, 0.49] +3 0.67 [0.65, 0.69] 0.36 [0.29, 0.43] 24.55 [21.79, 27.30] 0.66 [0.61, 0.72] 0.45 [0.42, 0.48] +4 0.67 [0.65, 0.69] 0.49 [0.45, 0.55] 17.13 [14.66, 19.18] 0.68 [0.63, 0.74] 0.40 [0.37, 0.42] +5 0.67 [0.65, 0.69] 0.53 [0.49, 0.58] 17.17 [14.86, 19.52] 0.78 [0.74, 0.83] 0.36 [0.34, 0.38] +6 0.67 [0.65, 0.69] 0.49 [0.43, 0.56] 16.56 [14.05, 19.02] 0.67 [0.61, 0.73] 0.40 [0.37, 0.43] +7 0.67 [0.65, 0.69] 0.19 [0.11, 0.27] 18.32 [14.30, 22.12] 0.20 [0.14, 0.27] 0.60 [0.56, 0.63] +8 0.67 [0.65, 0.69] 0.42 [0.36, 0.49] 17.90 [15.39, 20.46] 0.42 [0.36, 0.49] 0.48 [0.45, 0.51] +9 0.67 [0.65, 0.69] 0.32 
[0.27, 0.37] 24.52 [21.07, 27.89] 0.73 [0.67, 0.78] 0.44 [0.41, 0.47] +10 0.67 [0.65, 0.69] 0.52 [0.46, 0.58] 15.33 [13.27, 17.37] 0.58 [0.51, 0.65] 0.41 [0.38, 0.45] +11 0.67 [0.65, 0.69] 0.17 [0.11, 0.24] 31.51 [25.34, 37.67] 0.40 [0.33, 0.47] 0.57 [0.53, 0.60] +12 0.67 [0.65, 0.69] 0.37 [0.30, 0.45] 21.20 [17.57, 24.77] 0.45 [0.38, 0.53] 0.49 [0.45, 0.53] + +## SA203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.46 [0.44, 0.47] +SED metrics: Error rate: 0.68 [0.65, 0.70], F-score: 41.3 [39.61, 43.34] +DOA metrics: Localization error: 19.1 [18.11 , 20.15], Localization Recall: 54.6 [52.68, 56.72] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.68 [0.65, 0.70] 0.42 [0.36, 0.49] 20.99 [17.19, 24.86] 0.54 [0.47, 0.62] 0.46 [0.42, 0.49] +1 0.68 [0.65, 0.70] 0.42 [0.35, 0.51] 14.94 [11.45, 17.73] 0.63 [0.55, 0.74] 0.43 [0.38, 0.46] +2 0.68 [0.65, 0.70] 0.45 [0.37, 0.53] 18.21 [16.15, 22.12] 0.51 [0.43, 0.61] 0.45 [0.41, 0.49] +3 0.68 [0.65, 0.70] 0.46 [0.40, 0.52] 22.80 [19.87, 25.69] 0.68 [0.62, 0.74] 0.42 [0.39, 0.45] +4 0.68 [0.65, 0.70] 0.46 [0.40, 0.51] 19.21 [16.57, 21.83] 0.63 [0.56, 0.70] 0.42 [0.39, 0.45] +5 0.68 [0.65, 0.70] 0.49 [0.43, 0.55] 16.47 [13.06, 19.87] 0.77 [0.71, 0.82] 0.38 [0.35, 0.41] +6 0.68 [0.65, 0.70] 0.49 [0.43, 0.56] 16.14 [13.80, 18.22] 0.65 [0.58, 0.71] 0.41 [0.37, 0.44] +7 0.68 [0.65, 0.70] 0.14 [0.09, 0.20] 15.26 [10.87, 19.77] 0.13 [0.08, 0.18] 0.62 [0.59, 0.65] +8 0.68 [0.65, 0.70] 0.43 [0.38, 0.49] 19.05 [15.85, 22.22] 0.47 [0.40, 0.53] 0.47 [0.44, 0.50] +9 0.68 [0.65, 0.70] 0.32 [0.28, 0.37] 27.59 [22.14, 33.09] 0.68 [0.60, 0.76] 0.46 [0.43, 0.49] +10 0.68 [0.65, 0.70] 0.45 [0.39, 0.51] 17.37 [13.25, 21.42] 0.44 [0.38, 0.50] 0.47 [0.44, 0.50] +11 0.68 [0.65, 0.70] 0.41 [0.35, 0.48] 23.38 [19.48, 27.05] 0.51 [0.45, 0.57] 0.47 [0.44, 0.50] +12 0.68 [0.65, 0.70] 0.42 [0.35, 0.49] 16.96 [14.17, 
19.70] 0.46 [0.39, 0.52] 0.47 [0.44, 0.51] + +## SC203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.44 [0.43, 0.46] +SED metrics: Error rate: 0.66 [0.64, 0.69], F-score: 44.8 [42.98, 46.46] +DOA metrics: Localization error: 18.4 [17.32 , 19.36], Localization Recall: 54.7 [52.88, 56.28] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.66 [0.64, 0.69] 0.50 [0.44, 0.57] 16.36 [12.95, 19.58] 0.55 [0.48, 0.62] 0.43 [0.39, 0.46] +1 0.66 [0.64, 0.69] 0.40 [0.34, 0.47] 16.59 [13.63, 19.48] 0.52 [0.44, 0.59] 0.46 [0.42, 0.49] +2 0.66 [0.64, 0.69] 0.51 [0.45, 0.58] 16.21 [12.61, 19.63] 0.57 [0.50, 0.65] 0.42 [0.38, 0.45] +3 0.66 [0.64, 0.69] 0.48 [0.41, 0.53] 21.17 [18.50, 24.52] 0.61 [0.53, 0.67] 0.42 [0.40, 0.46] +4 0.66 [0.64, 0.69] 0.46 [0.41, 0.51] 18.15 [15.03, 21.30] 0.65 [0.59, 0.70] 0.41 [0.39, 0.44] +5 0.66 [0.64, 0.69] 0.55 [0.50, 0.60] 14.31 [12.40, 16.23] 0.76 [0.71, 0.80] 0.36 [0.34, 0.38] +6 0.66 [0.64, 0.69] 0.51 [0.46, 0.56] 13.91 [12.30, 15.51] 0.64 [0.58, 0.70] 0.40 [0.37, 0.43] +7 0.66 [0.64, 0.69] 0.20 [0.14, 0.26] 19.12 [11.98, 26.02] 0.16 [0.11, 0.22] 0.60 [0.57, 0.64] +8 0.66 [0.64, 0.69] 0.52 [0.46, 0.58] 16.23 [13.83, 18.62] 0.49 [0.43, 0.56] 0.44 [0.40, 0.47] +9 0.66 [0.64, 0.69] 0.31 [0.26, 0.36] 27.14 [22.66, 31.33] 0.74 [0.69, 0.79] 0.44 [0.42, 0.47] +10 0.66 [0.64, 0.69] 0.50 [0.44, 0.56] 14.30 [11.70, 16.92] 0.46 [0.39, 0.52] 0.44 [0.41, 0.48] +11 0.66 [0.64, 0.69] 0.40 [0.34, 0.46] 24.75 [20.00, 29.55] 0.45 [0.39, 0.51] 0.49 [0.46, 0.52] +12 0.66 [0.64, 0.69] 0.46 [0.40, 0.53] 20.33 [15.70, 24.92] 0.51 [0.44, 0.59] 0.45 [0.41, 0.49] + +## SE203 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.44 [0.43, 0.45] +SED metrics: Error rate: 0.65 [0.63, 0.67], F-score: 44.8 [42.88, 46.83] +DOA metrics: 
Localization error: 17.3 [16.25 , 18.21], Localization Recall: 53.7 [51.92, 55.56] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.65 [0.63, 0.67] 0.49 [0.42, 0.56] 16.53 [11.51, 21.38] 0.54 [0.46, 0.61] 0.43 [0.39, 0.47] +1 0.65 [0.63, 0.67] 0.41 [0.34, 0.48] 16.25 [12.97, 19.41] 0.52 [0.44, 0.60] 0.45 [0.42, 0.49] +2 0.65 [0.63, 0.67] 0.56 [0.48, 0.63] 12.04 [10.37, 13.92] 0.58 [0.49, 0.66] 0.39 [0.36, 0.44] +3 0.65 [0.63, 0.67] 0.54 [0.48, 0.61] 21.22 [17.70, 24.50] 0.71 [0.64, 0.77] 0.38 [0.34, 0.41] +4 0.65 [0.63, 0.67] 0.41 [0.36, 0.48] 19.91 [15.10, 24.01] 0.65 [0.59, 0.72] 0.42 [0.39, 0.45] +5 0.65 [0.63, 0.67] 0.55 [0.50, 0.61] 16.14 [12.97, 19.25] 0.75 [0.70, 0.80] 0.36 [0.33, 0.39] +6 0.65 [0.63, 0.67] 0.51 [0.45, 0.56] 17.03 [14.18, 19.84] 0.66 [0.60, 0.72] 0.39 [0.37, 0.42] +7 0.65 [0.63, 0.67] 0.20 [0.15, 0.25] 17.32 [12.08, 22.54] 0.17 [0.12, 0.22] 0.59 [0.57, 0.62] +8 0.65 [0.63, 0.67] 0.49 [0.44, 0.54] 15.81 [13.97, 17.51] 0.50 [0.43, 0.56] 0.44 [0.41, 0.47] +9 0.65 [0.63, 0.67] 0.34 [0.30, 0.40] 23.19 [19.02, 26.87] 0.66 [0.60, 0.73] 0.44 [0.41, 0.47] +10 0.65 [0.63, 0.67] 0.49 [0.42, 0.55] 12.06 [10.25, 14.43] 0.46 [0.38, 0.53] 0.44 [0.41, 0.48] +11 0.65 [0.63, 0.67] 0.40 [0.33, 0.47] 22.83 [18.19, 27.47] 0.45 [0.39, 0.51] 0.48 [0.45, 0.52] +12 0.65 [0.63, 0.67] 0.43 [0.35, 0.52] 14.35 [10.85, 17.64] 0.36 [0.28, 0.44] 0.48 [0.44, 0.53] + +## TB103 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.43 [0.42, 0.44] +SED metrics: Error rate: 0.63 [0.61, 0.65], F-score: 45.6 [43.97, 47.41] +DOA metrics: Localization error: 16.9 [15.99 , 17.82], Localization Recall: 54.1 [52.45, 55.81] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.63 [0.61, 0.65] 0.42 [0.35, 0.48] 18.64 [14.89, 22.43] 0.48 [0.41, 0.54] 0.46 [0.43, 0.49] +1 0.63 [0.61, 0.65] 0.45 [0.39, 0.51] 14.90 [12.78, 17.18] 0.59 [0.52, 
0.65] 0.42 [0.39, 0.45] +2 0.63 [0.61, 0.65] 0.48 [0.43, 0.53] 16.79 [13.70, 19.48] 0.53 [0.47, 0.59] 0.43 [0.40, 0.46] +3 0.63 [0.61, 0.65] 0.50 [0.44, 0.56] 21.74 [18.82, 24.51] 0.65 [0.59, 0.71] 0.40 [0.37, 0.43] +4 0.63 [0.61, 0.65] 0.45 [0.39, 0.51] 18.36 [14.96, 21.58] 0.58 [0.52, 0.64] 0.43 [0.40, 0.45] +5 0.63 [0.61, 0.65] 0.57 [0.52, 0.61] 12.38 [10.73, 14.02] 0.75 [0.70, 0.80] 0.34 [0.32, 0.37] +6 0.63 [0.61, 0.65] 0.55 [0.50, 0.61] 13.30 [10.90, 15.51] 0.67 [0.61, 0.73] 0.37 [0.34, 0.40] +7 0.63 [0.61, 0.65] 0.28 [0.21, 0.36] 14.46 [10.31, 18.47] 0.24 [0.17, 0.30] 0.55 [0.51, 0.58] +8 0.63 [0.61, 0.65] 0.50 [0.44, 0.56] 13.94 [12.15, 15.69] 0.44 [0.38, 0.49] 0.44 [0.41, 0.47] +9 0.63 [0.61, 0.65] 0.35 [0.30, 0.40] 23.32 [18.21, 28.23] 0.69 [0.64, 0.75] 0.43 [0.40, 0.46] +10 0.63 [0.61, 0.65] 0.56 [0.51, 0.62] 12.50 [10.11, 14.85] 0.53 [0.46, 0.59] 0.40 [0.37, 0.43] +11 0.63 [0.61, 0.65] 0.38 [0.31, 0.45] 24.50 [20.23, 28.70] 0.46 [0.40, 0.53] 0.48 [0.45, 0.52] +12 0.63 [0.61, 0.65] 0.45 [0.38, 0.50] 15.50 [12.68, 18.36] 0.43 [0.37, 0.49] 0.46 [0.43, 0.49] + +## TC352 +results on 133 scapes generated for this room using `SpatialScaper/experiments/data_generation_TAU_MIC.py` + +Test Loss +SELD score (early stopping metric): 0.45 [0.44, 0.46] +SED metrics: Error rate: 0.66 [0.64, 0.68], F-score: 41.4 [39.73, 43.26] +DOA metrics: Localization error: 19.8 [18.83 , 20.85], Localization Recall: 56.3 [54.73, 58.27] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.66 [0.64, 0.68] 0.46 [0.39, 0.53] 19.69 [15.43, 23.72] 0.62 [0.55, 0.69] 0.42 [0.39, 0.46] +1 0.66 [0.64, 0.68] 0.41 [0.35, 0.47] 19.59 [16.28, 22.85] 0.60 [0.53, 0.67] 0.44 [0.41, 0.47] +2 0.66 [0.64, 0.68] 0.36 [0.29, 0.43] 22.35 [19.10, 25.92] 0.51 [0.43, 0.59] 0.48 [0.44, 0.51] +3 0.66 [0.64, 0.68] 0.42 [0.36, 0.47] 22.92 [20.67, 25.17] 0.68 [0.62, 0.73] 0.42 [0.40, 0.45] +4 0.66 [0.64, 0.68] 0.46 [0.40, 0.53] 18.60 [15.18, 21.80] 0.68 [0.62, 0.74] 0.41 [0.37, 0.44] +5 0.66 
[0.64, 0.68] 0.50 [0.44, 0.56] 14.38 [11.51, 17.26] 0.74 [0.68, 0.80] 0.38 [0.35, 0.40] +6 0.66 [0.64, 0.68] 0.55 [0.50, 0.61] 14.90 [13.21, 16.59] 0.68 [0.62, 0.74] 0.38 [0.35, 0.41] +7 0.66 [0.64, 0.68] 0.16 [0.12, 0.23] 21.21 [15.08, 26.44] 0.18 [0.13, 0.23] 0.61 [0.57, 0.64] +8 0.66 [0.64, 0.68] 0.47 [0.40, 0.54] 16.16 [13.48, 18.74] 0.46 [0.39, 0.52] 0.46 [0.42, 0.49] +9 0.66 [0.64, 0.68] 0.36 [0.30, 0.42] 24.28 [19.93, 29.27] 0.69 [0.64, 0.75] 0.44 [0.41, 0.46] +10 0.66 [0.64, 0.68] 0.54 [0.49, 0.60] 14.80 [12.46, 17.34] 0.55 [0.50, 0.61] 0.41 [0.38, 0.44] +11 0.66 [0.64, 0.68] 0.29 [0.24, 0.34] 29.16 [24.78, 33.61] 0.41 [0.34, 0.47] 0.53 [0.51, 0.56] +12 0.66 [0.64, 0.68] 0.40 [0.33, 0.46] 19.84 [17.57, 22.49] 0.54 [0.47, 0.61] 0.46 [0.43, 0.49] + + +# Model metrics on the STARSS23 dataset validation split for reference + +## FOA + +Test Loss +SELD score (early stopping metric): 0.47 [0.44, 0.50] +SED metrics: Error rate: 0.59 [0.55, 0.63], F-score: 30.5 [26.32, 35.29] +DOA metrics: Localization error: 22.1 [18.01 , 25.76], Localization Recall: 51.2 [45.84, 56.75] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.59 [0.55, 0.63] 0.49 [0.38, 0.63] 24.37 [14.81, 32.81] 0.77 [0.71, 0.84] 0.37 [0.30, 0.42] +1 0.59 [0.55, 0.63] 0.58 [0.51, 0.64] 16.33 [14.44, 18.35] 0.70 [0.66, 0.75] 0.35 [0.33, 0.38] +2 0.59 [0.55, 0.63] 0.26 [0.09, 0.44] 22.41 [18.39, 26.46] 0.70 [0.60, 0.80] 0.44 [0.37, 0.51] +3 0.59 [0.55, 0.63] 0.17 [-0.04, 0.42] 40.50 [-6.37, 79.54] 0.46 [0.13, 0.82] 0.55 [0.36, 0.71] +4 0.59 [0.55, 0.63] 0.30 [0.22, 0.37] 19.10 [13.69, 26.36] 0.33 [0.26, 0.41] 0.52 [0.48, 0.56] +5 0.59 [0.55, 0.63] 0.45 [0.33, 0.57] 18.05 [15.24, 20.97] 0.66 [0.49, 0.83] 0.40 [0.33, 0.46] +6 0.59 [0.55, 0.63] 0.05 [0.01, 0.08] 33.17 [28.73, 37.17] 0.16 [0.06, 0.26] 0.64 [0.61, 0.68] +7 0.59 [0.55, 0.63] 0.15 [-0.00, 0.31] 9.54 [4.75, 14.36] 0.13 [0.00, 0.26] 0.59 [0.52, 0.66] +8 0.59 [0.55, 0.63] 0.34 [0.17, 0.53] 29.00 [16.10, 40.45] 0.56 [0.47, 
0.66] 0.46 [0.38, 0.53] +9 0.59 [0.55, 0.63] 0.61 [0.49, 0.73] 15.99 [12.60, 19.11] 0.69 [0.57, 0.80] 0.35 [0.28, 0.41] +10 0.59 [0.55, 0.63] 0.23 [0.03, 0.40] 16.47 [12.82, 20.47] 0.23 [-0.01, 0.42] 0.56 [0.46, 0.67] +11 0.59 [0.55, 0.63] 0.07 [-0.06, 0.18] 30.62 [21.33, 44.03] 0.69 [0.43, 1.00] 0.50 [0.43, 0.57] +12 0.59 [0.55, 0.63] 0.28 [0.09, 0.47] 11.12 [7.34, 15.03] 0.57 [0.12, 0.98] 0.45 [0.32, 0.60] + +## MIC + +Test Loss +SELD score (early stopping metric): 0.52 [0.49, 0.55] +SED metrics: Error rate: 0.69 [0.65, 0.73], F-score: 25.0 [21.54, 29.01] +DOA metrics: Localization error: 31.4 [24.06 , 38.43], Localization Recall: 52.3 [46.63, 58.50] +Classwise results on unseen test data +Class ER F LE LR SELD_score +0 0.69 [0.65, 0.73] 0.51 [0.42, 0.63] 17.70 [14.43, 19.95] 0.72 [0.66, 0.80] 0.39 [0.33, 0.43] +1 0.69 [0.65, 0.73] 0.49 [0.43, 0.54] 18.48 [16.45, 20.61] 0.64 [0.57, 0.70] 0.42 [0.38, 0.45] +2 0.69 [0.65, 0.73] 0.25 [0.10, 0.43] 22.05 [14.60, 28.63] 0.64 [0.47, 0.82] 0.48 [0.39, 0.56] +3 0.69 [0.65, 0.73] 0.00 [0.00, 0.00] 66.15 [-4.51, 113.47] 0.33 [-0.02, 0.71] 0.68 [0.50, 0.82] +4 0.69 [0.65, 0.73] 0.20 [0.15, 0.26] 28.10 [21.19, 35.82] 0.41 [0.34, 0.50] 0.56 [0.53, 0.59] +5 0.69 [0.65, 0.73] 0.35 [0.20, 0.51] 26.54 [16.37, 36.43] 0.61 [0.48, 0.75] 0.47 [0.39, 0.54] +6 0.69 [0.65, 0.73] 0.04 [0.01, 0.07] 36.35 [31.15, 41.46] 0.27 [0.14, 0.38] 0.65 [0.61, 0.68] +7 0.69 [0.65, 0.73] 0.20 [0.08, 0.32] 14.58 [9.83, 18.98] 0.28 [0.15, 0.41] 0.57 [0.51, 0.63] +8 0.69 [0.65, 0.73] 0.25 [0.10, 0.42] 32.58 [20.70, 43.34] 0.52 [0.39, 0.64] 0.53 [0.45, 0.59] +9 0.69 [0.65, 0.73] 0.42 [0.30, 0.56] 22.72 [18.44, 26.78] 0.76 [0.64, 0.87] 0.41 [0.34, 0.47] +10 0.69 [0.65, 0.73] 0.22 [0.06, 0.37] 23.20 [16.66, 31.15] 0.58 [0.42, 0.75] 0.50 [0.43, 0.58] +11 0.69 [0.65, 0.73] 0.04 [-0.03, 0.10] 83.15 [28.88, 159.45] 0.39 [0.11, 0.72] 0.68 [0.58, 0.80] +12 0.69 [0.65, 0.73] 0.27 [0.11, 0.44] 16.31 [9.32, 22.70] 0.64 [0.30, 0.96] 0.47 [0.35, 0.58] diff --git 
a/experiments/SELDnet23_training_with_SS_TAU_results.md b/experiments/SELDnet23_training_with_SS_TAU_results.md
new file mode 100644
index 0000000..90dc964
--- /dev/null
+++ b/experiments/SELDnet23_training_with_SS_TAU_results.md
@@ -0,0 +1,21 @@
+Training the SELDnet23 model, following the instructions specified [here](https://github.com/sharathadavanne/seld-dcase2023), but using the data generated with `SpatialScaper/experiments/data_generation_TAU_{format}.py` (where format is `MIC` or `FOA`) instead of the synthetic recordings from the DCASE2022 webpage, results in a model with the following performance:
+
+## FOA
+
+SED metrics:
+* Error rate: 0.65 [0.60, 0.70]
+* F-score: 24.8 [20.60, 29.32]
+
+DOA metrics:
+* Localization error: 23.8 [20.52, 26.71]
+* Localization Recall: 51.0 [46.23, 55.95]
+
+## MIC
+
+SED metrics:
+* Error rate: 0.68 [0.62, 0.72]
+* F-score: 24.7 [20.88, 28.89]
+
+DOA metrics:
+* Localization error: 26.9 [23.66, 29.43]
+* Localization Recall: 45.2 [40.13, 50.50]
diff --git a/experiments/data_generation_TAU_FOA.py b/experiments/data_generation_TAU_FOA.py
new file mode 100644
index 0000000..408948c
--- /dev/null
+++ b/experiments/data_generation_TAU_FOA.py
@@ -0,0 +1,73 @@
+"""Generate SpatialScaper soundscapes in FOA format for every room in ROOMS.
+
+Running this file as a script requests NSCAPES_PER_ROOM soundscapes per room,
+passing audio paths under OUTPUT_DIR/FORMAT and label paths under OUTPUT_DIR/labels.
+"""
+
+import os
+
+import numpy as np
+import spatialscaper as ss
+
+# Constants
+NSCAPES_PER_ROOM = 133  # Number of soundscapes to generate per room
+FOREGROUND_DIR = "datasets/sound_event_datasets/FSD50K_FMA"  # Directory with FSD50K foreground sound files
+RIR_DIR = "datasets/rir_datasets"  # Directory containing Room Impulse Response (RIR) files
+FORMAT = "foa"  # Output format specifier
+N_EVENTS_MEAN = 15  # Mean number of foreground events in a soundscape
+N_EVENTS_STD = 6  # Standard deviation of the number of foreground events
+DURATION = 60.0  # Duration in seconds of each soundscape, customizable by the user
+SR = 24000  # SpatialScaper default sampling rate for the audio files (not referenced elsewhere in this script)
+OUTPUT_DIR = "FOA_TAU"  # Directory to store the generated soundscapes
+
+ROOMS = ["bomb_shelter", "gym", "pb132", "pc226", "sa203", "sc203", "se203", "tb103", "tc352"]
+
+
+def generate_soundscape(room, index, room_number):
+    """Build one soundscape for ``room`` and hand its output paths to the Scaper.
+
+    Args:
+        room: Room name passed to ``ss.Scaper`` (an entry of ROOMS).
+        index: Zero-based soundscape index; the track name uses ``index + 1``.
+        room_number: Room number embedded in the track name.
+    """
+    ref_db = np.random.uniform(-70, -50)  # Randomly determine the ref_db
+    track_name = f"fold1_room{room_number}_mix{index+1:03d}"
+    # Initialize Scaper
+    ssc = ss.Scaper(
+        DURATION,
+        FOREGROUND_DIR,
+        RIR_DIR,
+        FORMAT,
+        room,
+        max_event_overlap=2,
+        speed_limit=2.0,
+    )
+    ssc.ref_db = ref_db
+
+    # static white noise in this example
+    ssc.add_background()
+
+    # Add a random number of foreground events. int() truncates the normal draw,
+    # and any draw below 1 is clamped so a soundscape always has at least one event.
+    n_events = max(1, int(np.random.normal(N_EVENTS_MEAN, N_EVENTS_STD)))
+
+    for _ in range(n_events):
+        ssc.add_event()
+
+    audiofile = os.path.join(OUTPUT_DIR, FORMAT, track_name)
+    labelfile = os.path.join(OUTPUT_DIR, "labels", track_name)
+
+    ssc.generate(audiofile, labelfile)
+
+
+def main():
+    """Generate NSCAPES_PER_ROOM soundscapes for each room in ROOMS."""
+    for room_number, room in enumerate(ROOMS, start=1):
+        for iscape in range(NSCAPES_PER_ROOM):
+            print(f"Generating soundscape: {iscape + 1}/{NSCAPES_PER_ROOM} for room {room}")
+            generate_soundscape(room, iscape, room_number)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/experiments/data_generation_TAU_MIC.py b/experiments/data_generation_TAU_MIC.py
new file mode 100644
index 0000000..d3b41b5
--- /dev/null
+++ b/experiments/data_generation_TAU_MIC.py
@@ -0,0 +1,74 @@
+"""Generate SpatialScaper soundscapes in MIC format for every room in ROOMS.
+
+Running this file as a script requests NSCAPES_PER_ROOM soundscapes per room,
+passing audio paths under OUTPUT_DIR/FORMAT and label paths under OUTPUT_DIR/labels.
+"""
+
+import os
+
+import numpy as np
+import spatialscaper as ss
+
+# Constants
+NSCAPES_PER_ROOM = 133  # Number of soundscapes to generate per room
+FOREGROUND_DIR = "datasets/sound_event_datasets/FSD50K_FMA"  # Directory with FSD50K foreground sound files
+RIR_DIR = "datasets/rir_datasets"  # Directory containing Room Impulse Response (RIR) files
+FORMAT = "mic"  # Output format specifier
+N_EVENTS_MEAN = 15  # Mean number of foreground events in a soundscape
+N_EVENTS_STD = 6  # Standard deviation of the number of foreground events
+DURATION = 60.0  # Duration in seconds of each soundscape, customizable by the user
+SR = 24000  # SpatialScaper default sampling rate for the audio files (not referenced elsewhere in this script)
+# NOTE(review): the FOA counterpart of this script writes to "FOA_TAU"; confirm "output" is intended here.
+OUTPUT_DIR = "output"  # Directory to store the generated soundscapes
+
+ROOMS = ["bomb_shelter", "gym", "pb132", "pc226", "sa203", "sc203", "se203", "tb103", "tc352"]
+
+
+def generate_soundscape(room, index, room_number):
+    """Build one soundscape for ``room`` and hand its output paths to the Scaper.
+
+    Args:
+        room: Room name passed to ``ss.Scaper`` (an entry of ROOMS).
+        index: Zero-based soundscape index; the track name uses ``index + 1``.
+        room_number: Room number embedded in the track name.
+    """
+    ref_db = np.random.uniform(-70, -50)  # Randomly determine the ref_db
+    track_name = f"fold1_room{room_number}_mix{index+1:03d}"
+    # Initialize Scaper
+    ssc = ss.Scaper(
+        DURATION,
+        FOREGROUND_DIR,
+        RIR_DIR,
+        FORMAT,
+        room,
+        max_event_overlap=2,
+        speed_limit=2.0,
+    )
+    ssc.ref_db = ref_db
+
+    # static white noise in this example
+    ssc.add_background()
+
+    # Add a random number of foreground events. int() truncates the normal draw,
+    # and any draw below 1 is clamped so a soundscape always has at least one event.
+    n_events = max(1, int(np.random.normal(N_EVENTS_MEAN, N_EVENTS_STD)))
+
+    for _ in range(n_events):
+        ssc.add_event()
+
+    audiofile = os.path.join(OUTPUT_DIR, FORMAT, track_name)
+    labelfile = os.path.join(OUTPUT_DIR, "labels", track_name)
+
+    ssc.generate(audiofile, labelfile)
+
+
+def main():
+    """Generate NSCAPES_PER_ROOM soundscapes for each room in ROOMS."""
+    for room_number, room in enumerate(ROOMS, start=1):
+        for iscape in range(NSCAPES_PER_ROOM):
+            print(f"Generating soundscape: {iscape + 1}/{NSCAPES_PER_ROOM} for room {room}")
+            generate_soundscape(room, iscape, room_number)
+
+
+if __name__ == "__main__":
+    main()