Skip to content

Commit cd72243

Browse files
fixes
1 parent 686c39e commit cd72243

File tree

8 files changed

+61
-35
lines changed

8 files changed

+61
-35
lines changed

cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_finalize_impl.i

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,22 +59,24 @@ Status FinalizeKernel<algorithmFPType, cpu>::compute(const NumericTable & xtxTab
5959
const KernelHelperIface<algorithmFPType, cpu> & helper,
6060
const HyperparameterType * hyperparameter)
6161
{
62+
std::cout << "here tracker1" << std::endl;
6263
DAAL_PROFILER_TASK(computeFinalize);
6364
const size_t nBetas(betaTable.getNumberOfColumns());
6465
const size_t nResponses(betaTable.getNumberOfRows());
6566
const size_t nBetasIntercept = (interceptFlag ? nBetas : (nBetas - 1));
66-
67+
std::cout << "here tracker2" << std::endl;
6768
const size_t xtxSizeInBytes(sizeof(algorithmFPType) * nBetasIntercept * nBetasIntercept);
6869
const size_t xtySizeInBytes(sizeof(algorithmFPType) * nBetasIntercept * nResponses);
6970

7071
TArray<algorithmFPType, cpu> betaBufferArray;
7172
algorithmFPType * betaBuffer(nullptr);
73+
std::cout << "here tracker3" << std::endl;
7274
Status st;
7375
{
7476
ReadRowsType xtxBlock(const_cast<NumericTable &>(xtxTable), 0, nBetasIntercept);
7577
DAAL_CHECK_BLOCK_STATUS(xtxBlock);
7678
algorithmFPType * xtx = const_cast<algorithmFPType *>(xtxBlock.get());
77-
79+
std::cout << "here tracker4" << std::endl;
7880
if (&xtxTable != &xtxFinalTable)
7981
{
8082
DAAL_PROFILER_TASK(computeFinalize.copyToxtxFinalTable);
@@ -84,18 +86,19 @@ Status FinalizeKernel<algorithmFPType, cpu>::compute(const NumericTable & xtxTab
8486
{
8587
ReadRowsType xtyBlock(const_cast<NumericTable &>(xtyTable), 0, nResponses);
8688
DAAL_CHECK_BLOCK_STATUS(xtyBlock);
89+
std::cout << "here tracker5" << std::endl;
8790
algorithmFPType * xty = const_cast<algorithmFPType *>(xtyBlock.get());
8891

8992
if (&xtyTable != &xtyFinalTable)
9093
{
9194
DAAL_PROFILER_TASK(computeFinalize.copyToxtyFinalTable);
9295
DAAL_CHECK_STATUS(st, copyDataToTable(xty, xtySizeInBytes, xtyFinalTable));
9396
}
94-
97+
std::cout << "here tracker6" << std::endl;
9598
betaBufferArray.reset(nResponses * nBetasIntercept);
9699
betaBuffer = betaBufferArray.get();
97100
DAAL_CHECK_MALLOC(betaBuffer);
98-
101+
std::cout << "here tracker7" << std::endl;
99102
DAAL_PROFILER_TASK(computeFinalize.betaBufCopy);
100103
int result = daal::services::internal::daal_memcpy_s(betaBuffer, xtySizeInBytes, xty, xtySizeInBytes);
101104
DAAL_CHECK(!result, services::ErrorMemoryCopyFailedInternal);
@@ -104,27 +107,30 @@ Status FinalizeKernel<algorithmFPType, cpu>::compute(const NumericTable & xtxTab
104107
TArray<algorithmFPType, cpu> xtxCopyArray(nBetasIntercept * nBetasIntercept);
105108
algorithmFPType * xtxCopy = xtxCopyArray.get();
106109
DAAL_CHECK_MALLOC(xtxCopy);
107-
110+
std::cout << "here tracker7" << std::endl;
108111
{
109112
DAAL_PROFILER_TASK(computeFinalize.xtxCopy);
113+
std::cout << "here tracker8" << std::endl;
110114
int result = daal::services::internal::daal_memcpy_s(xtxCopy, xtxSizeInBytes, xtx, xtxSizeInBytes);
111115
DAAL_CHECK(!result, services::ErrorMemoryCopyFailedInternal);
112116
}
113117

114118
{
115119
DAAL_PROFILER_TASK(computeFinalize.computeBetasImpl);
120+
std::cout << "here tracker8.1" << std::endl;
116121
DAAL_CHECK_STATUS(st, helper.computeBetasImpl(nBetasIntercept, xtx, xtxCopy, nResponses, betaBuffer, interceptFlag));
117122
}
118123
}
119124
}
120-
125+
std::cout << "here tracker9" << std::endl;
121126
WriteOnlyRowsType betaBlock(betaTable, 0, nResponses);
122127
DAAL_CHECK_BLOCK_STATUS(betaBlock);
123128
algorithmFPType * beta = betaBlock.get();
124129

125130
DAAL_PROFILER_TASK(computeFinalize.copyBetaToResult);
126131
if (nBetasIntercept == nBetas)
127132
{
133+
std::cout << "here tracker10" << std::endl;
128134
for (size_t i = 0; i < nResponses; i++)
129135
{
130136
PRAGMA_OMP_SIMD
@@ -138,6 +144,7 @@ Status FinalizeKernel<algorithmFPType, cpu>::compute(const NumericTable & xtxTab
138144
}
139145
else
140146
{
147+
std::cout << "here tracker11" << std::endl;
141148
for (size_t i = 0; i < nResponses; i++)
142149
{
143150
PRAGMA_OMP_SIMD
@@ -167,14 +174,8 @@ template <typename algorithmFPType, CpuType cpu>
167174
/**
 * Solves the system for the coefficients by delegating to
 * daal::algorithms::internal::solveSymmetricEquationsSystem.
 *
 * \param p              Forwarded to the solver as the system dimension (its `n` argument)
 * \param a              Forwarded to the solver as the system matrix buffer
 * \param ny             Forwarded to the solver as the number of right-hand sides (its `nX` argument)
 * \param b              Forwarded to the solver as the right-hand-side buffer
 * \param internalError  Error identifier wrapped into the returned Status when the solver reports failure
 * \return Default (successful) Status when the solver returns true, Status(internalError) otherwise
 */
Status FinalizeKernel<algorithmFPType, cpu>::solveSystem(DAAL_INT p, algorithmFPType * a, DAAL_INT ny, algorithmFPType * b,
                                                         const ErrorID & internalError)
{
    /* The solver signals failure through its boolean result; it must not be dropped,
       otherwise callers would treat an unsolved system as a valid result.
       The last argument (`sequential`) is passed as false. */
    if (!daal::algorithms::internal::solveSymmetricEquationsSystem<algorithmFPType, cpu>(a, b, p, ny, false))
    {
        return Status(internalError);
    }
    return Status();
}
179180

180181
} // namespace internal

cpp/daal/src/algorithms/linear_model/linear_model_train_qr_common_impl.i

100755100644
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ Status CommonKernel<algorithmFPType, cpu>::computeWorkSize(DAAL_INT nRows, DAAL_
4848
algorithmFPType workLocal;
4949

5050
DAAL_INT lwork1 = -1;
51+
std::cout << "point1" << std::endl;
5152
LapackInst<algorithmFPType, cpu>::xxgerqf(&nCols, &nRows, NULL, &nCols, NULL, &workLocal, &lwork1, &info);
5253
DAAL_CHECK(info == 0, services::ErrorLinearRegressionInternal);
5354

@@ -58,6 +59,7 @@ Status CommonKernel<algorithmFPType, cpu>::computeWorkSize(DAAL_INT nRows, DAAL_
5859
DAAL_INT lwork2 = -1;
5960
LapackInst<algorithmFPType, cpu>::xxormrq(&side, &trans, &nResponses, &nRows, &nCols, NULL, &nCols, NULL, NULL, &nResponses, &workLocal, &lwork2,
6061
&info);
62+
std::cout << "point2" << std::endl;
6163
DAAL_CHECK(info == 0, services::ErrorLinearRegressionInternal);
6264

6365
lwork2 = (DAAL_INT)workLocal;
@@ -75,6 +77,7 @@ Status CommonKernel<algorithmFPType, cpu>::computeQRForBlock(DAAL_INT p, DAAL_IN
7577

7678
/* Calculate RQ decomposition of X */
7779
LapackInst<algorithmFPType, cpu>::xxgerqf(&p, &n, const_cast<algorithmFPType *>(x), &p, tau, work, &lwork, &info);
80+
std::cout << "point3" << std::endl;
7881
DAAL_CHECK(info == 0, services::ErrorLinearRegressionInternal);
7982

8083
/* Copy result into matrix R */
@@ -100,6 +103,7 @@ Status CommonKernel<algorithmFPType, cpu>::computeQRForBlock(DAAL_INT p, DAAL_IN
100103
char trans = 'T';
101104
LapackInst<algorithmFPType, cpu>::xxormrq(&side, &trans, &ny, &n, const_cast<DAAL_INT *>(&nRowsInR), const_cast<algorithmFPType *>(x + jOffset),
102105
&p, tau, const_cast<algorithmFPType *>(y), &ny, work, &lwork, &info);
106+
std::cout << "point4" << std::endl;
103107
DAAL_CHECK(info == 0, services::ErrorLinearRegressionInternal);
104108

105109
if (p > n)

cpp/daal/src/algorithms/linear_model/linear_model_train_qr_finalize_impl.i

100755100644
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Status FinalizeKernel<algorithmFPType, cpu>::compute(const NumericTable & rTable
104104
LapackInst<algorithmFPType, cpu>::xtrtrs(&up, &trans, &nodiag, const_cast<DAAL_INT *>(&nBetasIntercept), const_cast<DAAL_INT *>(&nResponses),
105105
const_cast<algorithmFPType *>(r), const_cast<DAAL_INT *>(&nBetasIntercept), betaBuffer,
106106
const_cast<DAAL_INT *>(&nBetasIntercept), &info);
107+
std::cout << "point5" << std::endl;
107108
DAAL_CHECK(info == 0, services::ErrorLinearRegressionInternal);
108109
}
109110

cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_impl.i

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ Status OnlineKernel<algorithmFPType, training::normEqDense, cpu>::finalizeComput
9191
NumericTable & beta, bool interceptFlag,
9292
const HyperparameterType * hyperparameter) const
9393
{
94+
std::cout << "here finalize compute" << std::endl;
9495
services::SharedPtr<linear_model::internal::Hyperparameter> lmHyperparameter;
9596
DAAL_CHECK_STATUS_VAR(linear_regression::internal::convert(hyperparameter, lmHyperparameter));
9697
return FinalizeKernelType::compute(xtx, xty, xtxFinal, xtyFinal, beta, interceptFlag, KernelHelper<algorithmFPType, cpu>(),

cpp/daal/src/algorithms/service_kernel_math.h

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ template <typename FPType, CpuType cpu>
691691
bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_t n, size_t nX, bool sequential)
692692
{
693693
DAAL_PROFILER_TASK_WITH_ARGS(solveEquationsSystemWithSpectralDecomposition, n, nX);
694+
std::cout << "solveEquationsSystemWithSpectralDecomposition1" << std::endl;
694695
/* Storage for the eigenvalues.
695696
Note: this allocates more size than they might require when nX > 1, because the same
696697
buffer will get reused later on and needs the extra size. Those additional entries
@@ -711,20 +712,22 @@ bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_
711712
FPType zero = 0;
712713
DAAL_INT info;
713714
DAAL_INT num_eigenvalues;
714-
715+
std::cout << "solveEquationsSystemWithSpectralDecomposition2" << std::endl;
715716
/* Query the procedure for size of required buffer */
716717
DAAL_INT lwork_query_indicator = -1;
717718
FPType buffer_size_work = 0;
718719
DAAL_INT buffer_size_iwork = 0;
719720
if (sequential)
720721
{
722+
std::cout << "solveEquationsSystemWithSpectralDecomposition3" << std::endl;
721723
LapackInst<FPType, cpu>::xxsyevr(&jobz, &range, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, nullptr, nullptr, nullptr, nullptr, &zero,
722724
&num_eigenvalues, eigenvalues.get(), eigenvectors.get(), (DAAL_INT *)&n, buffer_isuppz.get(),
723725
&buffer_size_work, &lwork_query_indicator, &buffer_size_iwork, &lwork_query_indicator, &info);
724726
}
725727

726728
else
727729
{
730+
std::cout << "solveEquationsSystemWithSpectralDecomposition4" << std::endl;
728731
LapackInst<FPType, cpu>::xsyevr(&jobz, &range, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, nullptr, nullptr, nullptr, nullptr, &zero,
729732
&num_eigenvalues, eigenvalues.get(), eigenvectors.get(), (DAAL_INT *)&n, buffer_isuppz.get(),
730733
&buffer_size_work, &lwork_query_indicator, &buffer_size_iwork, &lwork_query_indicator, &info);
@@ -746,34 +749,41 @@ bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_
746749
/* Perform Q*diag(l)*Q' factorization of A */
747750
if (sequential)
748751
{
752+
std::cout << "solveEquationsSystemWithSpectralDecomposition5" << std::endl;
749753
LapackInst<FPType, cpu>::xxsyevr(&jobz, &range, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, nullptr, nullptr, nullptr, nullptr, &zero,
750754
&num_eigenvalues, eigenvalues.get(), eigenvectors.get(), (DAAL_INT *)&n, buffer_isuppz.get(),
751755
work_buffer.get(), &work_buffer_size, iwork_buffer.get(), &buffer_size_iwork, &info);
752756
}
753757
else
754758
{
755-
LapackInst<FPType, cpu>::xsyevr(&jobz, &range, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, nullptr, nullptr, nullptr, nullptr, &zero,
756-
&num_eigenvalues, eigenvalues.get(), eigenvectors.get(), (DAAL_INT *)&n, buffer_isuppz.get(),
757-
work_buffer.get(), &work_buffer_size, iwork_buffer.get(), &buffer_size_iwork, &info);
759+
std::cout << "solveEquationsSystemWithSpectralDecomposition6" << std::endl;
760+
LapackInst<FPType, cpu>::xxsyevr(&jobz, &range, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, nullptr, nullptr, nullptr, nullptr, &zero,
761+
&num_eigenvalues, eigenvalues.get(), eigenvectors.get(), (DAAL_INT *)&n, buffer_isuppz.get(),
762+
work_buffer.get(), &work_buffer_size, iwork_buffer.get(), &buffer_size_iwork, &info);
763+
std::cout << "solveEquationsSystemWithSpectralDecomposition61" << std::endl;
758764
}
759765
if (info) return false;
760-
766+
std::cout << "solveEquationsSystemWithSpectralDecomposition62" << std::endl;
761767
/* Components with small singular values get eliminated using the exact same logic as 'gelsd' with default parameters
762768
Note: these are hard-coded versions of machine epsilon for single and double precision. They aren't obtained through
763769
'std::numeric_limits' in order to avoid potential template instantiation errors with some types. */
764770
const FPType eps = std::is_same<FPType, float>::value ? 1.1920929e-07 : 2.220446049250313e-16;
771+
std::cout << "solveEquationsSystemWithSpectralDecomposition621" << std::endl;
772+
std::cout << "eigen equals:" << eigenvalues[n - 1] << std::endl;
765773
if (eigenvalues[n - 1] <= eps) return false;
774+
std::cout << "solveEquationsSystemWithSpectralDecomposition63" << std::endl;
766775
const double component_threshold = eps * eigenvalues[n - 1];
767776
DAAL_INT num_discarded;
768777
for (num_discarded = 0; num_discarded < static_cast<DAAL_INT>(n) - 1; num_discarded++)
769778
{
770779
if (eigenvalues[num_discarded] > component_threshold) break;
771780
}
772-
781+
std::cout << "solveEquationsSystemWithSpectralDecomposition7" << std::endl;
773782
/* Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l)) */
774783
DAAL_INT num_taken = static_cast<DAAL_INT>(n) - num_discarded;
775784
daal::internal::MathInst<FPType, cpu>::vSqrt(num_taken, eigenvalues.get() + num_discarded, eigenvalues.get() + num_discarded);
776785
DAAL_INT one = 1;
786+
std::cout << "solveEquationsSystemWithSpectralDecomposition8" << std::endl;
777787
for (size_t col = num_discarded; col < n; col++)
778788
{
779789
const FPType scale = eigenvalues[col];
@@ -787,14 +797,15 @@ bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_
787797
LapackInst<FPType, cpu>::xrscl((DAAL_INT *)&n, &scale, eigenvectors.get() + col * n, &one);
788798
}
789799
}
790-
800+
std::cout << "solveEquationsSystemWithSpectralDecomposition9" << std::endl;
791801
/* Now calculate the actual solution: Qis * Qis' * B */
792802
char trans_yes = 'T';
793803
char trans_no = 'N';
794804
FPType one_fp = 1;
795805
const size_t eigenvectors_offset = static_cast<size_t>(num_discarded) * n;
796806
if (sequential)
797807
{
808+
std::cout << "solveEquationsSystemWithSpectralDecomposition12" << std::endl;
798809
if (nX == 1)
799810
{
800811
BlasInst<FPType, cpu>::xxgemv(&trans_yes, (DAAL_INT *)&n, &num_taken, &one_fp, eigenvectors.get() + eigenvectors_offset, (DAAL_INT *)&n,
@@ -813,19 +824,19 @@ bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_
813824
(DAAL_INT *)&n);
814825
}
815826
}
816-
817827
else
818828
{
829+
std::cout << "solveEquationsSystemWithSpectralDecomposition13" << std::endl;
819830
if (nX == 1)
820831
{
821832
BlasInst<FPType, cpu>::xgemv(&trans_yes, (DAAL_INT *)&n, &num_taken, &one_fp, eigenvectors.get() + eigenvectors_offset, (DAAL_INT *)&n, b,
822833
&one, &zero, eigenvalues.get(), &one);
823834
BlasInst<FPType, cpu>::xgemv(&trans_no, (DAAL_INT *)&n, &num_taken, &one_fp, eigenvectors.get() + eigenvectors_offset, (DAAL_INT *)&n,
824835
eigenvalues.get(), &one, &zero, b, &one);
825836
}
826-
827837
else
828838
{
839+
std::cout << "solveEquationsSystemWithSpectralDecomposition11" << std::endl;
829840
BlasInst<FPType, cpu>::xgemm(&trans_yes, &trans_no, &num_taken, (DAAL_INT *)&nX, (DAAL_INT *)&n, &one_fp,
830841
eigenvectors.get() + eigenvectors_offset, (DAAL_INT *)&n, b, (DAAL_INT *)&n, &zero, eigenvalues.get(),
831842
&num_taken);
@@ -841,29 +852,36 @@ bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_
841852
template <typename FPType, CpuType cpu>
842853
bool solveSymmetricEquationsSystem(FPType * a, FPType * b, size_t n, size_t nX, bool sequential)
843854
{
855+
std::cout << "here symmetric system1" << std::endl;
844856
DAAL_PROFILER_TASK_WITH_ARGS(solveSymmetricEquationsSystem, n, nX);
845857
/* Copy data for fallback from Cholesky to spectral decomposition */
846858
TArrayScalable<FPType, cpu> aCopy(n * n);
847859
TArrayScalable<FPType, cpu> bCopy(n * nX);
848860
DAAL_CHECK_MALLOC(aCopy.get());
849861
DAAL_CHECK_MALLOC(bCopy.get());
850-
862+
std::cout << "here symmetric system2" << std::endl;
851863
int copy_status = services::internal::daal_memcpy_s(aCopy.get(), n * n * sizeof(FPType), a, n * n * sizeof(FPType));
852864
copy_status += services::internal::daal_memcpy_s(bCopy.get(), n * nX * sizeof(FPType), b, n * nX * sizeof(FPType));
853865

854866
if (copy_status != 0) return false;
855-
867+
std::cout << "here symmetric system3" << std::endl;
856868
/* Try to solve with Cholesky factorization */
857869
if (!solveEquationsSystemWithCholesky<FPType, cpu>(a, b, n, nX, sequential))
858870
{
871+
std::cout << "here symmetric system4" << std::endl;
859872
/* Fall back to spectral decomposition */
860873
bool status = solveEquationsSystemWithSpectralDecomposition<FPType, cpu>(aCopy.get(), bCopy.get(), n, nX, sequential);
874+
std::cout << "here symmetric system5" << std::endl;
861875
if (status)
862876
{
877+
std::cout << "here symmetric system5" << std::endl;
863878
status = status && (services::internal::daal_memcpy_s(b, n * nX * sizeof(FPType), bCopy.get(), n * nX * sizeof(FPType)) == 0);
879+
std::cout << "here symmetric system6" << std::endl;
864880
}
881+
std::cout << "here symmetric system5555" << std::endl;
865882
return status;
866883
}
884+
std::cout << "here symmetric system55556" << std::endl;
867885
return true;
868886
}
869887
} // namespace internal

0 commit comments

Comments
 (0)