Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ services::Status PredictBinaryClassificationTask<algorithmFPType, cpu>::run(cons
services::HostAppIface * pHostApp, bool predShapContributions,
bool predShapInteractions)
{
// std::cerr << "PredictBinaryClassificationTask" << std::endl;
// assert we're not requesting both contributions and interactions
DAAL_ASSERT(!(predShapContributions && predShapInteractions));

Expand Down Expand Up @@ -468,6 +469,7 @@ services::Status PredictBinaryClassificationTask<algorithmFPType, cpu>::run(cons
PRAGMA_VECTOR_ALWAYS
for (size_t iRow = startRow; iRow < finishRow; ++iRow)
{
// TODO: fix indices - probably this is the 0/1 probability, so no fix is required
res[iRow] = label[services::internal::SignBit<algorithmFPType, cpu>::get(res[iRow])];
prob_pred[2 * iRow + 1] = expVal[iRow] / (algorithmFPType(1.) + expVal[iRow]);
prob_pred[2 * iRow] = algorithmFPType(1.) - prob_pred[2 * iRow + 1];
Expand Down Expand Up @@ -496,6 +498,7 @@ services::Status PredictBinaryClassificationTask<algorithmFPType, cpu>::run(cons
daal::internal::MathInst<algorithmFPType, cpu>::vExp(finishRow - startRow, expVal + startRow, expVal + startRow);
for (size_t iRow = startRow; iRow < finishRow; ++iRow)
{
// TODO: fix - probably this is the 0/1 probability, so no fix is required
prob_pred[2 * iRow + 1] = expVal[iRow] / (algorithmFPType(1.) + expVal[iRow]);
prob_pred[2 * iRow] = algorithmFPType(1.) - prob_pred[2 * iRow + 1];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ public:
for (size_t i = start; i < end; i++)
{
const algorithmFPType sigm = algorithmFPType(1.0) / (algorithmFPType(1.0) + exp[i]);
// TODO: fix
// UPD: probably these are just pairs (gradient/hessian) that are stored in one array
gh[2 * sampleInd[i]] = sigm - y[sampleInd[i]]; //gradient
gh[2 * sampleInd[i] + 1] = sigm * (algorithmFPType(1.0) - sigm); //hessian
}
Expand All @@ -111,6 +113,7 @@ public:
PRAGMA_VECTOR_ALWAYS
for (size_t i = start; i < end; i++)
{
// TODO: fix
const auto sigm = algorithmFPType(1.0) / (algorithmFPType(1.0) + exp[i]);
gh[2 * i] = sigm - y[i]; //gradient
gh[2 * i + 1] = sigm * (algorithmFPType(1.0) - sigm); //hessian
Expand Down Expand Up @@ -145,6 +148,8 @@ public:
{
const algorithmFPType pk = p[k];
const algorithmFPType h = algorithmFPType(2.) * pk * (algorithmFPType(1.) - pk);
// TODO: fix here
// UPD: probably these are just pairs (gradient/hessian) that are stored in one array
algorithmFPType * gh_ik = gh + 2 * (k * nRows + iSample);
gh_ik[1] = h;
if (size_t(y[iSample]) == k)
Expand Down
95 changes: 63 additions & 32 deletions cpp/daal/src/algorithms/dtrees/gbt/gbt_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

#include "services/daal_defines.h"
#include "src/algorithms/dtrees/gbt/gbt_model_impl.h"
#include <cassert>
#include <iostream>
#include <utility>

using namespace daal::data_management;
using namespace daal::services;
Expand Down Expand Up @@ -58,6 +61,7 @@ size_t ModelImpl::numberOfTrees() const

void ModelImpl::traverseDF(size_t iTree, algorithms::regression::TreeNodeVisitor & visitor) const
{
// std::cerr << "traverseDF" << std::endl;
if (iTree >= size()) return;

const GbtDecisionTree & gbtTree = *at(iTree);
Expand All @@ -78,6 +82,7 @@ void ModelImpl::traverseDF(size_t iTree, algorithms::regression::TreeNodeVisitor

void ModelImpl::traverseBF(size_t iTree, algorithms::regression::TreeNodeVisitor & visitor) const
{
// std::cerr << "traverseBF" << std::endl;
if (iTree >= size()) return;

const GbtDecisionTree & gbtTree = *at(iTree);
Expand All @@ -103,6 +108,7 @@ void ModelImpl::traverseBF(size_t iTree, algorithms::regression::TreeNodeVisitor

void ModelImpl::traverseBFS(size_t iTree, tree_utils::regression::TreeNodeVisitor & visitor) const
{
// std::cerr << "traverseBFS" << std::endl;
if (iTree >= size()) return;

const GbtDecisionTree & gbtTree = *at(iTree);
Expand Down Expand Up @@ -143,6 +149,7 @@ void ModelImpl::traverseBFS(size_t iTree, tree_utils::regression::TreeNodeVisito

void ModelImpl::traverseDFS(size_t iTree, tree_utils::regression::TreeNodeVisitor & visitor) const
{
// std::cerr << "traverseDFS" << std::endl;
if (iTree >= size()) return;

const GbtDecisionTree & gbtTree = *at(iTree);
Expand Down Expand Up @@ -225,39 +232,49 @@ void ModelImpl::destroy()
super::destroy();
}

bool ModelImpl::nodeIsDummyLeaf(size_t nodeIndex, const GbtDecisionTree & gbtTree)
{
const size_t childArrayIndex = nodeIndex - 1;
const ModelFPType * splitPoints = gbtTree.getSplitPoints();
const FeatureIndexType * splitFeatures = gbtTree.getFeatureIndexesForSplit();

if (childArrayIndex)
{
// check if child node has same split feature and split value as parent
const size_t parent = getIdxOfParent(nodeIndex);
const size_t parentArrayIndex = parent - 1;
return splitPoints[parentArrayIndex] == splitPoints[childArrayIndex] && splitFeatures[parentArrayIndex] == splitFeatures[childArrayIndex];
}
return false;
}
// bool ModelImpl::nodeIsDummyLeaf(size_t nodeIndex, const GbtDecisionTree & gbtTree)
// {
// std::cerr << "This function should not be called" << std::endl;
// assert(0);
// // TODO fix - function is redundant
// const size_t childArrayIndex = nodeIndex - 1;
// const ModelFPType * splitPoints = gbtTree.getSplitPoints();
// const FeatureIndexType * splitFeatures = gbtTree.getFeatureIndexesForSplit();

// if (childArrayIndex)
// {
// // check if child node has same split feature and split value as parent
// const size_t parent = getIdxOfParent(nodeIndex);
// const size_t parentArrayIndex = parent - 1;
// return splitPoints[parentArrayIndex] == splitPoints[childArrayIndex] && splitFeatures[parentArrayIndex] == splitFeatures[childArrayIndex];
// }
// return false;
// }

/// Check whether the node at position \p idx in the flattened GBT tree is a leaf.
/// A node is a leaf when its stored left-child index points back to itself --
/// the convention used when the tree is built (a leaf's left-child slot is set
/// to its own 1-based position instead of a real child).
/// \param idx     1-based node index into the tree's per-node arrays
/// \param gbtTree tree providing the per-node left-child index array
/// \param lvl     node level; retained for interface compatibility, no longer consulted
/// \return true if the node is a leaf
bool ModelImpl::nodeIsLeaf(size_t idx, const GbtDecisionTree & gbtTree, const size_t lvl)
{
    // NOTE(review): assumes getLeftChildIndexes() entries are 1-based and that a
    // leaf records its own index -- confirm against decisionTreeToGbtTree, which
    // writes idxInTable + 1 for leaves and a larger child index for splits.
    const size_t * leftChildIdx = gbtTree.getLeftChildIndexes();
    return leftChildIdx[idx - 1] == idx;
}

size_t ModelImpl::getIdxOfParent(const size_t childIdx)
{
return childIdx / 2;
}
// size_t ModelImpl::getIdxOfParent(const size_t childIdx)
// {
// // TODO fix
// std::cerr << "This function should not be called" << std::endl;
// assert(0);
// return childIdx / 2;
// }

void ModelImpl::decisionTreeToGbtTree(const DecisionTreeTable & tree, GbtDecisionTree & newTree)
{
Expand All @@ -273,6 +290,8 @@ void ModelImpl::decisionTreeToGbtTree(const DecisionTreeTable & tree, GbtDecisio
NodeType * parents = parentsArr.data();

ModelFPType * const splitPoints = newTree.getSplitPoints();
size_t * const leftChildIndexes = newTree.getLeftChildIndexes();
SplitLeftIdPair* const SplitAndLeftIds = newTree.getSplitsAndLeftIds();
FeatureIndexType * const featureIndexes = newTree.getFeatureIndexesForSplit();
ModelFPType * const nodeCoverValues = newTree.getNodeCoverValues();
int * const defaultLeft = newTree.getDefaultLeftForSplit();
Expand All @@ -286,6 +305,7 @@ void ModelImpl::decisionTreeToGbtTree(const DecisionTreeTable & tree, GbtDecisio
size_t nParents = 1;
parents[0] = arr;
size_t idxInTable = 0;
size_t idxChild = 2;

for (size_t lvl = 0; lvl < nLvls + 1; ++lvl)
{
Expand All @@ -302,18 +322,29 @@ void ModelImpl::decisionTreeToGbtTree(const DecisionTreeTable & tree, GbtDecisio
nodeCoverValues[idxInTable] = p->cover;
defaultLeft[idxInTable] = p->defaultLeft;
DAAL_ASSERT(featureIndexes[idxInTable] >= 0);
splitPoints[idxInTable] = p->featureValueOrResponse;
splitPoints[idxInTable] = p->featureValueOrResponse;
leftChildIndexes[idxInTable] = idxChild;
SplitAndLeftIds[idxInTable].leftId = idxChild;
SplitAndLeftIds[idxInTable].splitPoint = p->featureValueOrResponse;
idxChild += 2;
}
else
{
sons[nSons++] = p;
sons[nSons++] = p;
//sons[nSons++] = p;
//sons[nSons++] = p;
featureIndexes[idxInTable] = 0;
nodeCoverValues[idxInTable] = p->cover;
defaultLeft[idxInTable] = 0;
splitPoints[idxInTable] = p->featureValueOrResponse;
}

//leftChildIndexes[idxInTable] = idxChild;
//idxChild += 2;
leftChildIndexes[idxInTable] = idxInTable + 1;

SplitAndLeftIds[idxInTable].leftId = idxInTable + 1;
SplitAndLeftIds[idxInTable].splitPoint = p->featureValueOrResponse;
}
// leftChildIndexes[idxInTable] = (idxInTable + 1) * 2;
idxInTable++;
}
swap(parents, sons);
Expand Down
Loading
Loading