@@ -58,14 +58,24 @@ def _find_best_split(self, X, target, n_features):
5858 gain = self .criterion (target ["y" ], splits )
5959 else :
6060 # Gradient boosting
61- left , right = split_dataset (X , target , column , value , return_X = False )
61+ left , right = split_dataset (
62+ X , target , column , value , return_X = False
63+ )
6264 gain = xgb_criterion (target , left , right , self .loss )
6365
6466 if (max_gain is None ) or (gain > max_gain ):
6567 max_col , max_val , max_gain = column , value , gain
6668 return max_col , max_val , max_gain
6769
68- def _train (self , X , target , max_features = None , min_samples_split = 10 , max_depth = None , minimum_gain = 0.01 ):
70+ def _train (
71+ self ,
72+ X ,
73+ target ,
74+ max_features = None ,
75+ min_samples_split = 10 ,
76+ max_depth = None ,
77+ minimum_gain = 0.01 ,
78+ ):
6979 try :
7080 # Exit from recursion using assert syntax
7181 assert X .shape [0 ] > min_samples_split
@@ -86,22 +96,43 @@ def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=N
8696 self .impurity = gain
8797
8898 # Split dataset
89- left_X , right_X , left_target , right_target = split_dataset (X , target , column , value )
99+ left_X , right_X , left_target , right_target = split_dataset (
100+ X , target , column , value
101+ )
90102
91103 # Grow left and right child
92104 self .left_child = Tree (self .regression , self .criterion , self .n_classes )
93105 self .left_child ._train (
94- left_X , left_target , max_features , min_samples_split , max_depth - 1 , minimum_gain
106+ left_X ,
107+ left_target ,
108+ max_features ,
109+ min_samples_split ,
110+ max_depth - 1 ,
111+ minimum_gain ,
95112 )
96113
97114 self .right_child = Tree (self .regression , self .criterion , self .n_classes )
98115 self .right_child ._train (
99- right_X , right_target , max_features , min_samples_split , max_depth - 1 , minimum_gain
116+ right_X ,
117+ right_target ,
118+ max_features ,
119+ min_samples_split ,
120+ max_depth - 1 ,
121+ minimum_gain ,
100122 )
101123 except AssertionError :
102124 self ._calculate_leaf_value (target )
103125
104- def train (self , X , target , max_features = None , min_samples_split = 10 , max_depth = None , minimum_gain = 0.01 , loss = None ):
126+ def train (
127+ self ,
128+ X ,
129+ target ,
130+ max_features = None ,
131+ min_samples_split = 10 ,
132+ max_depth = None ,
133+ minimum_gain = 0.01 ,
134+ loss = None ,
135+ ):
105136 """Build a decision tree from training set.
106137
107138 Parameters
@@ -131,11 +162,16 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
131162 self .loss = loss
132163
133164 if not self .regression :
134- self .n_classes = len (np .unique (target ['y' ]))
135-
136- self ._train (X , target , max_features = max_features , min_samples_split = min_samples_split ,
137- max_depth = max_depth , minimum_gain = minimum_gain )
165+ self .n_classes = len (np .unique (target ["y" ]))
138166
167+ self ._train (
168+ X ,
169+ target ,
170+ max_features = max_features ,
171+ min_samples_split = min_samples_split ,
172+ max_depth = max_depth ,
173+ minimum_gain = minimum_gain ,
174+ )
139175
140176 def _calculate_leaf_value (self , targets ):
141177 """Find optimal value for leaf."""
@@ -149,7 +185,10 @@ def _calculate_leaf_value(self, targets):
149185 self .outcome = np .mean (targets ["y" ])
150186 else :
151187 # Probability for classification task
152- self .outcome = np .bincount (targets ["y" ], minlength = self .n_classes ) / targets ["y" ].shape [0 ]
188+ self .outcome = (
189+ np .bincount (targets ["y" ], minlength = self .n_classes )
190+ / targets ["y" ].shape [0 ]
191+ )
153192
154193 def predict_row (self , row ):
155194 """Predict single row."""
0 commit comments