Commit 4e93532

Format using ruff

1 parent edf9a13

32 files changed (+397, -110 lines)
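
Every hunk below follows the same pattern from ruff's formatter (black-compatible, 88-character default line length): calls and signatures that overflow the limit are exploded one element per line and closed with a magic trailing comma, and exponents with simple operands lose their spaces (distances ** 2 becomes distances**2, while np.dot(X, self.v) ** 2 keeps them because the operand is a call). A minimal sketch of the splitting rule on a hypothetical signature, not code from this repo:

# Before: a signature past the line-length limit sits on one line.
def train(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None): ...

# After ruff format: one parameter per line, with a trailing comma that
# tells the formatter to keep the exploded layout on future runs.
def train(
    self,
    n_estimators=10,
    max_features=None,
    min_samples_split=10,
    max_depth=None,
): ...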

mla/datasets/base.py

Lines changed: 4 additions & 2 deletions
@@ -39,7 +39,9 @@ def load(dataset="training", digits=np.arange(10)):
     images = zeros((N, rows, cols), dtype=uint8)
     labels = zeros((N, 1), dtype=int8)
     for i in range(len(ind)):
-        images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols))
+        images[i] = array(
+            img[ind[i] * rows * cols : (ind[i] + 1) * rows * cols]
+        ).reshape((rows, cols))
         labels[i] = lbl[ind[i]]

     return images, labels
@@ -64,7 +66,7 @@ def load_nietzsche():
     sentences = []
     next_chars = []
     for i in range(0, len(text) - maxlen, step):
-        sentences.append(text[i: i + maxlen])
+        sentences.append(text[i : i + maxlen])
         next_chars.append(text[i + maxlen])

     X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
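
For orientation, the load_nietzsche slice being reformatted builds overlapping character windows with a one-character prediction target, the standard setup for a character-level language model. A standalone sketch of the same windowing on a toy string (the real function reads the Nietzsche corpus):

text = "machine learning"
maxlen, step = 5, 2

sentences, next_chars = [], []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])  # input window of maxlen chars
    next_chars.append(text[i + maxlen])     # the character to predict

print(sentences[:3])   # ['machi', 'chine', 'ine l']
print(next_chars[:3])  # ['n', ' ', 'e']

(Unrelated to the formatting change: np.bool on the last context line is deprecated in modern NumPy; plain bool is the current spelling.)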

mla/ensemble/base.py

Lines changed: 9 additions & 2 deletions
@@ -14,13 +14,20 @@ def f_entropy(p):


 def information_gain(y, splits):
-    splits_entropy = sum([f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits])
+    splits_entropy = sum(
+        [f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits]
+    )
     return f_entropy(y) - splits_entropy


 def mse_criterion(y, splits):
     y_mean = np.mean(y)
-    return -sum([np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0]) for split in splits])
+    return -sum(
+        [
+            np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0])
+            for split in splits
+        ]
+    )


 def xgb_criterion(y, left, right, loss):
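
To make the reformatted criteria concrete: information_gain returns the parent entropy minus the size-weighted entropy of the child splits. A self-contained numeric check, with a stand-in f_entropy using base-2 Shannon entropy (the repo's own f_entropy may use natural log, which only rescales the result):

import numpy as np

def f_entropy(p):
    # Entropy of the empirical label distribution in p.
    freqs = np.bincount(p) / float(p.shape[0])
    freqs = freqs[freqs > 0]
    return -np.sum(freqs * np.log2(freqs))

y = np.array([0, 0, 0, 0, 1, 1, 1, 1])  # parent entropy: exactly 1 bit
splits = [np.array([0, 0, 0, 1]), np.array([0, 1, 1, 1])]

splits_entropy = sum(
    f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits
)
print(f_entropy(y) - splits_entropy)  # ~0.189: an imperfect split, modest gain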

mla/ensemble/gbm.py

Lines changed: 12 additions & 2 deletions
@@ -1,5 +1,6 @@
 # coding:utf-8
 import numpy as np
+
 # logistic function
 from scipy.special import expit

@@ -32,7 +33,9 @@ def hess(self, actual, predicted):

     def approximate(self, actual, predicted):
         """Approximate leaf value."""
-        return self.grad(actual, predicted).sum() / (self.hess(actual, predicted).sum() + self.regularization)
+        return self.grad(actual, predicted).sum() / (
+            self.hess(actual, predicted).sum() + self.regularization
+        )

     def transform(self, pred):
         """Transform predictions values."""
@@ -73,7 +76,14 @@ def transform(self, output):
 class GradientBoosting(BaseEstimator):
     """Gradient boosting trees with Taylor's expansion approximation (as in xgboost)."""

-    def __init__(self, n_estimators, learning_rate=0.1, max_features=10, max_depth=2, min_samples_split=10):
+    def __init__(
+        self,
+        n_estimators,
+        learning_rate=0.1,
+        max_features=10,
+        max_depth=2,
+        min_samples_split=10,
+    ):
         self.min_samples_split = min_samples_split
         self.learning_rate = learning_rate
         self.max_depth = max_depth
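
The approximate method wrapped above computes xgboost's second-order leaf value: the sum of gradients divided by the sum of hessians plus the regularization term. A standalone sketch assuming squared loss, where the gradient is the residual and the hessian is constant 1:

import numpy as np

regularization = 1.0
actual = np.array([3.0, 2.5, 4.0])
predicted = np.array([2.0, 2.0, 2.0])

grad = actual - predicted    # residuals under squared loss
hess = np.ones_like(actual)  # constant second derivative

# Leaf value: a damped mean residual; regularization shrinks it toward 0.
leaf_value = grad.sum() / (hess.sum() + regularization)
print(leaf_value)  # 3.5 / (3 + 1) = 0.875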

mla/ensemble/random_forest.py

Lines changed: 25 additions & 4 deletions
@@ -7,7 +7,14 @@


 class RandomForest(BaseEstimator):
-    def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion=None):
+    def __init__(
+        self,
+        n_estimators=10,
+        max_features=None,
+        min_samples_split=10,
+        max_depth=None,
+        criterion=None,
+    ):
         """Base class for RandomForest.

         Parameters
@@ -44,15 +51,22 @@ def _train(self):
                 self.y,
                 max_features=self.max_features,
                 min_samples_split=self.min_samples_split,
-                max_depth=self.max_depth
+                max_depth=self.max_depth,
             )

     def _predict(self, X=None):
         raise NotImplementedError()


 class RandomForestClassifier(RandomForest):
-    def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="entropy"):
+    def __init__(
+        self,
+        n_estimators=10,
+        max_features=None,
+        min_samples_split=10,
+        max_depth=None,
+        criterion="entropy",
+    ):
         super(RandomForestClassifier, self).__init__(
             n_estimators=n_estimators,
             max_features=max_features,
@@ -85,7 +99,14 @@ def _predict(self, X=None):


 class RandomForestRegressor(RandomForest):
-    def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="mse"):
+    def __init__(
+        self,
+        n_estimators=10,
+        max_features=None,
+        min_samples_split=10,
+        max_depth=None,
+        criterion="mse",
+    ):
         super(RandomForestRegressor, self).__init__(
             n_estimators=n_estimators,
             max_features=max_features,
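
Nothing here changes behavior; the split signatures just make the shared hyperparameters scannable. A hypothetical usage sketch (toy data invented here; assumes the fit/predict interface of the repo's BaseEstimator):

import numpy as np
from mla.ensemble.random_forest import RandomForestClassifier

X = np.random.rand(100, 4)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)  # synthetic binary labels

model = RandomForestClassifier(
    n_estimators=10,
    max_features=2,        # features sampled per tree
    min_samples_split=10,  # do not split nodes smaller than this
    max_depth=5,
)
model.fit(X, y)
predictions = model.predict(X)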

mla/ensemble/tree.py

Lines changed: 50 additions & 11 deletions
@@ -58,14 +58,24 @@ def _find_best_split(self, X, target, n_features):
                     gain = self.criterion(target["y"], splits)
                 else:
                     # Gradient boosting
-                    left, right = split_dataset(X, target, column, value, return_X=False)
+                    left, right = split_dataset(
+                        X, target, column, value, return_X=False
+                    )
                     gain = xgb_criterion(target, left, right, self.loss)

                 if (max_gain is None) or (gain > max_gain):
                     max_col, max_val, max_gain = column, value, gain
         return max_col, max_val, max_gain

-    def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01):
+    def _train(
+        self,
+        X,
+        target,
+        max_features=None,
+        min_samples_split=10,
+        max_depth=None,
+        minimum_gain=0.01,
+    ):
         try:
             # Exit from recursion using assert syntax
             assert X.shape[0] > min_samples_split
@@ -86,22 +96,43 @@ def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=N
             self.impurity = gain

             # Split dataset
-            left_X, right_X, left_target, right_target = split_dataset(X, target, column, value)
+            left_X, right_X, left_target, right_target = split_dataset(
+                X, target, column, value
+            )

             # Grow left and right child
             self.left_child = Tree(self.regression, self.criterion, self.n_classes)
             self.left_child._train(
-                left_X, left_target, max_features, min_samples_split, max_depth - 1, minimum_gain
+                left_X,
+                left_target,
+                max_features,
+                min_samples_split,
+                max_depth - 1,
+                minimum_gain,
             )

             self.right_child = Tree(self.regression, self.criterion, self.n_classes)
             self.right_child._train(
-                right_X, right_target, max_features, min_samples_split, max_depth - 1, minimum_gain
+                right_X,
+                right_target,
+                max_features,
+                min_samples_split,
+                max_depth - 1,
+                minimum_gain,
             )
         except AssertionError:
             self._calculate_leaf_value(target)

-    def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01, loss=None):
+    def train(
+        self,
+        X,
+        target,
+        max_features=None,
+        min_samples_split=10,
+        max_depth=None,
+        minimum_gain=0.01,
+        loss=None,
+    ):
         """Build a decision tree from training set.

         Parameters
@@ -131,11 +162,16 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
         self.loss = loss

         if not self.regression:
-            self.n_classes = len(np.unique(target['y']))
-
-        self._train(X, target, max_features=max_features, min_samples_split=min_samples_split,
-                    max_depth=max_depth, minimum_gain=minimum_gain)
+            self.n_classes = len(np.unique(target["y"]))

+        self._train(
+            X,
+            target,
+            max_features=max_features,
+            min_samples_split=min_samples_split,
+            max_depth=max_depth,
+            minimum_gain=minimum_gain,
+        )

     def _calculate_leaf_value(self, targets):
         """Find optimal value for leaf."""
@@ -149,7 +185,10 @@ def _calculate_leaf_value(self, targets):
             self.outcome = np.mean(targets["y"])
         else:
             # Probability for classification task
-            self.outcome = np.bincount(targets["y"], minlength=self.n_classes) / targets["y"].shape[0]
+            self.outcome = (
+                np.bincount(targets["y"], minlength=self.n_classes)
+                / targets["y"].shape[0]
+            )

     def predict_row(self, row):
         """Predict single row."""

mla/fm.py

Lines changed: 15 additions & 3 deletions
@@ -6,6 +6,7 @@
 from mla.base import BaseEstimator
 from mla.metrics import mean_squared_error, binary_crossentropy

+
 np.random.seed(9999)

 """
@@ -16,7 +17,14 @@

 class BaseFM(BaseEstimator):
     def __init__(
-        self, n_components=10, max_iter=100, init_stdev=0.1, learning_rate=0.01, reg_v=0.1, reg_w=0.5, reg_w0=0.0
+        self,
+        n_components=10,
+        max_iter=100,
+        init_stdev=0.1,
+        learning_rate=0.01,
+        reg_v=0.1,
+        reg_w=0.5,
+        reg_w0=0.0,
     ):
         """Simplified factorization machines implementation using SGD optimizer."""
         self.reg_w0 = reg_w0
@@ -36,7 +44,9 @@ def fit(self, X, y=None):
         # Feature weights
         self.w = np.zeros(self.n_features)
         # Factor weights
-        self.v = np.random.normal(scale=self.init_stdev, size=(self.n_features, self.n_components))
+        self.v = np.random.normal(
+            scale=self.init_stdev, size=(self.n_features, self.n_components)
+        )
         self._train()

     def _train(self):
@@ -56,7 +66,9 @@ def _factor_step(self, loss):

     def _predict(self, X=None):
         linear_output = np.dot(X, self.w)
-        factors_output = np.sum(np.dot(X, self.v) ** 2 - np.dot(X ** 2, self.v ** 2), axis=1) / 2.0
+        factors_output = (
+            np.sum(np.dot(X, self.v) ** 2 - np.dot(X**2, self.v**2), axis=1) / 2.0
+        )
         return self.wo + linear_output + factors_output
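
The _predict reformatting touches the classic factorization-machine identity: the pairwise interaction term sum_{i<j} <v_i, v_j> x_i x_j equals (1/2) * sum_f [(sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2], which is what the vectorized line computes in O(n*k) instead of O(n^2*k). A small numeric verification of the identity on random toy data:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=6)       # one sample, 6 features
v = rng.normal(size=(6, 3))  # 3 latent factors per feature

# Vectorized form, as in fm.py (single-row version).
fast = np.sum(np.dot(x, v) ** 2 - np.dot(x**2, v**2)) / 2.0

# Brute-force pairwise sum for comparison.
slow = sum(
    np.dot(v[i], v[j]) * x[i] * x[j] for i in range(6) for j in range(i + 1, 6)
)
print(np.isclose(fast, slow))  # True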

mla/gaussian_mixture.py

Lines changed: 12 additions & 4 deletions
@@ -68,7 +68,9 @@ def _initialize(self):
         """
         self.weights = np.ones(self.K)
         if self.init == "random":
-            self.means = [self.X[x] for x in random.sample(range(self.n_samples), self.K)]
+            self.means = [
+                self.X[x] for x in random.sample(range(self.n_samples), self.K)
+            ]
             self.covs = [np.cov(self.X.T) for _ in range(self.K)]

         elif self.init == "kmeans":
@@ -106,7 +108,9 @@ def _M_step(self):

     def _is_converged(self):
         """Check if the difference of the latest two likelihood is less than the tolerance."""
-        if (len(self.likelihood) > 1) and (self.likelihood[-1] - self.likelihood[-2] <= self.tolerance):
+        if (len(self.likelihood) > 1) and (
+            self.likelihood[-1] - self.likelihood[-2] <= self.tolerance
+        ):
             return True
         return False

@@ -123,7 +127,9 @@ def _get_likelihood(self, data):
         n_data = data.shape[0]
         likelihoods = np.zeros([n_data, self.K])
         for c in range(self.K):
-            likelihoods[:, c] = multivariate_normal.pdf(data, self.means[c], self.covs[c])
+            likelihoods[:, c] = multivariate_normal.pdf(
+                data, self.means[c], self.covs[c]
+            )
         return likelihoods

     def _get_weighted_likelihood(self, likelihood):
@@ -151,7 +157,9 @@ def plot(self, data=None, ax=None, holdon=False):
         margin = 0.2
         xmax, ymax = self.X.max(axis=0) + margin
         xmin, ymin = self.X.min(axis=0) - margin
-        axis_X, axis_Y = np.meshgrid(np.arange(xmin, xmax, delta), np.arange(ymin, ymax, delta))
+        axis_X, axis_Y = np.meshgrid(
+            np.arange(xmin, xmax, delta), np.arange(ymin, ymax, delta)
+        )

         def grid_gaussian_pdf(mean, cov):
             grid_array = np.array(list(zip(axis_X.flatten(), axis_Y.flatten())))
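
_is_converged, reformatted above, is EM's standard stopping rule: quit once the likelihood gain between consecutive iterations falls below the tolerance. A standalone illustration of the same check on a made-up likelihood trace:

likelihood = [-1500.2, -1480.7, -1479.9, -1479.85]
tolerance = 0.1

def is_converged(likelihood, tolerance):
    # Mirrors GaussianMixture._is_converged: compare the last two entries.
    if (len(likelihood) > 1) and (likelihood[-1] - likelihood[-2] <= tolerance):
        return True
    return False

print(is_converged(likelihood, tolerance))  # True: final gain 0.05 <= 0.1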

mla/kmeans.py

Lines changed: 13 additions & 5 deletions
@@ -53,7 +53,9 @@ def _initialize_centroids(self, init):
         """Set the initial centroids."""

         if init == "random":
-            self.centroids = [self.X[x] for x in random.sample(range(self.n_samples), self.K)]
+            self.centroids = [
+                self.X[x] for x in random.sample(range(self.n_samples), self.K)
+            ]
         elif init == "++":
             self.centroids = [random.choice(self.X)]
             while len(self.centroids) < self.K:
@@ -88,7 +90,6 @@ def _get_predictions(self):
         return predictions

     def _assign(self, centroids):
-
         for row in range(self.n_samples):
             for i, cluster in enumerate(self.clusters):
                 if row in cluster:
@@ -115,11 +116,13 @@ def _get_centroid(self, cluster):

     def _dist_from_centers(self):
         """Calculate distance from centers."""
-        return np.array([min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X])
+        return np.array(
+            [min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X]
+        )

     def _choose_next_center(self):
         distances = self._dist_from_centers()
-        squared_distances = distances ** 2
+        squared_distances = distances**2
         probs = squared_distances / squared_distances.sum()
         ind = np.random.choice(self.X.shape[0], 1, p=probs)[0]
         return self.X[ind]
@@ -141,7 +144,12 @@ def plot(self, ax=None, holdon=False):

         for i, index in enumerate(self.clusters):
             point = np.array(data[index]).T
-            ax.scatter(*point, c=[palette[i], ])
+            ax.scatter(
+                *point,
+                c=[
+                    palette[i],
+                ],
+            )

         for point in self.centroids:
             ax.scatter(*point, marker="x", linewidths=10)
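
_choose_next_center, touched twice above, is the k-means++ seeding step: sample the next centroid with probability proportional to D(x)^2, the squared distance from each point to its nearest existing center. A compact standalone sketch of that rule:

import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(50, 2))
centroids = [X[0]]  # first center picked uniformly at random

# D(x): distance from each point to its nearest chosen center.
distances = np.array(
    [min(np.linalg.norm(x - c) for c in centroids) for x in X]
)
squared_distances = distances**2
probs = squared_distances / squared_distances.sum()

# Far-away points are proportionally likelier to seed the next center.
next_center = X[rng.choice(X.shape[0], p=probs)]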

mla/knn.py

Lines changed: 4 additions & 1 deletion
@@ -40,7 +40,10 @@ def _predict_x(self, x):
         distances = (self.distance_func(x, example) for example in self.X)

         # Sort all examples by their distance to x and keep their target value.
-        neighbors = sorted(((dist, target) for (dist, target) in zip(distances, self.y)), key=lambda x: x[0])
+        neighbors = sorted(
+            ((dist, target) for (dist, target) in zip(distances, self.y)),
+            key=lambda x: x[0],
+        )

         # Get targets of the k-nn and aggregate them (most common one or
         # average).
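
The reformatted sorted call is the core of _predict_x: pair every training target with its distance to the query, order by distance, then aggregate the k nearest. A self-contained sketch using Euclidean distance and a majority vote (the repo's distance_func and aggregation are configurable):

import numpy as np
from collections import Counter

X = np.array([[0.0, 0.0], [1.0, 0.0], [5.0, 5.0], [0.5, 0.5]])
y = np.array([0, 0, 1, 0])
x, k = np.array([0.2, 0.1]), 3

distances = (np.linalg.norm(x - example) for example in X)
neighbors = sorted(
    ((dist, target) for (dist, target) in zip(distances, y)),
    key=lambda pair: pair[0],
)

# Majority vote over the k nearest targets (classification case).
targets = [target for (_, target) in neighbors[:k]]
print(Counter(targets).most_common(1)[0][0])  # 0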
