Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ jobs:
gensim \
opt-einsum \
nltk \
fvcore
fvcore \
scikit-optimize
kill $KA
cd src/main/python
python -m unittest discover -s tests/scuro -p 'test_*.py' -v
21 changes: 3 additions & 18 deletions src/main/python/systemds/scuro/dataloader/video_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,6 @@ def __init__(

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
# if not self.load_data_from_file:
# self.metadata[file] = self.modality_type.create_metadata(
# 30, 10, 100, 100, 3
# )
# else:
cap = cv2.VideoCapture(file)

if not cap.isOpened():
Expand All @@ -71,13 +66,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.fps, length, width, height, num_channels
)

num_frames = (length + frame_interval - 1) // frame_interval

stacked_frames = np.zeros(
(num_frames, height, width, num_channels), dtype=self._data_type
)

frame_idx = 0
frames = []
idx = 0
while cap.isOpened():
ret, frame = cap.read()
Expand All @@ -87,11 +76,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
if idx % frame_interval == 0:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = frame.astype(self._data_type) / 255.0
stacked_frames[frame_idx] = frame
frame_idx += 1
frames.append(frame)
idx += 1

if frame_idx < num_frames:
stacked_frames = stacked_frames[:frame_idx]

self.data.append(stacked_frames)
self.data.append(np.stack(frames))
62 changes: 54 additions & 8 deletions src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
#
# -------------------------------------------------------------
from typing import Dict, List, Tuple, Any, Optional
import numpy as np
from sklearn.model_selection import ParameterGrid
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
import json
import logging
from dataclasses import dataclass
import time
import copy

from systemds.scuro.modality.modality import Modality
from systemds.scuro.drsearch.task import Task


@dataclass
Expand Down Expand Up @@ -163,18 +163,64 @@ def visit_node(node_id):
start_time = time.time()
rep_name = "_".join([rep.__name__ for rep in reps])

param_grid = list(ParameterGrid(hyperparams))
if max_evals and len(param_grid) > max_evals:
np.random.shuffle(param_grid)
param_grid = param_grid[:max_evals]
search_space = []
param_names = []
for param_name, param_values in hyperparams.items():
param_names.append(param_name)
if isinstance(param_values, list):
if all(isinstance(v, (int, float)) for v in param_values):
if all(isinstance(v, int) for v in param_values):
search_space.append(
Integer(
min(param_values), max(param_values), name=param_name
)
)
else:
search_space.append(
Real(min(param_values), max(param_values), name=param_name)
)
else:
search_space.append(Categorical(param_values, name=param_name))
elif isinstance(param_values, tuple) and len(param_values) == 2:
if isinstance(param_values[0], int) and isinstance(
param_values[1], int
):
search_space.append(
Integer(param_values[0], param_values[1], name=param_name)
)
else:
search_space.append(
Real(param_values[0], param_values[1], name=param_name)
)
else:
search_space.append(Categorical([param_values], name=param_name))

n_calls = max_evals if max_evals else 50

all_results = []
for params in param_grid:

@use_named_args(search_space)
def objective(**params):
result = self.evaluate_dag_config(
dag, params, node_order, modality_ids, task
)
all_results.append(result)

score = result[1].average_scores[self.scoring_metric]
if self.maximize_metric:
return -score
else:
return score

result = gp_minimize(
objective,
search_space,
n_calls=n_calls,
random_state=42,
verbose=self.debug,
n_initial_points=min(10, n_calls // 2),
)

if self.maximize_metric:
best_params, best_score = max(
all_results, key=lambda x: x[1].average_scores[self.scoring_metric]
Expand Down
30 changes: 16 additions & 14 deletions src/main/python/systemds/scuro/modality/unimodal_modality.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,26 +156,28 @@ def apply_representation(self, representation):
if current_length < target_length:
padding_needed = target_length - current_length
if pad_dim_one:
padding = np.zeros((embeddings.shape[0], padding_needed))
padded_embeddings.append(
np.concatenate((embeddings, padding), axis=1)
padded = np.pad(
embeddings,
((0, 0), (0, padding_needed)),
mode="constant",
constant_values=0,
)
padded_embeddings.append(padded)
else:
if len(embeddings.shape) == 1:
padded = np.zeros(
embeddings.shape[0] + padding_needed,
dtype=embeddings.dtype,
padded = np.pad(
embeddings,
(0, padding_needed),
mode="constant",
constant_values=0,
)
padded[: embeddings.shape[0]] = embeddings
else:
padded = np.zeros(
(
embeddings.shape[0] + padding_needed,
embeddings.shape[1],
),
dtype=embeddings.dtype,
padded = np.pad(
embeddings,
((0, padding_needed), (0, 0)),
mode="constant",
constant_values=0,
)
padded[: embeddings.shape[0], :] = embeddings
padded_embeddings.append(padded)
else:
padded_embeddings.append(embeddings)
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/systemds/scuro/representations/bow.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
@register_representation(ModalityType.TEXT)
class BoW(UnimodalRepresentation):
def __init__(self, ngram_range=2, min_df=2, output_file=None):
parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
parameters = {"ngram_range": [2, 3, 5, 10], "min_df": [1, 2, 4, 8]}
super().__init__("BoW", ModalityType.EMBEDDING, parameters)
self.ngram_range = int(ngram_range)
self.min_df = int(min_df)
Expand Down
32 changes: 21 additions & 11 deletions src/main/python/systemds/scuro/representations/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from systemds.scuro.utils.torch_dataset import CustomDataset


@register_representation(ModalityType.VIDEO)
@register_representation([ModalityType.VIDEO, ModalityType.IMAGE])
class CLIPVisual(UnimodalRepresentation):
def __init__(self, output_file=None):
parameters = {}
Expand All @@ -46,8 +46,10 @@ def __init__(self, output_file=None):
self.output_file = output_file

def transform(self, modality):
transformed_modality = TransformedModality(modality, self)
self.data_type = numpy_dtype_to_torch_dtype(modality.data_type)
transformed_modality = TransformedModality(
modality, self, self.output_modality_type
)
self.data_type = torch.float32
if next(self.model.parameters()).dtype != self.data_type:
self.model = self.model.to(self.data_type)

Expand All @@ -60,14 +62,20 @@ def transform(self, modality):
return transformed_modality

def create_visual_embeddings(self, modality):
tf = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor()])

clip_transform = transforms.Compose(
[
transforms.ToPILImage(),
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.ConvertImageDtype(dtype=self.data_type),
]
)
dataset = CustomDataset(
modality.data,
self.data_type,
get_device(),
(modality.metadata[0]["width"], modality.metadata[0]["height"]),
tf=tf,
modality.data, self.data_type, get_device(), tf=clip_transform
)

embeddings = {}
for instance in torch.utils.data.DataLoader(dataset):
id = int(instance["id"][0])
Expand All @@ -94,7 +102,7 @@ def create_visual_embeddings(self, modality):
.cpu()
.float()
.numpy()
.astype(modality.data_type)
.astype(np.float32)
)

embeddings[id] = np.array(embeddings[id])
Expand All @@ -113,7 +121,9 @@ def __init__(self, output_file=None):
self.output_file = output_file

def transform(self, modality):
transformed_modality = TransformedModality(modality, self)
transformed_modality = TransformedModality(
modality, self, self.output_modality_type
)

embeddings = self.create_text_embeddings(modality.data, self.model)

Expand Down
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/representations/fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ def transform_with_training(self, modalities: List[Modality], task):
transformed_data = np.zeros(
(len(modalities[0].data), transformed_train.shape[1])
)
transformed_data[task.train_indices] = transformed_train
transformed_data[task.test_indices] = transformed_other
transformed_data[fusion_train_indices] = transformed_train
transformed_data[all_other_indices] = transformed_other

return transformed_data

Expand Down
12 changes: 8 additions & 4 deletions src/main/python/systemds/scuro/representations/lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,11 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

X_tensor = torch.FloatTensor(X).to(device)
X_tensor = torch.FloatTensor(X)
if self.is_multilabel:
y_tensor = torch.FloatTensor(y).to(device)
y_tensor = torch.FloatTensor(y)
else:
y_tensor = torch.LongTensor(y).to(device)
y_tensor = torch.LongTensor(y)

dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
Expand All @@ -201,6 +201,8 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
for epoch in range(self.epochs):
total_loss = 0
for batch_X, batch_y in dataloader:
batch_X = batch_X.to(device)
batch_y = batch_y.to(device)
optimizer.zero_grad()

features, predictions = self.model(batch_X)
Expand Down Expand Up @@ -230,6 +232,7 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False
)
for (batch_X,) in inference_dataloader:
batch_X = batch_X.to(device)
features, _ = self.model(batch_X)
all_features.append(features.cpu())

Expand All @@ -244,14 +247,15 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
device = get_device()
self.model.to(device)

X_tensor = torch.FloatTensor(X).to(device)
X_tensor = torch.FloatTensor(X)
all_features = []
self.model.eval()
with torch.no_grad():
inference_dataloader = DataLoader(
TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False
)
for (batch_X,) in inference_dataloader:
batch_X = batch_X.to(device)
features, _ = self.model(batch_X)
all_features.append(features.cpu())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,12 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
)

for modality_name in inputs:
inputs[modality_name] = inputs[modality_name].to(device)
inputs[modality_name] = inputs[modality_name]

if self.is_multilabel:
labels_tensor = torch.from_numpy(y).float().to(device)
labels_tensor = torch.from_numpy(y).float()
else:
labels_tensor = torch.from_numpy(y).long().to(device)
labels_tensor = torch.from_numpy(y).long()

dataset_inputs = []
for i in range(len(y)):
Expand Down Expand Up @@ -199,9 +199,9 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
for modality_name in batch_inputs:
batch_inputs[modality_name] = torch.stack(
batch_inputs[modality_name]
)
).to(device)

batch_labels = torch.stack(batch_labels)
batch_labels = torch.stack(batch_labels).to(device)

optimizer.zero_grad()

Expand Down Expand Up @@ -250,7 +250,9 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):

batch_inputs = {}
for modality_name, tensor in inputs.items():
batch_inputs[modality_name] = tensor[batch_start:batch_end]
batch_inputs[modality_name] = tensor[batch_start:batch_end].to(
device
)

encoder_output = self.encoder(batch_inputs)
all_features.append(encoder_output["fused"].cpu())
Expand All @@ -266,9 +268,6 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
device = get_device()
self.encoder.to(device)

for modality_name in inputs:
inputs[modality_name] = inputs[modality_name].to(device)

self.encoder.eval()
all_features = []

Expand All @@ -281,7 +280,9 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:

batch_inputs = {}
for modality_name, tensor in inputs.items():
batch_inputs[modality_name] = tensor[batch_start:batch_end]
batch_inputs[modality_name] = tensor[batch_start:batch_end].to(
device
)

encoder_output = self.encoder(batch_inputs)
all_features.append(encoder_output["fused"].cpu())
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/systemds/scuro/representations/resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def hook(
.cpu()
.float()
.numpy()
.astype(modality.data_type)
.astype(np.float32)
)

embeddings[video_id] = np.array(embeddings[video_id])
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/systemds/scuro/representations/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
@register_representation(ModalityType.TEXT)
class TfIdf(UnimodalRepresentation):
def __init__(self, min_df=2, output_file=None):
parameters = {"min_df": [min_df]}
parameters = {"min_df": [min_df, 4, 8]}
super().__init__("TF-IDF", ModalityType.EMBEDDING, parameters)
self.min_df = int(min_df)
self.output_file = output_file
Expand Down
Loading
Loading