This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
     new 758e060e33 [SYSTEMDS-3701] Additional scuro data representations
758e060e33 is described below

commit 758e060e33a637d649b12d0c5bdce2d5e5324b03
Author: Christina Dionysio <diony...@tu-berlin.de>
AuthorDate: Tue Sep 24 09:38:56 2024 +0200

    [SYSTEMDS-3701] Additional scuro data representations

    Closes #2111.
---
 src/main/python/systemds/scuro/__init__.py | 51 +++++++
 .../python/systemds/scuro/aligner/dr_search.py | 97 +++++++-----
 src/main/python/systemds/scuro/aligner/task.py | 2 +-
 src/main/python/systemds/scuro/main.py | 24 +--
 .../python/systemds/scuro/modality/__init__.py | 8 -
 .../systemds/scuro/modality/aligned_modality.py | 13 +-
 .../systemds/scuro/modality/audio_modality.py | 4 +-
 .../python/systemds/scuro/modality/modality.py | 2 +-
 .../systemds/scuro/modality/text_modality.py | 4 +-
 .../systemds/scuro/modality/video_modality.py | 4 +-
 src/main/python/systemds/scuro/models/__init__.py | 5 -
 .../python/systemds/scuro/models/discrete_model.py | 2 +-
 .../systemds/scuro/representations/__init__.py | 18 ---
 .../systemds/scuro/representations/average.py | 15 +-
 .../python/systemds/scuro/representations/bert.py | 96 ++++++++++++
 .../scuro/representations/concatenation.py | 24 +--
 .../systemds/scuro/representations/fusion.py | 24 +--
 .../python/systemds/scuro/representations/lstm.py | 4 +-
 .../python/systemds/scuro/representations/max.py | 72 +++++++++
 .../scuro/representations/mel_spectrogram.py | 66 ++++++++
 .../{average.py => multiplication.py} | 32 ++--
 .../systemds/scuro/representations/resnet.py | 168 +++++++++++++++++++++
 .../representations/{average.py => rowmax.py} | 51 +++++--
 .../scuro/representations/{average.py => sum.py} | 28 ++--
 .../systemds/scuro/representations/unimodal.py | 66 +-------
 .../representations/{unimodal.py => utils.py} | 103 ++++++-------
 .../RewriteMatrixMultChainOptSparseTest.java | 20 ++-
 27 files changed, 702 insertions(+), 301 deletions(-)

diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py
index e66abb4646..1ef36539f0 100644
--- a/src/main/python/systemds/scuro/__init__.py
+++ b/src/main/python/systemds/scuro/__init__.py
@@ -18,3 +18,54 @@
 # under the License.
# # ------------------------------------------------------------- +from systemds.scuro.representations.representation import Representation +from systemds.scuro.representations.average import Average +from systemds.scuro.representations.concatenation import Concatenation +from systemds.scuro.representations.fusion import Fusion +from systemds.scuro.representations.sum import Sum +from systemds.scuro.representations.max import RowMax +from systemds.scuro.representations.multiplication import Multiplication +from systemds.scuro.representations.mel_spectrogram import MelSpectrogram +from systemds.scuro.representations.resnet import ResNet +from systemds.scuro.representations.bert import Bert +from systemds.scuro.representations.unimodal import UnimodalRepresentation +from systemds.scuro.representations.lstm import LSTM +from systemds.scuro.representations.utils import NPY, Pickle, HDF5, JSON +from systemds.scuro.models.model import Model +from systemds.scuro.models.discrete_model import DiscreteModel +from systemds.scuro.modality.aligned_modality import AlignedModality +from systemds.scuro.modality.audio_modality import AudioModality +from systemds.scuro.modality.video_modality import VideoModality +from systemds.scuro.modality.text_modality import TextModality +from systemds.scuro.modality.modality import Modality +from systemds.scuro.aligner.dr_search import DRSearch +from systemds.scuro.aligner.task import Task + + +__all__ = ["Representation", + "Average", + "Concatenation", + "Fusion", + "Sum", + "RowMax", + "Multiplication", + "MelSpectrogram", + "ResNet", + "Bert", + "UnimodalRepresentation", + "LSTM", + "NPY", + "Pickle", + "HDF5", + "JSON", + "Model", + "DiscreteModel", + "AlignedModality", + "AudioModality", + "VideoModality", + "TextModality", + "Modality", + "DRSearch", + "Task" + ] + diff --git a/src/main/python/systemds/scuro/aligner/dr_search.py b/src/main/python/systemds/scuro/aligner/dr_search.py index 4bdc7da4a2..b2a92ab75b 100644 --- a/src/main/python/systemds/scuro/aligner/dr_search.py +++ b/src/main/python/systemds/scuro/aligner/dr_search.py @@ -19,19 +19,24 @@ # # ------------------------------------------------------------- import itertools +import random from typing import List -from aligner.task import Task -from modality.aligned_modality import AlignedModality -from modality.modality import Modality -from representations.representation import Representation +from systemds.scuro.aligner.task import Task +from systemds.scuro.modality.aligned_modality import AlignedModality +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.representation import Representation + +import warnings + +warnings.filterwarnings('ignore') def get_modalities_by_name(modalities, name): for modality in modalities: if modality.name == name: return modality - + raise 'Modality ' + name + 'not in modalities' @@ -51,9 +56,9 @@ class DRSearch: self.best_modalities = None self.best_representation = None self.best_score = -1 - + def set_best_params(self, modality_name: str, representation: Representation, - score: float, modality_names: List[str]): + scores: List[float], modality_names: List[str]): """ Updates the best parameters for given modalities, representation, and score :param modality_name: The name of the aligned modality @@ -62,43 +67,66 @@ class DRSearch: :param modality_names: List of modality names used in this setting :return: """ - + # check if modality name is already in dictionary if modality_name not in self.scores.keys(): # if not add it to 
dictionary self.scores[modality_name] = {} - + # set score for representation - self.scores[modality_name][representation] = score - + self.scores[modality_name][representation] = scores + # compare current score with best score - if score > self.best_score: - self.best_score = score + if scores[1] > self.best_score: + self.best_score = scores[1] self.best_representation = representation self.best_modalities = modality_names - - def fit(self): + + def reset_best_params(self): + self.best_score = -1 + self.best_modalities = None + self.best_representation = None + self.scores = {} + + def fit_random(self, seed=-1): + """ + This method randomly selects a modality or combination of modalities and representation + """ + if seed != -1: + random.seed(seed) + + modalities = [] + for M in range(1, len(self.modalities) + 1): + for combination in itertools.combinations(self.modalities, M): + modalities.append(combination) + + modality_combination = random.choice(modalities) + representation = random.choice(self.representations) + + modality = AlignedModality(representation, list(modality_combination)) # noqa + modality.combine() + + scores = self.task.run(modality.data) + self.set_best_params(modality.name, representation, scores, modality.get_modality_names()) + + return self.best_representation, self.best_score, self.best_modalities + + def fit_enumerate_all(self): """ This method finds the best representation out of a given List of uni-modal modalities and representations :return: The best parameters found in the search procedure """ - + for M in range(1, len(self.modalities) + 1): for combination in itertools.combinations(self.modalities, M): - if len(combination) == 1: - modality = combination[0] - score = self.task.run(modality.representation.scale_data(modality.data, self.task.train_indices)) - self.set_best_params(modality.name, modality.representation.name, score, [modality.name]) - self.scores[modality] = score - else: - for representation in self.representations: - modality = AlignedModality(representation, list(combination)) # noqa - modality.combine(self.task.train_indices) - - score = self.task.run(modality.data) - self.set_best_params(modality.name, representation, score, modality.get_modality_names()) - + for representation in self.representations: + modality = AlignedModality(representation, list(combination)) # noqa + modality.combine() + + scores = self.task.run(modality.data) + self.set_best_params(modality.name, representation, scores, modality.get_modality_names()) + return self.best_representation, self.best_score, self.best_modalities def transform(self, modalities: List[Modality]): @@ -108,17 +136,16 @@ class DRSearch: :param modalities: List of uni-modal modalities :return: aligned data """ - + if self.best_score == -1: raise 'Please fit representations first!' 
- + used_modalities = [] - + for modality_name in self.best_modalities: used_modalities.append(get_modalities_by_name(modalities, modality_name)) - + modality = AlignedModality(self.best_representation, used_modalities) # noqa modality.combine(self.task.train_indices) - + return modality.data - \ No newline at end of file diff --git a/src/main/python/systemds/scuro/aligner/task.py b/src/main/python/systemds/scuro/aligner/task.py index 79f9690e65..efaafce32d 100644 --- a/src/main/python/systemds/scuro/aligner/task.py +++ b/src/main/python/systemds/scuro/aligner/task.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- from typing import List -from models.model import Model +from systemds.scuro.models.model import Model class Task: diff --git a/src/main/python/systemds/scuro/main.py b/src/main/python/systemds/scuro/main.py index 22477eb549..0648972fd8 100644 --- a/src/main/python/systemds/scuro/main.py +++ b/src/main/python/systemds/scuro/main.py @@ -22,16 +22,16 @@ import collections import json from datetime import datetime -from representations.average import Averaging -from representations.concatenation import Concatenation -from modality.aligned_modality import AlignedModality -from modality.text_modality import TextModality -from modality.video_modality import VideoModality -from modality.audio_modality import AudioModality -from representations.unimodal import Pickle, JSON, HDF5, NPY -from models.discrete_model import DiscreteModel -from aligner.task import Task -from aligner.dr_search import DRSearch +from systemds.scuro.representations.average import Average +from systemds.scuro.representations.concatenation import Concatenation +from systemds.scuro.modality.aligned_modality import AlignedModality +from systemds.scuro.modality.text_modality import TextModality +from systemds.scuro.modality.video_modality import VideoModality +from systemds.scuro.modality.audio_modality import AudioModality +from systemds.scuro.representations.unimodal import Pickle, JSON, HDF5, NPY +from systemds.scuro.models.discrete_model import DiscreteModel +from systemds.scuro.aligner.task import Task +from systemds.scuro.aligner.dr_search import DRSearch class CustomTask(Task): @@ -66,8 +66,8 @@ modalities = [text, audio, video] model = DiscreteModel() custom_task = CustomTask(model, labels, train_indices, val_indices) -representations = [Concatenation(), Averaging()] +representations = [Concatenation(), Average()] dr_search = DRSearch(modalities, custom_task, representations) -best_representation, best_score, best_modalities = dr_search.fit() +best_representation, best_score, best_modalities = dr_search.fit_random() aligned_representation = dr_search.transform(modalities) diff --git a/src/main/python/systemds/scuro/modality/__init__.py b/src/main/python/systemds/scuro/modality/__init__.py index d09f468da2..e66abb4646 100644 --- a/src/main/python/systemds/scuro/modality/__init__.py +++ b/src/main/python/systemds/scuro/modality/__init__.py @@ -18,11 +18,3 @@ # under the License. 
# # ------------------------------------------------------------- -from systemds.scuro.modality.aligned_modality import AlignedModality -from systemds.scuro.modality.audio_modality import AudioModality -from systemds.scuro.modality.video_modality import VideoModality -from systemds.scuro.modality.test_modality import TextModality -from systemds.scuro.modality.modality import Modality - - -__all__ = ["AlignedModality", "AudioModality", "VideoModality", "TextModality", "Modality"] \ No newline at end of file diff --git a/src/main/python/systemds/scuro/modality/aligned_modality.py b/src/main/python/systemds/scuro/modality/aligned_modality.py index d4d20b962c..7950ec1919 100644 --- a/src/main/python/systemds/scuro/modality/aligned_modality.py +++ b/src/main/python/systemds/scuro/modality/aligned_modality.py @@ -20,8 +20,8 @@ # ------------------------------------------------------------- from typing import List -from modality.modality import Modality -from representations.fusion import Fusion +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.fusion import Fusion class AlignedModality(Modality): @@ -36,9 +36,16 @@ class AlignedModality(Modality): name += modality.name super().__init__(representation, modality_name=name) self.modalities = modalities - + def combine(self): """ Initiates the call to fuse the given modalities depending on the Fusion type """ self.data = self.representation.fuse(self.modalities) # noqa + + def get_modality_names(self): + names = [] + for modality in self.modalities: + names.append(modality.name) + + return names \ No newline at end of file diff --git a/src/main/python/systemds/scuro/modality/audio_modality.py b/src/main/python/systemds/scuro/modality/audio_modality.py index 01c71ad1e0..570faaad77 100644 --- a/src/main/python/systemds/scuro/modality/audio_modality.py +++ b/src/main/python/systemds/scuro/modality/audio_modality.py @@ -20,8 +20,8 @@ # ------------------------------------------------------------- import os -from modality.modality import Modality -from representations.unimodal import UnimodalRepresentation +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.unimodal import UnimodalRepresentation class AudioModality(Modality): diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index c7fe7cff8b..b15321be40 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -19,7 +19,7 @@ # # ------------------------------------------------------------- -from representations.representation import Representation +from systemds.scuro.representations.representation import Representation class Modality: diff --git a/src/main/python/systemds/scuro/modality/text_modality.py b/src/main/python/systemds/scuro/modality/text_modality.py index 71f384626d..ab6d7f0547 100644 --- a/src/main/python/systemds/scuro/modality/text_modality.py +++ b/src/main/python/systemds/scuro/modality/text_modality.py @@ -20,8 +20,8 @@ # ------------------------------------------------------------- import os -from modality.modality import Modality -from representations.unimodal import UnimodalRepresentation +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.unimodal import UnimodalRepresentation class TextModality(Modality): diff --git a/src/main/python/systemds/scuro/modality/video_modality.py 
b/src/main/python/systemds/scuro/modality/video_modality.py index 8062c26a89..110a13ffca 100644 --- a/src/main/python/systemds/scuro/modality/video_modality.py +++ b/src/main/python/systemds/scuro/modality/video_modality.py @@ -20,8 +20,8 @@ # ------------------------------------------------------------- import os -from modality.modality import Modality -from representations.unimodal import UnimodalRepresentation +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.unimodal import UnimodalRepresentation class VideoModality(Modality): diff --git a/src/main/python/systemds/scuro/models/__init__.py b/src/main/python/systemds/scuro/models/__init__.py index d7c003fb48..e66abb4646 100644 --- a/src/main/python/systemds/scuro/models/__init__.py +++ b/src/main/python/systemds/scuro/models/__init__.py @@ -18,8 +18,3 @@ # under the License. # # ------------------------------------------------------------- -from systemds.scuro.models import Model -from systemds.scuro.discrete_model import DiscreteModel - - -__all__ = ["Model", "DiscreteModel"] \ No newline at end of file diff --git a/src/main/python/systemds/scuro/models/discrete_model.py b/src/main/python/systemds/scuro/models/discrete_model.py index 994f0882e5..288643e5d8 100644 --- a/src/main/python/systemds/scuro/models/discrete_model.py +++ b/src/main/python/systemds/scuro/models/discrete_model.py @@ -18,7 +18,7 @@ # under the License. # # ------------------------------------------------------------- -from models.model import Model +from systemds.scuro.models.model import Model class DiscreteModel(Model): diff --git a/src/main/python/systemds/scuro/representations/__init__.py b/src/main/python/systemds/scuro/representations/__init__.py index 9a2007319d..e66abb4646 100644 --- a/src/main/python/systemds/scuro/representations/__init__.py +++ b/src/main/python/systemds/scuro/representations/__init__.py @@ -18,21 +18,3 @@ # under the License. 
# # ------------------------------------------------------------- -from systemds.scuro.representations.representation import Representation -from systemds.scuro.representations.average import Average -from systemds.scuro.representations.concatenation import Concatenation -from systemds.scuro.representations.fusion import Fusion -from systemds.scuro.representations.unimodal import UnimodalRepresentation, HDF5, NPY, Pickle, JSON -from systemds.scuro.representations.lstm import LSTM - - -__all__ = ["Representation", - "Average", - "Concatenation", - "Fusion", - "UnimodalRepresentation", - "HDF5", - "NPY", - "Pickle", - "JSON", - "LSTM"] diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py index 77896b1914..11ce431566 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/average.py @@ -23,18 +23,18 @@ from typing import List import numpy as np -from modality.modality import Modality +from systemds.scuro.modality.modality import Modality from keras.api.preprocessing.sequence import pad_sequences -from representations.fusion import Fusion +from systemds.scuro.representations.fusion import Fusion -class Averaging(Fusion): +class Average(Fusion): def __init__(self): """ Combines modalities using averaging """ - super().__init__('Averaging') + super().__init__('Average') def fuse(self, modalities: List[Modality]): max_emb_size = self.get_max_embedding_size(modalities) @@ -43,12 +43,11 @@ class Averaging(Fusion): for modality in modalities: d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post') padded_modalities.append(d) - + data = padded_modalities[0] for i in range(1, len(modalities)): data += padded_modalities[i] - - data = self.scale_data(data, modalities[0].train_indices) + data /= len(modalities) - + return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/bert.py b/src/main/python/systemds/scuro/representations/bert.py new file mode 100644 index 0000000000..365b39c322 --- /dev/null +++ b/src/main/python/systemds/scuro/representations/bert.py @@ -0,0 +1,96 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +import pickle + +import numpy as np + +from systemds.scuro.representations.unimodal import UnimodalRepresentation +import torch +from transformers import BertTokenizer, BertModel +import os + + +def read_text_file(file_path): + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read() + return text + + +class Bert(UnimodalRepresentation): + def __init__(self, avg_layers=None, output_file=None): + super().__init__('Bert') + + self.avg_layers = avg_layers + self.output_file = output_file + + def parse_all(self, filepath, indices, get_sequences=False): + # Assumes text is stored in .txt files + data = [] + if os.path.isdir(filepath): + for filename in os.listdir(filepath): + f = os.path.join(filepath, filename) + if os.path.isfile(f): + with open(f, 'r') as file: + data.append(file.readlines()[0]) + else: + with open(filepath, 'r') as file: + data = file.readlines() + + model_name = 'bert-base-uncased' + tokenizer = BertTokenizer.from_pretrained(model_name) + + if self.avg_layers is not None: + model = BertModel.from_pretrained(model_name, output_hidden_states=True) + else: + model = BertModel.from_pretrained(model_name) + + embeddings = self.create_embeddings(data, model, tokenizer) + + if self.output_file is not None: + data = {} + for i in range(0, embeddings.shape[0]): + data[indices[i]] = embeddings[i] + self.save_embeddings(data) + + return embeddings + + def create_embeddings(self, data, model, tokenizer): + embeddings = [] + for d in data: + inputs = tokenizer(d, return_tensors="pt", padding=True, truncation=True) + + with torch.no_grad(): + outputs = model(**inputs) + + if self.avg_layers is not None: + cls_embedding = [outputs.hidden_states[i][:, 0, :] for i in range(-self.avg_layers, 0)] + cls_embedding = torch.mean(torch.stack(cls_embedding), dim=0) + else: + cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy() + embeddings.append(cls_embedding) + + embeddings = np.array(embeddings) + return embeddings.reshape((embeddings.shape[0], embeddings.shape[-1])) + + def save_embeddings(self, data): + with open(self.output_file, 'wb') as file: + pickle.dump(data, file) diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py index 149e1f8801..81b6fe33fc 100644 --- a/src/main/python/systemds/scuro/representations/concatenation.py +++ b/src/main/python/systemds/scuro/representations/concatenation.py @@ -23,10 +23,10 @@ from typing import List import numpy as np -from modality.modality import Modality +from systemds.scuro.modality.modality import Modality from keras.api.preprocessing.sequence import pad_sequences -from representations.fusion import Fusion +from systemds.scuro.representations.fusion import Fusion class Concatenation(Fusion): @@ -38,15 +38,21 @@ class Concatenation(Fusion): self.padding = padding def fuse(self, modalities: List[Modality]): + if len(modalities) == 1: + return np.array(modalities[0].data) + max_emb_size = self.get_max_embedding_size(modalities) - size = len(modalities[0].data) - data = np.zeros((size, 0)) - + + if modalities[0].data.ndim > 2: + data = np.zeros((size, max_emb_size, 0)) + else: + data = np.zeros((size, 0)) + for modality in modalities: if self.padding: - data = np.concatenate(pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post'), axis=1) + data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', 
padding='post')], axis=-1) else: - data = np.concatenate([data, modality.data], axis=1) - - return self.scale_data(data, modalities[0].train_indices) + data = np.concatenate([data, modality.data], axis=-1) + + return np.array(data) \ No newline at end of file diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py index 4e242137f1..04e9ebbb64 100644 --- a/src/main/python/systemds/scuro/representations/fusion.py +++ b/src/main/python/systemds/scuro/representations/fusion.py @@ -22,8 +22,8 @@ from typing import List from sklearn.preprocessing import StandardScaler -from modality.modality import Modality -from representations.representation import Representation +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.representation import Representation class Fusion(Representation): @@ -33,7 +33,7 @@ class Fusion(Representation): :param name: Name of the fusion type """ super().__init__(name) - + def fuse(self, modalities: List[Modality]): """ Implemented for every child class and creates a fused representation out of @@ -42,7 +42,7 @@ class Fusion(Representation): :return: fused data """ raise f'Not implemented for Fusion: {self.name}' - + def get_max_embedding_size(self, modalities: List[Modality]): """ Computes the maximum embedding size from a given list of modalities @@ -56,17 +56,5 @@ class Fusion(Representation): raise f'Modality sizes don\'t match!' elif curr_shape[1] > max_size: max_size = curr_shape[1] - - return max_size - - def scale_data(self, data, train_indices): - """ - Scales the data using the StandardScaler. - The scaler is fit on the training data before performing the scaling on the whole data array - :param data: data to be scaled - :param train_indices: - :return: scaled data - """ - scaler = StandardScaler() - scaler.fit(data[train_indices]) - return scaler.transform(data) + + return max_size \ No newline at end of file diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py index a38ca1e577..dcdd9b65c1 100644 --- a/src/main/python/systemds/scuro/representations/lstm.py +++ b/src/main/python/systemds/scuro/representations/lstm.py @@ -25,8 +25,8 @@ from typing import List import numpy as np -from modality.modality import Modality -from representations.fusion import Fusion +from systemds.scuro.modality.modality import Modality +from systemds.scuro.representations.fusion import Fusion class LSTM(Fusion): diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py new file mode 100644 index 0000000000..2f58581cb8 --- /dev/null +++ b/src/main/python/systemds/scuro/representations/max.py @@ -0,0 +1,72 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- +import itertools +from typing import List + +import numpy as np + +from systemds.scuro.modality.modality import Modality +from keras.preprocessing.sequence import pad_sequences + +from systemds.scuro.representations.fusion import Fusion + + +class RowMax(Fusion): + def __init__(self, split=4): + """ + Combines modalities by computing the outer product of a modality combination and + taking the row max + """ + super().__init__('RowMax') + self.split = split + + def fuse(self, modalities: List[Modality],): + if len(modalities) < 2: + return np.array(modalities[0].data) + + max_emb_size = self.get_max_embedding_size(modalities) + + padded_modalities = [] + for modality in modalities: + d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post') + padded_modalities.append(d) + + split_rows = int(len(modalities[0].data) / self.split) + + data = [] + + for combination in itertools.combinations(padded_modalities, 2): + combined = None + for i in range(0, self.split): + start = split_rows * i + end = split_rows * (i + 1) if i < (self.split - 1) else len(modalities[0].data) + m = np.einsum('bi,bo->bio', combination[0][start:end], combination[1][start:end]) + m = m.max(axis=2) + if combined is None: + combined = m + else: + combined = np.concatenate((combined, m), axis=0) + data.append(combined) + + data = np.stack(data) + data = data.max(axis=0) + + return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py new file mode 100644 index 0000000000..395b2977a0 --- /dev/null +++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py @@ -0,0 +1,66 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +import os +import pickle + +import librosa +import numpy as np +from keras.src.utils import pad_sequences + +from systemds.scuro.representations.unimodal import UnimodalRepresentation + + +class MelSpectrogram(UnimodalRepresentation): + def __init__(self, avg=True, output_file=None): + super().__init__('MelSpectrogram') + self.avg = avg + self.output_file = output_file + + def parse_all(self, file_path, indices, get_sequences=False): + result = [] + max_length = 0 + if os.path.isdir(file_path): + for filename in os.listdir(file_path): + f = os.path.join(file_path, filename) + if os.path.isfile(f): + y, sr = librosa.load(f) + S = librosa.feature.melspectrogram(y=y, sr=sr) + S_dB = librosa.power_to_db(S, ref=np.max) + if S_dB.shape[-1] > max_length: + max_length = S_dB.shape[-1] + result.append(S_dB) + + r = [] + for elem in result: + d = pad_sequences(elem, maxlen=max_length, dtype='float32', padding='post') + r.append(d) + + np_array_r = np.array(r) if not self.avg else np.mean(np.array(r), axis=1) + + if self.output_file is not None: + data = {} + for i in range(0, np_array_r.shape[0]): + data[indices[i]] = np_array_r[i] + with open(self.output_file, 'wb') as file: + pickle.dump(data, file) + + return np_array_r diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/multiplication.py similarity index 59% copy from src/main/python/systemds/scuro/representations/average.py copy to src/main/python/systemds/scuro/representations/multiplication.py index 77896b1914..2b3ae64eac 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/multiplication.py @@ -23,32 +23,26 @@ from typing import List import numpy as np -from modality.modality import Modality -from keras.api.preprocessing.sequence import pad_sequences +from systemds.scuro.modality.modality import Modality +from keras.preprocessing.sequence import pad_sequences -from representations.fusion import Fusion +from systemds.scuro.representations.fusion import Fusion -class Averaging(Fusion): +class Multiplication(Fusion): def __init__(self): """ - Combines modalities using averaging + Combines modalities using elementwise multiply """ - super().__init__('Averaging') + super().__init__('Multiplication') - def fuse(self, modalities: List[Modality]): + def fuse(self, modalities: List[Modality], train_indices=None): max_emb_size = self.get_max_embedding_size(modalities) - - padded_modalities = [] - for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post') - padded_modalities.append(d) - - data = padded_modalities[0] - for i in range(1, len(modalities)): - data += padded_modalities[i] - data = self.scale_data(data, modalities[0].train_indices) - data /= len(modalities) + data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post') + + for m in range(1, len(modalities)): + # scaled = self.scale_data(modalities[m].data, train_indices) + data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post')) - return np.array(data) + return data diff --git a/src/main/python/systemds/scuro/representations/resnet.py b/src/main/python/systemds/scuro/representations/resnet.py new file mode 100644 index 0000000000..52802288de --- /dev/null +++ b/src/main/python/systemds/scuro/representations/resnet.py @@ -0,0 +1,168 @@ +# 
------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + + +import h5py + +from systemds.scuro.representations.unimodal import UnimodalRepresentation +from typing import Callable, Dict, Tuple, Any +import torch.utils.data +import os +import cv2 +import torch +import torchvision.models as models +import torchvision.transforms as transforms +import numpy as np + +DEVICE = 'cpu' + + +class ResNet(UnimodalRepresentation): + def __init__(self, output_file=None): + super().__init__('ResNet') + + self.output_file = output_file + + def parse_all(self, file_path, indices, get_sequences=False): + resnet = models.resnet152(weights=models.ResNet152_Weights.DEFAULT) + resnet.eval() + + for param in resnet.parameters(): + param.requires_grad = False + + transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + dataset = ResNetDataset(transform=transform, video_folder_path=file_path) + embeddings = {} + + class Identity(torch.nn.Module): + def forward(self, input_: torch.Tensor) -> torch.Tensor: + return input_ + + resnet.fc = Identity() + + res5c_output = None + + def avg_pool_hook(_module: torch.nn.Module, input_: Tuple[torch.Tensor], _output: Any) -> None: + nonlocal res5c_output + res5c_output = input_[0] + + resnet.avgpool.register_forward_hook(avg_pool_hook) + + for instance in torch.utils.data.DataLoader(dataset): + video_id = instance["id"][0] + frames = instance["frames"][0].to(DEVICE) + embeddings[video_id] = torch.empty((len(frames), 2048)) + batch_size = 32 + for start_index in range(0, len(frames), batch_size): + end_index = min(start_index + batch_size, len(frames)) + frame_ids_range = range(start_index, end_index) + frame_batch = frames[frame_ids_range] + + avg_pool_value = resnet(frame_batch) + + embeddings[video_id][frame_ids_range] = avg_pool_value.to(DEVICE) + + if self.output_file is not None: + with h5py.File(self.output_file, 'w') as hdf: + for key, value in embeddings.items(): + hdf.create_dataset(key, data=value) + + emb = np.zeros((len(indices), 2048), dtype='float32') + if indices is not None: + for i in indices: + emb[i] = embeddings.get(str(i)).mean(dim=0).numpy() + else: + for i, key in enumerate(embeddings.keys()): + emb[i] = embeddings.get(key).mean(dim=0).numpy() + + return emb + + @staticmethod + def extract_features_from_video(video_path, model, transform): + cap = cv2.VideoCapture(video_path) + features = [] + count = 0 + success, frame = cap.read() + + while success: + success, frame = cap.read() + transformed_frame = transform(frame).unsqueeze(0) + + with 
torch.no_grad(): + feature_vector = model(transformed_frame) + feature_vector = feature_vector.view(-1).numpy() + + features.append(feature_vector) + + count += 1 + + cap.release() + return features, count + + +class ResNetDataset(torch.utils.data.Dataset): + def __init__(self, video_folder_path: str, transform: Callable = None): + self.video_folder_path = video_folder_path + self.transform = transform + self.video_ids = [] + video_files = [f for f in os.listdir(self.video_folder_path) if + f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))] + self.file_extension = video_files[0].split('.')[-1] + + for video in video_files: + video_id, _ = video.split('/')[-1].split('.') + self.video_ids.append(video_id) + + self.frame_count_by_video_id = {video_id: 0 for video_id in self.video_ids} + + def __getitem__(self, index) -> Dict[str, object]: + video_id = self.video_ids[index] + video_path = self.video_folder_path + '/' + video_id + '.' + self.file_extension + + frames = None + count = 0 + + cap = cv2.VideoCapture(video_path) + + success, frame = cap.read() + + num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + self.frame_count_by_video_id[video_id] = num_frames + if frames is None and success: + frames = torch.empty((num_frames, 3, 224, 224)) + + while success: + frame = self.transform(frame) + frames[count] = frame # noqa + success, frame = cap.read() + count += 1 + + cap.release() + return {"id": video_id, "frames": frames} + + def __len__(self) -> int: + return len(self.video_ids) diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/rowmax.py similarity index 50% copy from src/main/python/systemds/scuro/representations/average.py copy to src/main/python/systemds/scuro/representations/rowmax.py index 77896b1914..c4184687a1 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/rowmax.py @@ -18,7 +18,7 @@ # under the License. 
# # ------------------------------------------------------------- - +import itertools from typing import List import numpy as np @@ -29,26 +29,45 @@ from keras.api.preprocessing.sequence import pad_sequences from representations.fusion import Fusion -class Averaging(Fusion): - def __init__(self): +class RowMax(Fusion): + def __init__(self, split=1): """ - Combines modalities using averaging + Combines modalities by computing the outer product of a modality combination and + taking the row max """ - super().__init__('Averaging') - - def fuse(self, modalities: List[Modality]): + super().__init__('RowMax') + self.split = split + + def fuse(self, modalities: List[Modality], train_indices): + if len(modalities) < 2: + return np.array(modalities) + max_emb_size = self.get_max_embedding_size(modalities) padded_modalities = [] for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post') + scaled = self.scale_data(modality.data, train_indices) + d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32', padding='post') padded_modalities.append(d) - - data = padded_modalities[0] - for i in range(1, len(modalities)): - data += padded_modalities[i] - - data = self.scale_data(data, modalities[0].train_indices) - data /= len(modalities) - + + split_rows = int(len(modalities[0].data) / self.split) + + data = [] + + for combination in itertools.combinations(padded_modalities, 2): + combined = None + for i in range(0, self.split): + start = split_rows * i + end = split_rows * (i + 1) if i < (self.split - 1) else len(modalities[0].data) + m = np.einsum('bi,bo->bio', combination[0][start:end], combination[1][start:end]) + m = m.max(axis=2) + if combined is None: + combined = m + else: + combined = np.concatenate((combined, m), axis=0) + data.append(combined) + + data = np.stack(data) + data = data.max(axis=0) + return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/sum.py similarity index 60% copy from src/main/python/systemds/scuro/representations/average.py copy to src/main/python/systemds/scuro/representations/sum.py index 77896b1914..9c75606627 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/sum.py @@ -21,34 +21,26 @@ from typing import List -import numpy as np -from modality.modality import Modality -from keras.api.preprocessing.sequence import pad_sequences +from systemds.scuro.modality.modality import Modality +from keras.preprocessing.sequence import pad_sequences -from representations.fusion import Fusion +from systemds.scuro.representations.fusion import Fusion -class Averaging(Fusion): +class Sum(Fusion): def __init__(self): """ - Combines modalities using averaging + Combines modalities using colum-wise sum """ - super().__init__('Averaging') + super().__init__('Sum') def fuse(self, modalities: List[Modality]): max_emb_size = self.get_max_embedding_size(modalities) - padded_modalities = [] - for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post') - padded_modalities.append(d) - - data = padded_modalities[0] - for i in range(1, len(modalities)): - data += padded_modalities[i] + data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post') - data = self.scale_data(data, modalities[0].train_indices) - data /= len(modalities) + for m in range(1, len(modalities)): + data += 
pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post') - return np.array(data) + return data diff --git a/src/main/python/systemds/scuro/representations/unimodal.py b/src/main/python/systemds/scuro/representations/unimodal.py index 659ad32468..da0e721a57 100644 --- a/src/main/python/systemds/scuro/representations/unimodal.py +++ b/src/main/python/systemds/scuro/representations/unimodal.py @@ -18,13 +18,7 @@ # under the License. # # ------------------------------------------------------------- -import json -import pickle - -import h5py -import numpy as np - -from representations.representation import Representation +from systemds.scuro.representations.representation import Representation class UnimodalRepresentation(Representation): @@ -42,61 +36,3 @@ class UnimodalRepresentation(Representation): class PixelRepresentation(UnimodalRepresentation): def __init__(self): super().__init__('Pixel') - - -class ResNet(UnimodalRepresentation): - def __init__(self): - super().__init__('ResNet') - - -class Pickle(UnimodalRepresentation): - def __init__(self): - super().__init__('Pickle') - - def parse_all(self, filepath, indices): - with open(filepath, "rb") as file: - data = pickle.load(file, encoding='latin1') - - if indices is not None: - for n, idx in enumerate(indices): - result = np.empty((len(data), np.mean(data[idx][()], axis=1).shape[0])) - break - for n, idx in enumerate(indices): - result[n] = np.mean(data[idx], axis=1) - return result - else: - return np.array([np.mean(data[index], axis=1) for index in data]) - - -class JSON(UnimodalRepresentation): - def __init__(self): - super().__init__('JSON') - - def parse_all(self, filepath, indices): - with open(filepath) as file: - return json.load(file) - - -class NPY(UnimodalRepresentation): - def __init__(self): - super().__init__('NPY') - - def parse_all(self, filepath, indices): - data = np.load(filepath) - - if indices is not None: - return np.array([data[n, 0] for n, index in enumerate(indices)]) - else: - return np.array([data[index, 0] for index in data]) - - -class HDF5(UnimodalRepresentation): - def __init__(self): - super().__init__('HDF5') - - def parse_all(self, filepath, indices=None): - data = h5py.File(filepath) - if indices is not None: - return np.array([np.mean(data[index][()], axis=0) for index in indices]) - else: - return np.array([np.mean(data[index][()], axis=0) for index in data]) diff --git a/src/main/python/systemds/scuro/representations/unimodal.py b/src/main/python/systemds/scuro/representations/utils.py similarity index 50% copy from src/main/python/systemds/scuro/representations/unimodal.py copy to src/main/python/systemds/scuro/representations/utils.py index 659ad32468..d611cd9c71 100644 --- a/src/main/python/systemds/scuro/representations/unimodal.py +++ b/src/main/python/systemds/scuro/representations/utils.py @@ -18,85 +18,78 @@ # under the License. 
# # ------------------------------------------------------------- + + import json import pickle import h5py import numpy as np -from representations.representation import Representation - - -class UnimodalRepresentation(Representation): - def __init__(self, name): - """ - Parent class for all unimodal representation types - :param name: name of the representation - """ - super().__init__(name) - - def parse_all(self, file_path, indices): - raise f'Not implemented for {self.name}' - - -class PixelRepresentation(UnimodalRepresentation): - def __init__(self): - super().__init__('Pixel') - - -class ResNet(UnimodalRepresentation): - def __init__(self): - super().__init__('ResNet') +from systemds.scuro.representations.unimodal import UnimodalRepresentation -class Pickle(UnimodalRepresentation): +class NPY(UnimodalRepresentation): def __init__(self): - super().__init__('Pickle') + super().__init__('NPY') - def parse_all(self, filepath, indices): - with open(filepath, "rb") as file: - data = pickle.load(file, encoding='latin1') + def parse_all(self, filepath, indices, get_sequences=False): + data = np.load(filepath, allow_pickle=True) if indices is not None: - for n, idx in enumerate(indices): - result = np.empty((len(data), np.mean(data[idx][()], axis=1).shape[0])) - break - for n, idx in enumerate(indices): - result[n] = np.mean(data[idx], axis=1) - return result + return np.array([data[index] for index in indices]) else: - return np.array([np.mean(data[index], axis=1) for index in data]) + return np.array([data[index] for index in data]) -class JSON(UnimodalRepresentation): +class Pickle(UnimodalRepresentation): def __init__(self): - super().__init__('JSON') + super().__init__('Pickle') - def parse_all(self, filepath, indices): - with open(filepath) as file: - return json.load(file) + def parse_all(self, file_path, indices, get_sequences=False): + with open(file_path, 'rb') as f: + data = pickle.load(f) + + embeddings = [] + for n, idx in enumerate(indices): + embeddings.append(data[idx]) + + return np.array(embeddings) -class NPY(UnimodalRepresentation): +class HDF5(UnimodalRepresentation): def __init__(self): - super().__init__('NPY') + super().__init__('HDF5') - def parse_all(self, filepath, indices): - data = np.load(filepath) + def parse_all(self, filepath, indices=None, get_sequences=False): + data = h5py.File(filepath) - if indices is not None: - return np.array([data[n, 0] for n, index in enumerate(indices)]) + if get_sequences: + max_emb = 0 + for index in indices: + if max_emb < len(data[index][()]): + max_emb = len(data[index][()]) + + emb = [] + if indices is not None: + for index in indices: + emb_i = data[index].tolist() + for i in range(len(emb_i), max_emb): + emb_i.append([0 for x in range(0, len(emb_i[0]))]) + emb.append(emb_i) + + return np.array(emb) else: - return np.array([data[index, 0] for index in data]) + if indices is not None: + return np.array([np.mean(data[index], axis=0) for index in indices]) + else: + return np.array([np.mean(data[index][()], axis=0) for index in data]) -class HDF5(UnimodalRepresentation): +class JSON(UnimodalRepresentation): def __init__(self): - super().__init__('HDF5') + super().__init__('JSON') - def parse_all(self, filepath, indices=None): - data = h5py.File(filepath) - if indices is not None: - return np.array([np.mean(data[index][()], axis=0) for index in indices]) - else: - return np.array([np.mean(data[index][()], axis=0) for index in data]) + def parse_all(self, filepath, indices): + with open(filepath) as file: + return json.load(file) 
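
[Editor's note] For readers following the new scuro API: the updated main.py in this commit wires the pieces above together roughly as sketched below. This is a minimal sketch, not part of the patch; the pre-built text/audio/video modalities and the labels, train_indices, and val_indices arrays are placeholders assumed to exist, and the custom task's run() is assumed to return a [train_score, val_score] pair, since DRSearch.set_best_params compares scores[1] against the current best.

    from systemds.scuro import Average, Concatenation, DiscreteModel, DRSearch, Task

    class CustomTask(Task):
        def run(self, data):
            # Train/evaluate the model on the fused data and return
            # [train_score, val_score]; DRSearch uses scores[1] as the score.
            ...

    modalities = [text, audio, video]               # pre-built uni-modal modalities (placeholders)
    representations = [Concatenation(), Average()]

    task = CustomTask(DiscreteModel(), labels, train_indices, val_indices)
    dr_search = DRSearch(modalities, task, representations)

    # fit_random() samples one modality combination and one representation;
    # fit_enumerate_all() exhaustively searches all combinations instead.
    best_representation, best_score, best_modalities = dr_search.fit_random(seed=42)
    aligned_data = dr_search.transform(modalities)
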
diff --git a/src/test/java/org/apache/sysds/test/functions/rewrite/RewriteMatrixMultChainOptSparseTest.java b/src/test/java/org/apache/sysds/test/functions/rewrite/RewriteMatrixMultChainOptSparseTest.java index a3bafd2ebb..f5c9bc09b7 100644 --- a/src/test/java/org/apache/sysds/test/functions/rewrite/RewriteMatrixMultChainOptSparseTest.java +++ b/src/test/java/org/apache/sysds/test/functions/rewrite/RewriteMatrixMultChainOptSparseTest.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.sysds.test.functions.rewrite; import org.apache.sysds.hops.OptimizerUtils; @@ -76,7 +95,6 @@ public class RewriteMatrixMultChainOptSparseTest extends AutomatedTestBase { Assert.assertFalse(heavyHittersContainsSubString("mmchain") || heavyHittersContainsSubString("sp_mapmmchain")); } - } finally { OptimizerUtils.ALLOW_ADVANCED_MMCHAIN_REWRITES = oldFlag1;
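
[Editor's note] As a supplementary illustration (not part of the commit): the core of the new RowMax fusion in max.py is a per-sample outer product of two modalities' embeddings followed by a row-wise max. A standalone NumPy sketch of that step, with toy shapes and randomly generated placeholder inputs, looks like this:

    import numpy as np

    rng = np.random.default_rng(0)
    a = rng.random((8, 5))   # 8 samples, 5-dim embeddings from one modality
    b = rng.random((8, 5))   # 8 samples, 5-dim embeddings from another modality
                             # (max.py first pads both to a common embedding size)

    # Per-sample outer product of shape (8, 5, 5), then max over the last axis -> (8, 5),
    # mirroring np.einsum('bi,bo->bio', ...) followed by .max(axis=2) in max.py.
    fused = np.einsum('bi,bo->bio', a, b).max(axis=2)
    print(fused.shape)  # (8, 5)
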