This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new ec164cd93e [SYSTEMDS-3701] Add new modalities and representations to scuro
ec164cd93e is described below
commit ec164cd93e7e69f02b7646f50cd13a5e26b050d1
Author: Christina Dionysio <[email protected]>
AuthorDate: Sat Aug 17 16:08:13 2024 +0200
[SYSTEMDS-3701] Add new modalities and representations to scuro
Closes #2060.
---
src/main/python/systemds/scuro/main.py | 58 ++++++++----
.../python/systemds/scuro/modality/__init__.py | 8 ++
.../systemds/scuro/modality/aligned_modality.py | 23 +++--
.../{video_modality.py => audio_modality.py} | 20 ++--
.../python/systemds/scuro/modality/modality.py | 18 +++-
.../{video_modality.py => text_modality.py} | 20 ++--
.../systemds/scuro/modality/video_modality.py | 18 ++--
.../scuro/{modality => models}/__init__.py | 5 +
.../representation.py => models/discrete_model.py} | 26 ++++--
.../{modality/modality.py => models/model.py} | 30 +++---
.../{modality => representations}/__init__.py | 8 ++
.../average.py} | 49 +++++-----
.../concatenation.py} | 47 +++++-----
.../systemds/scuro/representations/fusion.py | 72 +++++++++++++++
.../representation.py | 12 +--
.../systemds/scuro/representations/unimodal.py | 102 +++++++++++++++++++++
16 files changed, 392 insertions(+), 124 deletions(-)
diff --git a/src/main/python/systemds/scuro/main.py b/src/main/python/systemds/scuro/main.py
index 9a6349568c..398db9921c 100644
--- a/src/main/python/systemds/scuro/main.py
+++ b/src/main/python/systemds/scuro/main.py
@@ -18,22 +18,44 @@
# under the License.
#
# -------------------------------------------------------------
-from aligner.alignment import Alignment
-from aligner.alignment_strategy import ChunkedCrossCorrelation
-from modality.representation import PixelRepresentation
+import collections
+import json
+from datetime import datetime
+
+from representations.average import Averaging
+from modality.aligned_modality import AlignedModality
+from modality.text_modality import TextModality
from modality.video_modality import VideoModality
-from aligner.similarity_measures import CosineSimilarity
-
-# Setup modalities
-file_path_a = ''
-file_path_b = ''
-representation_a = PixelRepresentation() # Concrete Representation
-representation_b = PixelRepresentation() # Concrete Representation
-modality_a = VideoModality(file_path_a, representation_a)
-modality_b = VideoModality(file_path_b, representation_b)
-
-# Align modalities
-alignment_strategy = ChunkedCrossCorrelation() # Concrete Alignment Strategy
-similarity_measure = CosineSimilarity()
-aligner = Alignment(modality_a, modality_b, alignment_strategy,
similarity_measure)
-aligned_modality = aligner.align_modalities()
+from modality.audio_modality import AudioModality
+from representations.unimodal import Pickle, JSON, HDF5, NPY
+from models.discrete_model import DiscreteModel
+
+
+labels = []
+train_indices = []
+
+video_path = ''
+audio_path = ''
+text_path = ''
+
+# Load modalities (audio, video, text)
+video = VideoModality(video_path, HDF5(), train_indices)
+audio = AudioModality(audio_path, Pickle(), train_indices)
+text = TextModality(text_path, NPY(), train_indices)
+
+video.read_all()
+audio.read_all()
+text.read_all()
+
+combined_modality = AlignedModality(Averaging(), [text, video, audio])
+combined_modality.combine()
+
+# create train-val split
+train_X, train_y = None, None
+val_X, val_y = None, None
+
+model = DiscreteModel()
+model.fit(train_X, train_y)
+model.test(val_X, val_y)
+
+
diff --git a/src/main/python/systemds/scuro/modality/__init__.py b/src/main/python/systemds/scuro/modality/__init__.py
index e66abb4646..d09f468da2 100644
--- a/src/main/python/systemds/scuro/modality/__init__.py
+++ b/src/main/python/systemds/scuro/modality/__init__.py
@@ -18,3 +18,11 @@
# under the License.
#
# -------------------------------------------------------------
+from systemds.scuro.modality.aligned_modality import AlignedModality
+from systemds.scuro.modality.audio_modality import AudioModality
+from systemds.scuro.modality.video_modality import VideoModality
+from systemds.scuro.modality.test_modality import TextModality
+from systemds.scuro.modality.modality import Modality
+
+
+__all__ = ["AlignedModality", "AudioModality", "VideoModality",
"TextModality", "Modality"]
\ No newline at end of file
diff --git a/src/main/python/systemds/scuro/modality/aligned_modality.py b/src/main/python/systemds/scuro/modality/aligned_modality.py
index ef6a102472..d4d20b962c 100644
--- a/src/main/python/systemds/scuro/modality/aligned_modality.py
+++ b/src/main/python/systemds/scuro/modality/aligned_modality.py
@@ -18,16 +18,27 @@
# under the License.
#
# -------------------------------------------------------------
+from typing import List
+
from modality.modality import Modality
-from modality.representation import Representation
+from representations.fusion import Fusion
class AlignedModality(Modality):
- def __init__(self, representation: Representation):
+ def __init__(self, representation: Fusion, modalities: List[Modality]):
"""
- Defines the modality that is created during the alignment process
+ Defines the modality that is created during the fusion process
:param representation: The representation for the aligned modality
- (made up of the #columns from the modalities
that are being aligned)
+ :param modalities: List of modalities to be combined
+ """
+ name = ''
+ for modality in modalities:
+ name += modality.name
+ super().__init__(representation, modality_name=name)
+ self.modalities = modalities
+
+ def combine(self):
+ """
+ Initiates the call to fuse the given modalities depending on the
Fusion type
"""
- super().__init__(representation)
-
\ No newline at end of file
+ self.data = self.representation.fuse(self.modalities) # noqa
diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/modality/audio_modality.py
similarity index 68%
copy from src/main/python/systemds/scuro/modality/video_modality.py
copy to src/main/python/systemds/scuro/modality/audio_modality.py
index 04ed24cf7c..01c71ad1e0 100644
--- a/src/main/python/systemds/scuro/modality/video_modality.py
+++ b/src/main/python/systemds/scuro/modality/audio_modality.py
@@ -21,12 +21,17 @@
import os
from modality.modality import Modality
-from modality.representation import Representation
+from representations.unimodal import UnimodalRepresentation
-class VideoModality(Modality):
- def __init__(self, file_path: str, representation: Representation,
start_index: int = 0):
- super().__init__(representation, start_index)
+class AudioModality(Modality):
+ def __init__(self, file_path: str, representation: UnimodalRepresentation,
train_indices=None, start_index: int = 0):
+ """
+ Creates an audio modality
+ :param file_path: path to file where the audio embeddings are stored
+ :param representation: Unimodal representation that indicates how to
extract the data from the file
+ """
+ super().__init__(representation, start_index, 'Audio', train_indices)
self.file_path = file_path
def file_sanity_check(self):
@@ -42,8 +47,7 @@ class VideoModality(Modality):
raise ("File {0} is empty".format(self.file_path))
def read_chunk(self):
- # Read chunk (self.params.window_size)
- # self.representation.apply_representation()
- # additional preprocessing?
- # return numpy array
pass
+
+ def read_all(self, indices=None):
+ self.data = self.representation.parse_all(self.file_path,
indices=indices) # noqa
diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py
index d82ac3e989..c7fe7cff8b 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/modality/modality.py
@@ -19,22 +19,34 @@
#
# -------------------------------------------------------------
-from modality.representation import Representation
+from representations.representation import Representation
class Modality:
- def __init__(self, representation: Representation, start_index: int = 0):
+ def __init__(self, representation: Representation, start_index: int = 0,
modality_name='', train_indices=None):
"""
- Defines the modality that is to be aligned
+ Parent class of the different Modalities
:param representation: Specifies how the data should be represented
for a specific modality
:param start_index: Defines the first index used for the alignment
+ :param modality_name: Name of the modality
+ :param train_indices: List of indices used for train-test split
"""
self.representation = representation
self.start_index = start_index
+ self.name = modality_name
+ self.data = None
+ self.train_indices = train_indices
def read_chunk(self):
"""
Extracts a data chunk of the modality according to the window size
specified in params
"""
raise NotImplementedError
+
+ def read_all(self, indices):
+ """
+ Implemented for every unique modality to read all samples from a
specified format
+ :param indices: List of indices to be read
+ """
+ pass
diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/modality/text_modality.py
similarity index 68%
copy from src/main/python/systemds/scuro/modality/video_modality.py
copy to src/main/python/systemds/scuro/modality/text_modality.py
index 04ed24cf7c..71f384626d 100644
--- a/src/main/python/systemds/scuro/modality/video_modality.py
+++ b/src/main/python/systemds/scuro/modality/text_modality.py
@@ -21,12 +21,17 @@
import os
from modality.modality import Modality
-from modality.representation import Representation
+from representations.unimodal import UnimodalRepresentation
-class VideoModality(Modality):
- def __init__(self, file_path: str, representation: Representation,
start_index: int = 0):
- super().__init__(representation, start_index)
+class TextModality(Modality):
+ def __init__(self, file_path: str, representation: UnimodalRepresentation,
train_indices=None, start_index: int = 0):
+ """
+ Creates a text modality
+ :param file_path: path to file(s) where the text data is stored
+ :param representation: Unimodal representation that indicates how to
extract the data from the file
+ """
+ super().__init__(representation, start_index, 'Text', train_indices)
self.file_path = file_path
def file_sanity_check(self):
@@ -42,8 +47,7 @@ class VideoModality(Modality):
raise ("File {0} is empty".format(self.file_path))
def read_chunk(self):
- # Read chunk (self.params.window_size)
- # self.representation.apply_representation()
- # additional preprocessing?
- # return numpy array
pass
+
+ def read_all(self, indices=None):
+ self.data = self.representation.parse_all(self.file_path,
indices=indices) # noqa
diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/modality/video_modality.py
index 04ed24cf7c..ff7eebc9cc 100644
--- a/src/main/python/systemds/scuro/modality/video_modality.py
+++ b/src/main/python/systemds/scuro/modality/video_modality.py
@@ -21,12 +21,17 @@
import os
from modality.modality import Modality
-from modality.representation import Representation
+from representations.unimodal import UnimodalRepresentation
class VideoModality(Modality):
- def __init__(self, file_path: str, representation: Representation,
start_index: int = 0):
- super().__init__(representation, start_index)
+ def __init__(self, file_path: str, representation: UnimodalRepresentation,
train_indices=None, start_index: int = 0):
+ """
+ Creates a video modality
+ :param file_path: path to file where the video embeddings (for now)
are stored
+ :param representation: Unimodal representation that indicates how to
extract the data from the file
+ """
+ super().__init__(representation, start_index, 'Video', train_indices)
self.file_path = file_path
def file_sanity_check(self):
@@ -42,8 +47,7 @@ class VideoModality(Modality):
raise ("File {0} is empty".format(self.file_path))
def read_chunk(self):
- # Read chunk (self.params.window_size)
- # self.representation.apply_representation()
- # additional preprocessing?
- # return numpy array
pass
+
+ def read_all(self, indices):
+ self.data = self.representation.parse_all(self.file_path,
indices=indices) # noqa
diff --git a/src/main/python/systemds/scuro/modality/__init__.py b/src/main/python/systemds/scuro/models/__init__.py
similarity index 87%
copy from src/main/python/systemds/scuro/modality/__init__.py
copy to src/main/python/systemds/scuro/models/__init__.py
index e66abb4646..d7c003fb48 100644
--- a/src/main/python/systemds/scuro/modality/__init__.py
+++ b/src/main/python/systemds/scuro/models/__init__.py
@@ -18,3 +18,8 @@
# under the License.
#
# -------------------------------------------------------------
+from systemds.scuro.models import Model
+from systemds.scuro.discrete_model import DiscreteModel
+
+
+__all__ = ["Model", "DiscreteModel"]
\ No newline at end of file
diff --git a/src/main/python/systemds/scuro/modality/representation.py b/src/main/python/systemds/scuro/models/discrete_model.py
similarity index 69%
copy from src/main/python/systemds/scuro/modality/representation.py
copy to src/main/python/systemds/scuro/models/discrete_model.py
index cdf9bf9e93..994f0882e5 100644
--- a/src/main/python/systemds/scuro/modality/representation.py
+++ b/src/main/python/systemds/scuro/models/discrete_model.py
@@ -18,13 +18,23 @@
# under the License.
#
# -------------------------------------------------------------
-class Representation:
+from models.model import Model
+
+
+class DiscreteModel(Model):
def __init__(self):
- pass
+ """
+ Placeholder for a discrete model implementation
+ """
+ super().__init__('DiscreteModel')
+
+ def fit(self, X_train, y_train):
+ self.clf = None
+ train_accuracy = 0
+ return train_accuracy
-
-class PixelRepresentation(Representation):
- def __init__(self):
- super().__init__()
-
-
\ No newline at end of file
+ def test(self, X_test, y_test):
+ test_accuracy = 0
+ return test_accuracy
+
+
\ No newline at end of file
diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/models/model.py
similarity index 60%
copy from src/main/python/systemds/scuro/modality/modality.py
copy to src/main/python/systemds/scuro/models/model.py
index d82ac3e989..115bf75ad6 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/models/model.py
@@ -19,22 +19,26 @@
#
# -------------------------------------------------------------
-from modality.representation import Representation
-
-
-class Modality:
+class Model:
+ def __init__(self, name: str):
+ """
+ Parent class for models used to perform a given task
+ :param name: Name of the model
+
+ The classifier (clf) should be set in the fit method of each child
class
+ """
+ self.name = name
+ self.clf = None
- def __init__(self, representation: Representation, start_index: int = 0):
+ def fit(self, X_train, y_train, X_val=None, y_val=None):
"""
- Defines the modality that is to be aligned
- :param representation: Specifies how the data should be represented
for a specific modality
- :param start_index: Defines the first index used for the alignment
+ Fits a model to the training data
"""
- self.representation = representation
- self.start_index = start_index
+ raise f'Fit method not implemented for {self.name}'
- def read_chunk(self):
+ def test(self, X_test, y_test):
"""
- Extracts a data chunk of the modality according to the window size
specified in params
+ Tests the classifier on a test or validation set
"""
- raise NotImplementedError
+ raise f'Test method not implemented for {self.name}'
+
\ No newline at end of file
diff --git a/src/main/python/systemds/scuro/modality/__init__.py b/src/main/python/systemds/scuro/representations/__init__.py
similarity index 65%
copy from src/main/python/systemds/scuro/modality/__init__.py
copy to src/main/python/systemds/scuro/representations/__init__.py
index e66abb4646..38df913019 100644
--- a/src/main/python/systemds/scuro/modality/__init__.py
+++ b/src/main/python/systemds/scuro/representations/__init__.py
@@ -18,3 +18,11 @@
# under the License.
#
# -------------------------------------------------------------
+from systemds.scuro.representations.representation import Representation
+from systemds.scuro.representations.average import Average
+from systemds.scuro.representations.concatenation import Concatenation
+from systemds.scuro.representations.fusion import Fusion
+from systemds.scuro.representations.unimodal import UnimodalRepresentation,
HDF5, NPY, Pickle, JSON
+
+
+__all__ = ["Representation", "Average", "Concatenation", "Fusion",
"UnimodalRepresentation", "HDF5", "NPY", "Pickle", "JSON"]
diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/representations/average.py
similarity index 53%
copy from src/main/python/systemds/scuro/modality/video_modality.py
copy to src/main/python/systemds/scuro/representations/average.py
index 04ed24cf7c..77896b1914 100644
--- a/src/main/python/systemds/scuro/modality/video_modality.py
+++ b/src/main/python/systemds/scuro/representations/average.py
@@ -18,32 +18,37 @@
# under the License.
#
# -------------------------------------------------------------
-import os
+
+from typing import List
+
+import numpy as np
from modality.modality import Modality
-from modality.representation import Representation
+from keras.api.preprocessing.sequence import pad_sequences
+from representations.fusion import Fusion
-class VideoModality(Modality):
- def __init__(self, file_path: str, representation: Representation,
start_index: int = 0):
- super().__init__(representation, start_index)
- self.file_path = file_path
-
- def file_sanity_check(self):
+
+class Averaging(Fusion):
+ def __init__(self):
"""
- Checks if the file can be found is not empty
+ Combines modalities using averaging
"""
- try:
- file_size = os.path.getsize(self.file_path)
- except:
- raise (f"Error: File {0} not found!".format(self.file_path))
-
- if file_size == 0:
- raise ("File {0} is empty".format(self.file_path))
+ super().__init__('Averaging')
- def read_chunk(self):
- # Read chunk (self.params.window_size)
- # self.representation.apply_representation()
- # additional preprocessing?
- # return numpy array
- pass
+ def fuse(self, modalities: List[Modality]):
+ max_emb_size = self.get_max_embedding_size(modalities)
+
+ padded_modalities = []
+ for modality in modalities:
+ d = pad_sequences(modality.data, maxlen=max_emb_size,
dtype='float32', padding='post')
+ padded_modalities.append(d)
+
+ data = padded_modalities[0]
+ for i in range(1, len(modalities)):
+ data += padded_modalities[i]
+
+ data = self.scale_data(data, modalities[0].train_indices)
+ data /= len(modalities)
+
+ return np.array(data)
diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/representations/concatenation.py
similarity index 52%
copy from src/main/python/systemds/scuro/modality/video_modality.py
copy to src/main/python/systemds/scuro/representations/concatenation.py
index 04ed24cf7c..149e1f8801 100644
--- a/src/main/python/systemds/scuro/modality/video_modality.py
+++ b/src/main/python/systemds/scuro/representations/concatenation.py
@@ -18,32 +18,35 @@
# under the License.
#
# -------------------------------------------------------------
-import os
+
+from typing import List
+
+import numpy as np
from modality.modality import Modality
-from modality.representation import Representation
+from keras.api.preprocessing.sequence import pad_sequences
+
+from representations.fusion import Fusion
-class VideoModality(Modality):
- def __init__(self, file_path: str, representation: Representation,
start_index: int = 0):
- super().__init__(representation, start_index)
- self.file_path = file_path
-
- def file_sanity_check(self):
+class Concatenation(Fusion):
+ def __init__(self, padding=True):
"""
- Checks if the file can be found is not empty
+ Combines modalities using concatenation
"""
- try:
- file_size = os.path.getsize(self.file_path)
- except:
- raise (f"Error: File {0} not found!".format(self.file_path))
+ super().__init__('Concatenation')
+ self.padding = padding
+
+ def fuse(self, modalities: List[Modality]):
+ max_emb_size = self.get_max_embedding_size(modalities)
+
+ size = len(modalities[0].data)
+ data = np.zeros((size, 0))
- if file_size == 0:
- raise ("File {0} is empty".format(self.file_path))
-
- def read_chunk(self):
- # Read chunk (self.params.window_size)
- # self.representation.apply_representation()
- # additional preprocessing?
- # return numpy array
- pass
+ for modality in modalities:
+ if self.padding:
+ data = np.concatenate(pad_sequences(modality.data,
maxlen=max_emb_size, dtype='float32', padding='post'), axis=1)
+ else:
+ data = np.concatenate([data, modality.data], axis=1)
+
+ return self.scale_data(data, modalities[0].train_indices)
diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py
new file mode 100644
index 0000000000..4e242137f1
--- /dev/null
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -0,0 +1,72 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from typing import List
+
+from sklearn.preprocessing import StandardScaler
+
+from modality.modality import Modality
+from representations.representation import Representation
+
+
+class Fusion(Representation):
+ def __init__(self, name):
+ """
+ Parent class for different multimodal fusion types
+ :param name: Name of the fusion type
+ """
+ super().__init__(name)
+
+ def fuse(self, modalities: List[Modality]):
+ """
+ Implemented for every child class and creates a fused representation
out of
+ multiple modalities
+ :param modalities: List of modalities used in the fusion
+ :return: fused data
+ """
+ raise f'Not implemented for Fusion: {self.name}'
+
+ def get_max_embedding_size(self, modalities: List[Modality]):
+ """
+ Computes the maximum embedding size from a given list of modalities
+ :param modalities: List of modalities
+ :return: maximum embedding size
+ """
+ max_size = modalities[0].data.shape[1]
+ for idx in range(1, len(modalities)):
+ curr_shape = modalities[idx].data.shape
+ if len(modalities[idx - 1].data) != curr_shape[0]:
+ raise f'Modality sizes don\'t match!'
+ elif curr_shape[1] > max_size:
+ max_size = curr_shape[1]
+
+ return max_size
+
+ def scale_data(self, data, train_indices):
+ """
+ Scales the data using the StandardScaler.
+ The scaler is fit on the training data before performing the scaling
on the whole data array
+ :param data: data to be scaled
+ :param train_indices:
+ :return: scaled data
+ """
+ scaler = StandardScaler()
+ scaler.fit(data[train_indices])
+ return scaler.transform(data)
diff --git a/src/main/python/systemds/scuro/modality/representation.py b/src/main/python/systemds/scuro/representations/representation.py
similarity index 86%
rename from src/main/python/systemds/scuro/modality/representation.py
rename to src/main/python/systemds/scuro/representations/representation.py
index cdf9bf9e93..13da5d26a5 100644
--- a/src/main/python/systemds/scuro/modality/representation.py
+++ b/src/main/python/systemds/scuro/representations/representation.py
@@ -18,13 +18,7 @@
# under the License.
#
# -------------------------------------------------------------
+
class Representation:
- def __init__(self):
- pass
-
-
-class PixelRepresentation(Representation):
- def __init__(self):
- super().__init__()
-
-
\ No newline at end of file
+ def __init__(self, name):
+ self.name = name
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py b/src/main/python/systemds/scuro/representations/unimodal.py
new file mode 100644
index 0000000000..659ad32468
--- /dev/null
+++ b/src/main/python/systemds/scuro/representations/unimodal.py
@@ -0,0 +1,102 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import json
+import pickle
+
+import h5py
+import numpy as np
+
+from representations.representation import Representation
+
+
+class UnimodalRepresentation(Representation):
+ def __init__(self, name):
+ """
+ Parent class for all unimodal representation types
+ :param name: name of the representation
+ """
+ super().__init__(name)
+
+ def parse_all(self, file_path, indices):
+ raise f'Not implemented for {self.name}'
+
+
+class PixelRepresentation(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('Pixel')
+
+
+class ResNet(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('ResNet')
+
+
+class Pickle(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('Pickle')
+
+ def parse_all(self, filepath, indices):
+ with open(filepath, "rb") as file:
+ data = pickle.load(file, encoding='latin1')
+
+ if indices is not None:
+ for n, idx in enumerate(indices):
+ result = np.empty((len(data), np.mean(data[idx][()],
axis=1).shape[0]))
+ break
+ for n, idx in enumerate(indices):
+ result[n] = np.mean(data[idx], axis=1)
+ return result
+ else:
+ return np.array([np.mean(data[index], axis=1) for index in data])
+
+
+class JSON(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('JSON')
+
+ def parse_all(self, filepath, indices):
+ with open(filepath) as file:
+ return json.load(file)
+
+
+class NPY(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('NPY')
+
+ def parse_all(self, filepath, indices):
+ data = np.load(filepath)
+
+ if indices is not None:
+ return np.array([data[n, 0] for n, index in enumerate(indices)])
+ else:
+ return np.array([data[index, 0] for index in data])
+
+
+class HDF5(UnimodalRepresentation):
+ def __init__(self):
+ super().__init__('HDF5')
+
+ def parse_all(self, filepath, indices=None):
+ data = h5py.File(filepath)
+ if indices is not None:
+ return np.array([np.mean(data[index][()], axis=0) for index in
indices])
+ else:
+ return np.array([np.mean(data[index][()], axis=0) for index in
data])