This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 95c74be188 [MINOR] add correct python dependencies
95c74be188 is described below
commit 95c74be188ca35de762380525197881d2a045ff1
Author: Christina Dionysio <[email protected]>
AuthorDate: Tue Sep 24 15:35:51 2024 +0200
[MINOR] add correct python dependencies
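This commit fixes the Python dependencies needed to support Scuro: the CI workflow now installs the packages Scuro requires (and no longer installs Keras), and the Keras `pad_sequences` import is replaced by a local implementation in `representations/utils.py`.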
Closes #2117
---
.github/workflows/python.yml | 15 ++++++++++++++-
src/main/python/systemds/scuro/representations/average.py | 4 ++--
.../systemds/scuro/representations/concatenation.py | 4 ++--
src/main/python/systemds/scuro/representations/max.py | 4 ++--
.../systemds/scuro/representations/mel_spectrogram.py | 4 ++--
.../systemds/scuro/representations/multiplication.py | 6 +++---
src/main/python/systemds/scuro/representations/rowmax.py | 4 ++--
src/main/python/systemds/scuro/representations/sum.py | 6 +++---
src/main/python/systemds/scuro/representations/utils.py | 13 +++++++++++++
9 files changed, 43 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 217e2c157d..79b2277b08 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -100,7 +100,20 @@ jobs:
pip install --upgrade pip
pip install --upgrade pip
pip install wheel
- pip install numpy py4j scipy scikit-learn keras requests pandas unittest-parallel
+ pip install \
+ numpy \
+ py4j \
+ scipy \
+ scikit-learn \
+ requests \
+ pandas \
+ unittest-parallel \
+ torchvision \
+ transformers \
+ opencv-python \
+ torch \
+ librosa \
+ h5py
- name: Build Python Package
run: |
diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py
index 11ce431566..708812d21b 100644
--- a/src/main/python/systemds/scuro/representations/average.py
+++ b/src/main/python/systemds/scuro/representations/average.py
@@ -24,7 +24,7 @@ from typing import List
import numpy as np
from systemds.scuro.modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.fusion import Fusion
@@ -41,7 +41,7 @@ class Average(Fusion):
padded_modalities = []
for modality in modalities:
- d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
+ d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)
data = padded_modalities[0]
diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py
index 81b6fe33fc..a61ab69fce 100644
--- a/src/main/python/systemds/scuro/representations/concatenation.py
+++ b/src/main/python/systemds/scuro/representations/concatenation.py
@@ -24,7 +24,7 @@ from typing import List
import numpy as np
from systemds.scuro.modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.fusion import Fusion
@@ -51,7 +51,7 @@ class Concatenation(Fusion):
for modality in modalities:
if self.padding:
- data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')], axis=-1)
+ data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')], axis=-1)
else:
data = np.concatenate([data, modality.data], axis=-1)
diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py
index 2f58581cb8..50038d5463 100644
--- a/src/main/python/systemds/scuro/representations/max.py
+++ b/src/main/python/systemds/scuro/representations/max.py
@@ -24,7 +24,7 @@ from typing import List
import numpy as np
from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.fusion import Fusion
@@ -46,7 +46,7 @@ class RowMax(Fusion):
padded_modalities = []
for modality in modalities:
- d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
+ d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)
split_rows = int(len(modalities[0].data) / self.split)
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index 395b2977a0..15d4277c2c 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -24,7 +24,7 @@ import pickle
import librosa
import numpy as np
-from keras.src.utils import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.unimodal import UnimodalRepresentation
@@ -51,7 +51,7 @@ class MelSpectrogram(UnimodalRepresentation):
r = []
for elem in result:
- d = pad_sequences(elem, maxlen=max_length, dtype='float32', padding='post')
+ d = pad_sequences(elem, maxlen=max_length, dtype='float32')
r.append(d)
np_array_r = np.array(r) if not self.avg else np.mean(np.array(r), axis=1)
diff --git a/src/main/python/systemds/scuro/representations/multiplication.py b/src/main/python/systemds/scuro/representations/multiplication.py
index 2b3ae64eac..032ae70fe4 100644
--- a/src/main/python/systemds/scuro/representations/multiplication.py
+++ b/src/main/python/systemds/scuro/representations/multiplication.py
@@ -24,7 +24,7 @@ from typing import List
import numpy as np
from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.fusion import Fusion
@@ -39,10 +39,10 @@ class Multiplication(Fusion):
def fuse(self, modalities: List[Modality], train_indices=None):
max_emb_size = self.get_max_embedding_size(modalities)
- data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
+ data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')
for m in range(1, len(modalities)):
# scaled = self.scale_data(modalities[m].data, train_indices)
- data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post'))
+ data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32'))
return data
diff --git a/src/main/python/systemds/scuro/representations/rowmax.py b/src/main/python/systemds/scuro/representations/rowmax.py
index c4184687a1..b06839b334 100644
--- a/src/main/python/systemds/scuro/representations/rowmax.py
+++ b/src/main/python/systemds/scuro/representations/rowmax.py
@@ -24,7 +24,7 @@ from typing import List
import numpy as np
from modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from representations.fusion import Fusion
@@ -47,7 +47,7 @@ class RowMax(Fusion):
padded_modalities = []
for modality in modalities:
scaled = self.scale_data(modality.data, train_indices)
- d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32', padding='post')
+ d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)
split_rows = int(len(modalities[0].data) / self.split)
diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py
index 9c75606627..1f1740c548 100644
--- a/src/main/python/systemds/scuro/representations/sum.py
+++ b/src/main/python/systemds/scuro/representations/sum.py
@@ -23,7 +23,7 @@ from typing import List
from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.fusion import Fusion
@@ -38,9 +38,9 @@ class Sum(Fusion):
def fuse(self, modalities: List[Modality]):
max_emb_size = self.get_max_embedding_size(modalities)
- data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
+ data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')
for m in range(1, len(modalities)):
- data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post')
+ data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32')
return data
diff --git a/src/main/python/systemds/scuro/representations/utils.py b/src/main/python/systemds/scuro/representations/utils.py
index d611cd9c71..720c3386d7 100644
--- a/src/main/python/systemds/scuro/representations/utils.py
+++ b/src/main/python/systemds/scuro/representations/utils.py
@@ -93,3 +93,16 @@ class JSON(UnimodalRepresentation):
def parse_all(self, filepath, indices):
with open(filepath) as file:
return json.load(file)
+
+
+def pad_sequences(sequences, maxlen=None, dtype='float32', value=0):
+ if maxlen is None:
+ maxlen = max([len(seq) for seq in sequences])
+
+ result = np.full((len(sequences), maxlen), value, dtype=dtype)
+
+ for i, seq in enumerate(sequences):
+ data = seq[:maxlen]
+ result[i, :len(data)] = data
+
+ return result