(systemds) branch main updated: [SYSTEMDS-3913] Adapt visual representations to image modality

cdionysio Mon, 08 Dec 2025 04:28:57 -0800

This is an automated email from the ASF dual-hosted git repository.

cdionysio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new 5292b423ec [SYSTEMDS-3913] Adapt visual representations to image modality
5292b423ec is described below

commit 5292b423ecf1eda53d2966b948d0211f45286fca
Author: Christina Dionysio <[email protected]>
AuthorDate: Mon Dec 8 13:28:36 2025 +0100

    [SYSTEMDS-3913] Adapt visual representations to image modality
    
    This patch fixes some errors in visual representations in order to make them work for video and images.
---
 src/main/python/systemds/scuro/dataloader/image_loader.py         | 2 +-
 src/main/python/systemds/scuro/dataloader/json_loader.py          | 2 +-
 src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py   | 4 ++--
 src/main/python/systemds/scuro/representations/color_histogram.py | 8 +++++---
 src/main/python/systemds/scuro/representations/fusion.py          | 2 +-
 src/main/python/systemds/scuro/representations/resnet.py          | 2 +-
 src/main/python/systemds/scuro/representations/vgg.py             | 2 +-
 src/main/python/systemds/scuro/utils/torch_dataset.py             | 1 -
 8 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/main/python/systemds/scuro/dataloader/image_loader.py b/src/main/python/systemds/scuro/dataloader/image_loader.py
index 0667e703b1..21ad27bf04 100644
--- a/src/main/python/systemds/scuro/dataloader/image_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/image_loader.py
@@ -54,7 +54,7 @@ class ImageLoader(BaseLoader):
         else:
             height, width, channels = image.shape
 
-        image = image.astype(np.float32) / 255.0
+        image = image.astype(np.uint8, copy=False)
 
         self.metadata[file] = self.modality_type.create_metadata(
             width, height, channels
diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py b/src/main/python/systemds/scuro/dataloader/json_loader.py
index ed15448597..53e98e7e19 100644
--- a/src/main/python/systemds/scuro/dataloader/json_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -55,6 +55,6 @@ class JSONLoader(BaseLoader):
                 except:
                     text = json_file[self.field]
 
-                text = " ".join(text)
+                text = " ".join(text) if isinstance(text, list) else text
                 self.data.append(text)
                 self.metadata[idx] = self.modality_type.create_metadata(len(text), text)
diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index a90485dfa2..2a5f826283 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -177,11 +177,11 @@ class HyperparameterTuner:
 
         if self.maximize_metric:
             best_params, best_score = max(
-                all_results, key=lambda x: x[1].scores[self.scoring_metric]
+                all_results, key=lambda x: x[1].average_scores[self.scoring_metric]
             )
         else:
             best_params, best_score = min(
-                all_results, key=lambda x: x[1].scores[self.scoring_metric]
+                all_results, key=lambda x: x[1].average_scores[self.scoring_metric]
             )
 
         tuning_time = time.time() - start_time
diff --git a/src/main/python/systemds/scuro/representations/color_histogram.py b/src/main/python/systemds/scuro/representations/color_histogram.py
index 6412b1979d..2d780939e1 100644
--- a/src/main/python/systemds/scuro/representations/color_histogram.py
+++ b/src/main/python/systemds/scuro/representations/color_histogram.py
@@ -22,17 +22,19 @@
 import numpy as np
 import cv2
 
+from systemds.scuro.drsearch.operator_registry import register_representation
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.modality.transformed import TransformedModality
 
 
+@register_representation(ModalityType.IMAGE)
 class ColorHistogram(UnimodalRepresentation):
     def __init__(
         self,
         color_space="RGB",
-        bins=32,
-        normalize=True,
+        bins=64,
+        normalize=False,
         aggregation="mean",
         output_file=None,
     ):
@@ -48,7 +50,7 @@ class ColorHistogram(UnimodalRepresentation):
     def _get_parameters(self):
         return {
             "color_space": ["RGB", "HSV", "GRAY"],
-            "bins": [8, 16, 32, 64, 128, 256, (8, 8, 8), (16, 16, 16)],
+            "bins": [8, 16, 32, 64, 128, 256],
             "normalize": [True, False],
             "aggregation": ["mean", "max", "concat"],
         }
diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py
index d491dcad6b..693689bf92 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -86,7 +86,7 @@ class Fusion(Representation):
             (len(modalities[0].data), transformed_train.shape[1])
         )
         transformed_data[task.train_indices] = transformed_train
-        transformed_data[task.val_indices] = transformed_val
+        transformed_data[task.test_indices] = transformed_other
 
         return transformed_data
 
diff --git a/src/main/python/systemds/scuro/representations/resnet.py b/src/main/python/systemds/scuro/representations/resnet.py
index f544e6a46f..55e7d36948 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -114,7 +114,7 @@ class ResNet(UnimodalRepresentation):
         return parameters
 
     def transform(self, modality):
-        self.data_type = numpy_dtype_to_torch_dtype(modality.data_type)
+        self.data_type = torch.float32
         if next(self.model.parameters()).dtype != self.data_type:
             self.model = self.model.to(self.data_type)
 
diff --git a/src/main/python/systemds/scuro/representations/vgg.py b/src/main/python/systemds/scuro/representations/vgg.py
index 4d0212883c..4f4324a372 100644
--- a/src/main/python/systemds/scuro/representations/vgg.py
+++ b/src/main/python/systemds/scuro/representations/vgg.py
@@ -65,7 +65,7 @@ class VGG19(UnimodalRepresentation):
         return parameters
 
     def transform(self, modality):
-        self.data_type = numpy_dtype_to_torch_dtype(modality.data_type)
+        self.data_type = torch.float32
         if next(self.model.parameters()).dtype != self.data_type:
             self.model = self.model.to(self.data_type)
 
diff --git a/src/main/python/systemds/scuro/utils/torch_dataset.py b/src/main/python/systemds/scuro/utils/torch_dataset.py
index 19875f8802..9c462e3675 100644
--- a/src/main/python/systemds/scuro/utils/torch_dataset.py
+++ b/src/main/python/systemds/scuro/utils/torch_dataset.py
@@ -62,7 +62,6 @@ class CustomDataset(torch.utils.data.Dataset):
 
         if isinstance(data, np.ndarray) and data.ndim == 3:
             # image
-            data = torch.tensor(data).permute(2, 0, 1)
             output = self.tf(data).to(self.device)
         else:
             for i, d in enumerate(data):

(systemds) branch main updated: [SYSTEMDS-3913] Adapt visual representations to image modality

Reply via email to