metatensor
diff --git a/‎docs/src/engines/plumed-model.py‎
Lines changed: 2 additions & 2 deletions b/‎docs/src/engines/plumed-model.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎metatomic-torch/include/metatomic/torch/model.hpp‎
Lines changed: 47 additions & 3 deletions b/‎metatomic-torch/include/metatomic/torch/model.hpp‎
Lines changed: 47 additions & 3 deletions
diff --git a/‎metatomic-torch/src/model.cpp‎
Lines changed: 136 additions & 3 deletions b/‎metatomic-torch/src/model.cpp‎
Lines changed: 136 additions & 3 deletions
diff --git a/‎metatomic-torch/src/outputs.cpp‎
Lines changed: 57 additions & 9 deletions b/‎metatomic-torch/src/outputs.cpp‎
Lines changed: 57 additions & 9 deletions
@@ -25,7 +25,7 @@ def forward(
         if "features" not in outputs:
             return {}
 
-        if outputs["features"].per_atom:
+        if outputs["features"].sample_kind == "atom":
             raise ValueError("per-atoms features are not supported in this model")
 
         # PLUMED will first call the model with 0 atoms to get the size of the
@@ -94,7 +94,7 @@ def forward(
 # metadata about what the model can do
 capabilities = mta.ModelCapabilities(
     length_unit="Angstrom",
-    outputs={"features": mta.ModelOutput(per_atom=False)},
+    outputs={"features": mta.ModelOutput(sample_kind="system")},
     atomic_types=[0],
     interaction_range=torch.inf,
     supported_devices=["cpu", "cuda"],
 
@@ -35,6 +35,22 @@ class METATOMIC_TORCH_EXPORT ModelOutputHolder: public torch::CustomClassHolder
     ModelOutputHolder() = default;
 
     /// Initialize `ModelOutput` with the given data
+    ModelOutputHolder(
+        std::string quantity,
+        std::string unit,
+        std::string sample_kind,
+        std::vector<std::string> explicit_gradients_,
+        std::string description_
+    ):
+        description(std::move(description_)),
+        explicit_gradients(std::move(explicit_gradients_))
+    {
+        // quantity, unit and sample kind are assigned through their setters
+        // rather than the initializer list, so each value goes through the
+        // checks done by the corresponding setter
+        set_quantity(std::move(quantity));
+        set_unit(std::move(unit));
+        set_sample_kind(std::move(sample_kind));
+    }
+
+    /// For backward compatibility in the C++ API (per_atom argument)
     ModelOutputHolder(
         std::string quantity,
         std::string unit,
@@ -43,13 +59,24 @@ class METATOMIC_TORCH_EXPORT ModelOutputHolder: public torch::CustomClassHolder
         std::string description_
     ):
         description(std::move(description_)),
-        per_atom(per_atom_),
         explicit_gradients(std::move(explicit_gradients_))
     {
         this->set_quantity(std::move(quantity));
         this->set_unit(std::move(unit));
+        this->set_per_atom(per_atom_);
     }
 
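+    /// For backward compatibility in the Python API: accepts either the legacy boolean `per_atom` or the string `sample_kind`, given positionally or as a keyword argument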
+    ModelOutputHolder(
+        std::string quantity,
+        std::string unit,
+        torch::IValue per_atom_or_sample_kind,
+        std::vector<std::string> explicit_gradients_,
+        std::string description_,
+        torch::optional<bool> per_atom = torch::nullopt,
+        torch::optional<std::string> sample_kind = torch::nullopt
+    );
+
     ~ModelOutputHolder() override = default;
 
     /// description of this output, defaults to empty string if not set by the user
@@ -72,8 +99,21 @@ class METATOMIC_TORCH_EXPORT ModelOutputHolder: public torch::CustomClassHolder
     /// set the unit of the output
     void set_unit(std::string unit);
 
-    /// is the output defined per-atom or for the overall structure
-    bool per_atom = false;
+    /// The setter and getter for `per_atom` that are used in TorchBind, which
+    /// allow us to raise an error if `sample_kind` can't be mapped to a boolean
+    /// value for `per_atom`.
+    void set_per_atom(bool per_atom);
+    bool get_per_atom() const;
+
+    /// This is deprecated in favor of `sample_kind`, and kept for backward compatibility reasons only.
+    ///
+    /// Defaults to `false` (equivalent to `sample_kind() == "system"`), so the
+    /// value is never read uninitialized, whichever constructor was used.
+    [[deprecated("use sample_kind instead")]]
+    bool per_atom = false;
+
+    /// Get the sample kind of the output, one of:
+    ///
+    /// - `"system"`: samples are indexed by `"system"` only;
+    /// - `"atom"`: samples are indexed by `"system"` and `"atom"`;
+    /// - `"atom_pair"`: samples are indexed by `"system"`, `"first_atom"`,
+    ///   `"second_atom"` and the three `"cell_shift_{a,b,c}"` dimensions.
+    std::string sample_kind() const;
+
+    /// Set the `sample_kind` of the output. This throws a `ValueError` if
+    /// `sample_kind` is not one of the supported values listed above.
+    void set_sample_kind(std::string sample_kind);
 
     /// Which gradients should be computed eagerly and stored inside the output
     /// `TensorMap`
@@ -85,8 +125,12 @@ class METATOMIC_TORCH_EXPORT ModelOutputHolder: public torch::CustomClassHolder
     static ModelOutput from_json(std::string_view json);
 
 private:
+    void set_per_atom_no_deprecation(bool per_atom);
+    bool get_per_atom_no_deprecation() const;
+
     std::string quantity_;
     std::string unit_;
+    torch::optional<std::string> sample_kind_;
 };
 
 
 
@@ -53,6 +53,51 @@ static void read_vector_int_json(
 
 /******************************************************************************/
 
+/// Constructor backing the Python API, where the third argument used to be the
+/// boolean `per_atom` and is now the string `sample_kind`.
+///
+/// The value can be given positionally (`per_atom_or_sample_kind`, either a
+/// bool or a string) or through one of the `per_atom` / `sample_kind` keyword
+/// arguments. Giving it more than once, or giving both a `per_atom` and a
+/// `sample_kind`, throws a `ValueError`. When nothing is given, the output
+/// defaults to `sample_kind = "system"`.
+ModelOutputHolder::ModelOutputHolder(
+    std::string quantity,
+    std::string unit,
+    torch::IValue per_atom_or_sample_kind,
+    std::vector<std::string> explicit_gradients_,
+    std::string description_,
+    torch::optional<bool> per_atom,
+    torch::optional<std::string> sample_kind
+):
+    description(std::move(description_)),
+    explicit_gradients(std::move(explicit_gradients_))
+{
+    this->set_quantity(std::move(quantity));
+    this->set_unit(std::move(unit));
+
+    if (per_atom_or_sample_kind.isNone()) {
+        // check the kwargs for backward compatibility
+        if (sample_kind.has_value() && per_atom.has_value()) {
+            C10_THROW_ERROR(ValueError, "cannot specify both `per_atom` and `sample_kind`");
+        } else if (sample_kind.has_value()) {
+            this->set_sample_kind(sample_kind.value());
+        } else if (per_atom.has_value()) {
+            this->set_per_atom(per_atom.value());
+        } else {
+            // nothing was requested: make the default explicit instead of
+            // relying on whatever `per_atom` happens to contain
+            this->set_sample_kind("system");
+        }
+    } else if (per_atom_or_sample_kind.isBool()) {
+        if (per_atom.has_value()) {
+            C10_THROW_ERROR(ValueError,
+                "cannot specify `per_atom` both as a positional and keyword argument"
+            );
+        }
+        // a positional `per_atom` together with a `sample_kind` keyword is as
+        // ambiguous as giving both keywords, do not silently drop one of them
+        if (sample_kind.has_value()) {
+            C10_THROW_ERROR(ValueError, "cannot specify both `per_atom` and `sample_kind`");
+        }
+        this->set_per_atom(per_atom_or_sample_kind.toBool());
+    } else if (per_atom_or_sample_kind.isString()) {
+        if (sample_kind.has_value()) {
+            C10_THROW_ERROR(ValueError,
+                "cannot specify `sample_kind` both as a positional and keyword argument"
+            );
+        }
+        // same as above, with a positional `sample_kind` and a `per_atom` keyword
+        if (per_atom.has_value()) {
+            C10_THROW_ERROR(ValueError, "cannot specify both `per_atom` and `sample_kind`");
+        }
+        this->set_sample_kind(per_atom_or_sample_kind.toStringRef());
+    } else {
+        C10_THROW_ERROR(ValueError,
+            "positional argument for `per_atom`/`sample_kind` must be either a boolean or a string"
+        );
+    }
+}
+
 void ModelOutputHolder::set_quantity(std::string quantity) {
     if (valid_quantity(quantity)) {
         validate_unit(quantity, unit_);
@@ -72,7 +117,7 @@ static nlohmann::json model_output_to_json(const ModelOutputHolder& self) {
     result["class"] = "ModelOutput";
     result["quantity"] = self.quantity();
     result["unit"] = self.unit();
-    result["per_atom"] = self.per_atom;
+    result["sample_kind"] = self.sample_kind();
     result["explicit_gradients"] = self.explicit_gradients;
     result["description"] = self.description;
 
@@ -112,11 +157,18 @@ static ModelOutput model_output_from_json(const nlohmann::json& data) {
         result->set_unit(data["unit"]);
     }
 
-    if (data.contains("per_atom")) {
+    if (data.contains("sample_kind")) {
+        if (!data["sample_kind"].is_string()) {
+            throw std::runtime_error("'sample_kind' in JSON for ModelOutput must be a string");
+        }
+        result->set_sample_kind(data["sample_kind"]);
+    } else if (data.contains("per_atom")) {
         if (!data["per_atom"].is_boolean()) {
             throw std::runtime_error("'per_atom' in JSON for ModelOutput must be a boolean");
         }
-        result->per_atom = data["per_atom"];
+        result->set_per_atom(data["per_atom"]);
+    } else {
+        result->set_sample_kind("system");
     }
 
     if (data.contains("explicit_gradients")) {
@@ -145,6 +197,87 @@ ModelOutput ModelOutputHolder::from_json(std::string_view json) {
     return model_output_from_json(data);
 }
 
+/// All the values accepted by `ModelOutputHolder::set_sample_kind`. The set is
+/// only ever read after initialization, so it is declared `const`.
+static const std::set<std::string> SUPPORTED_SAMPLE_KINDS = {
+    "system",
+    "atom",
+    "atom_pair",
+};
+
+void ModelOutputHolder::set_sample_kind(std::string sample_kind) {
+    // "atom" and "system" can be represented by the legacy `per_atom` flag,
+    // which is where they are stored
+    const bool is_atom = (sample_kind == "atom");
+    if (is_atom || sample_kind == "system") {
+        this->set_per_atom_no_deprecation(is_atom);
+        return;
+    }
+
+    // anything else must be one of the known sample kinds
+    if (SUPPORTED_SAMPLE_KINDS.count(sample_kind) == 0) {
+        C10_THROW_ERROR(ValueError,
+            "invalid sample_kind '" + sample_kind + "': supported values are [" +
+            torch::str(SUPPORTED_SAMPLE_KINDS) + "]"
+        );
+    }
+
+    this->sample_kind_ = std::move(sample_kind);
+}
+
+std::string ModelOutputHolder::sample_kind() const {
+    // an explicitly stored sample kind takes precedence
+    if (sample_kind_.has_value()) {
+        return sample_kind_.value();
+    }
+
+    // otherwise the kind is derived from the legacy `per_atom` flag
+    return this->get_per_atom_no_deprecation() ? "atom" : "system";
+}
+
+void ModelOutputHolder::set_per_atom(bool per_atom_) {
+    // public, deprecated entry point: warn, then delegate to the silent setter
+    TORCH_WARN_DEPRECATION("`per_atom` is deprecated, please use `sample_kind` instead");
+    set_per_atom_no_deprecation(per_atom_);
+}
+
+bool ModelOutputHolder::get_per_atom() const {
+    // public, deprecated entry point: warn, then delegate to the silent getter
+    TORCH_WARN_DEPRECATION("`per_atom` is deprecated, please use `sample_kind` instead");
+    return get_per_atom_no_deprecation();
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
+void ModelOutputHolder::set_per_atom_no_deprecation(bool per_atom) {
+    // drop any explicitly stored sample kind: from now on the kind is derived
+    // from the `per_atom` flag alone
+    this->sample_kind_ = torch::nullopt;
+    this->per_atom = per_atom;
+}
+
+bool ModelOutputHolder::get_per_atom_no_deprecation() const {
+    // no explicit sample kind stored: the flag itself is the answer
+    if (!sample_kind_.has_value()) {
+        return per_atom;
+    }
+
+    const auto& kind = sample_kind_.value();
+    if (kind == "atom") {
+        return true;
+    }
+    if (kind == "system") {
+        return false;
+    }
+
+    // any other sample kind has no boolean equivalent
+    C10_THROW_ERROR(
+        ValueError,
+        "Can't infer `per_atom` from `sample_kind` '" + kind + "'. "
+        "`per_atom` only makes sense for `sample_kind` 'atom' and 'system'."
+    );
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
 /******************************************************************************/
 
 
 
@@ -84,13 +84,27 @@ static void validate_atomic_samples(
     auto tensor_options = torch::TensorOptions().device(value->device());
     TensorBlock block = TensorMapHolder::block_by_id(value, 0);
 
-    // Check if the samples names are as expected based on whether the output is
-    // per-atom or global
+    // Check if the samples names are as expected based on the sample_kind
     std::vector<std::string> expected_samples_names;
-    if (request->per_atom) {
+    if (request->sample_kind() == "atom") {
         expected_samples_names = {"system", "atom"};
-    } else {
+    } else if (request->sample_kind() == "system") {
         expected_samples_names = {"system"};
+    } else if (request->sample_kind() == "atom_pair") {
+        expected_samples_names = {
+            "system",
+            "first_atom",
+            "second_atom",
+            "cell_shift_a",
+            "cell_shift_b",
+            "cell_shift_c"
+        };
+    } else {
+        C10_THROW_ERROR(ValueError,
+            "Metatomic does not support validating samples for sample_kind "
+            "other than 'system', 'atom' or 'atom_pair' at the moment."
+            " Received sample_kind '" + request->sample_kind() + "'"
+        );
     }
 
     if (block->samples()->names() != expected_samples_names) {
@@ -103,7 +117,7 @@ static void validate_atomic_samples(
 
     // Check if the samples match the systems and selected_atoms
     Labels expected_samples;
-    if (request->per_atom) {
+    if (request->sample_kind() == "atom") {
         std::vector<int64_t> expected_values_flat;
         for (size_t s = 0; s < systems.size(); s++) {
             for (size_t a = 0; a < systems[s]->size(); a++) {
@@ -122,7 +136,7 @@ static void validate_atomic_samples(
         if (selected_atoms) {
             expected_samples = expected_samples->set_intersection(selected_atoms.value());
         }
-    } else {
+    } else if (request->sample_kind() == "system") {
         expected_samples = torch::make_intrusive<LabelsHolder>(
             "system",
             torch::arange(static_cast<int64_t>(systems.size()), tensor_options).reshape({-1, 1}),
@@ -138,6 +152,40 @@ static void validate_atomic_samples(
             );
             expected_samples = expected_samples->set_intersection(selected_systems);
         }
+    } else if (request->sample_kind() == "atom_pair") {
+        // minimal validation, just that indices are in-bounds
+        auto values = block->samples()->values().to(torch::kCPU);
+        const auto n_systems = static_cast<int64_t>(systems.size());
+        for (int64_t i = 0; i < values.size(0); i++) {
+            auto system_idx = values[i][0].item<int64_t>();
+            auto first_atom_idx = values[i][1].item<int64_t>();
+            auto second_atom_idx = values[i][2].item<int64_t>();
+
+            if (system_idx < 0 || system_idx >= n_systems) {
+                C10_THROW_ERROR(ValueError,
+                    "invalid system index in samples for '" + name + "' output: " +
+                    std::to_string(system_idx) + " is out of bounds"
+                );
+            }
+            const auto& system = systems[system_idx];
+            if (first_atom_idx < 0 || first_atom_idx >= system->size()) {
+                C10_THROW_ERROR(ValueError,
+                    "invalid first_atom index in samples for '" + name + "' output: " +
+                    std::to_string(first_atom_idx) + " is out of bounds for system " +
+                    std::to_string(system_idx)
+                );
+            }
+            if (second_atom_idx < 0 || second_atom_idx >= system->size()) {
+                C10_THROW_ERROR(ValueError,
+                    "invalid second_atom index in samples for '" + name + "' output: " +
+                    std::to_string(second_atom_idx) + " is out of bounds for system " +
+                    std::to_string(system_idx)
+                );
+            }
+        }
+
+        // No independent set of expected samples is built for atom pairs: all
+        // entries were checked above, so the block's own samples are used as
+        // the expected ones. Without this, `expected_samples` would still be a
+        // null `Labels` when it is dereferenced right after this if/else chain.
+        expected_samples = block->samples();
+    } else {
+        C10_THROW_ERROR(ValueError,
+            "got invalid sample_kind '" + request->sample_kind() + "' for '" + name + "'"
+        );
     }
 
     if (expected_samples->set_union(block->samples())->size() != expected_samples->size()) {
@@ -594,10 +642,10 @@ static void check_heat_flux(
     validate_single_block("heat_flux", value);
 
     // Check samples values from systems
-    if (request->per_atom) {
+    if (request->sample_kind() == "atom") {
         C10_THROW_ERROR(ValueError,
-            "invalid 'heat_flux' output: heat flux cannot be per-atom, but the request "
-            "indicates `per_atom=True`"
+            "invalid 'heat_flux' output: heat flux cannot be per-atom, "
+            "but the request indicates `sample_kind='atom'`"
         );
     }
     validate_atomic_samples("heat_flux", value, systems, request, torch::nullopt);