Commit 06bb17b

v0.1.7 upload

- Revised the model classes KRRModel, KRRLocalModel, SORFModel, SORFLocalModel, and FJKModel. The most notable change is that they all now perform hyperparameter optimization using leave-one-out errors, with a two-tier optimization of lambda and sigma (similar to the MSORF models).
- MSORF can be used with PCA and can now be trained on datasets too large to be stored in RAM at once.
- The dimensionality of forces can now be changed to a value different from nCartDim=3 (introduced for alchemical derivatives).
- Importing from qml2.ensemble no longer requires installation of rdkit, pyscf, etc.
- Added a constructor from ASE objects.
- Minor bugfixes.

1 parent 8907496   commit 06bb17b
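The leave-one-out hyperparameter optimization mentioned above can be done cheaply for kernel ridge regression because the LOO residuals have a closed form: with A = K + lambda*I and alpha = A^-1 y, the i-th LOO residual equals alpha_i / (A^-1)_ii, so no retraining per held-out point is needed. The sketch below illustrates the identity in plain NumPy and checks it against brute-force retraining; it is an illustration of the principle, not qml2's actual implementation, and all names in it are made up for this example.

```python
import numpy as np

def loo_residuals(K, y, l2reg):
    # Closed-form leave-one-out residuals for kernel ridge regression:
    # with A = K + l2reg*I and alpha = A^-1 @ y, residual_i = alpha_i / (A^-1)_ii.
    Ainv = np.linalg.inv(K + l2reg * np.eye(len(y)))
    return (Ainv @ y) / np.diag(Ainv)

# Brute-force check: actually retrain with each point held out.
rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
sq = np.sum(X**2, axis=1)
K = np.exp(-(sq[:, None] + sq[None, :] - 2.0 * X @ X.T) / 2.0)  # Gaussian kernel
y = rng.normal(size=20)
l2reg = 1e-3

fast = loo_residuals(K, y, l2reg)
brute = np.empty(20)
for i in range(20):
    mask = np.arange(20) != i
    alpha = np.linalg.solve(K[np.ix_(mask, mask)] + l2reg * np.eye(19), y[mask])
    brute[i] = y[i] - K[i, mask] @ alpha
print(np.allclose(fast, brute))
```

A two-tier optimization then amounts to scanning lambda (cheap, kernel fixed) inside a scan over sigma (expensive, kernel rebuilt), minimizing a loss over these residuals.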


69 files changed: +3232 additions, -1067 deletions
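The commit message notes that MSORF models can now be trained on datasets too large to hold in RAM at once. A standard way to achieve this for random-feature models is to accumulate the normal equations batch by batch and solve once at the end. The sketch below shows the principle in plain NumPy; the toy feature map and all names are illustrative, not qml2's actual implementation (qml2 uses structured orthogonal random feature transforms rather than a dense random projection).

```python
import numpy as np

rng = np.random.default_rng(1)
nfeatures, dim, l2reg = 64, 5, 1e-6
W = rng.normal(size=(dim, nfeatures))  # fixed random projection (toy stand-in for SORF transforms)

def features(X):
    # toy random-feature map; only its fixed, deterministic nature matters here
    return np.cos(X @ W)

# Accumulate Z^T Z and Z^T y over batches; only one batch is in memory at a time.
ZtZ = np.zeros((nfeatures, nfeatures))
Zty = np.zeros(nfeatures)
all_Z, all_y = [], []  # kept only to verify the result below
for _ in range(10):  # in practice each batch would be streamed from disk
    X = rng.normal(size=(100, dim))
    y = X[:, 0]  # toy labels
    Z = features(X)
    ZtZ += Z.T @ Z
    Zty += Z.T @ y
    all_Z.append(Z)
    all_y.append(y)

streamed = np.linalg.solve(ZtZ + l2reg * np.eye(nfeatures), Zty)

# Identical (up to round-off) to solving with the full feature matrix in memory:
Zfull, yfull = np.vstack(all_Z), np.concatenate(all_y)
in_memory = np.linalg.solve(Zfull.T @ Zfull + l2reg * np.eye(nfeatures), yfull @ Zfull)
print(np.allclose(streamed, in_memory))
```

The memory cost of the accumulated matrices depends only on the number of features, not on the number of training points.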

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions

@@ -78,7 +78,7 @@ repos:
       stages: [commit-msg]
       args: []

-  - repo: https://github.com/kieran-ryan/pyprojectsort
-    rev: v0.4.0
+  - repo: https://github.com/tox-dev/pyproject-fmt
+    rev: v2.5.0
     hooks:
-      - id: pyprojectsort
+      - id: pyproject-fmt

README.md

Lines changed: 7 additions & 4 deletions

@@ -18,21 +18,21 @@ Some parts of the code depend on additional dependencies that can be installed w
 - `orb_ml` - for FJK (machine learning from orbital information).

-- `msorf` - for MSORF (everything in `qml2.multilevel_sorf`).
+- `models` - for hyperparameter optimization procedures in `qml2.models` and `qml2.multilevel_sorf`.

 - `morfeus` - for applications dependent on the `morfeus-ml` package (everything related to conformer ensemble generation).

 - `torch` - Torch functionality (efficiency questionable right now TBH).

-For example, to use the `orb_ml` and `msorf` optional dependency flags in your installation use
+For example, to use the `orb_ml` and `models` optional dependency flags in your installation use

 ```bash
-pip install .[orb_ml,msorf]
+pip install .[orb_ml,models]
 ```
 or, if `makefile` is installed,

 ```
-make install OPT=[orb_ml,msorf]
+make install OPT=[orb_ml,models]
 ```

 ## :clipboard: Testing
@@ -72,6 +72,9 @@ This will create `manual.html` file that can be opened with an Internet browser.

 `QML2_AVOID_NUMBA_NUMPY_PARALLELIZATION` - some Numba routines in the code call Numpy routines in parallel, which creates problems in some setups (e.g. when both Numba and Numpy try to parallelize over a large number of threads without taking each other into account). Setting this environment variable to `1` disables Numba parallelization in such routines, leaving them to be parallelized exclusively with Numpy.

+`QML2_AVOID_SORF_NUMBA_PARALLELIZATION` - setting this environment variable to `1` disables Numba parallelization over feature vectors for SORF routines in `qml2.kernels.sorf` and `qml2.kernels.gradient_sorf` (also referred to in the corresponding `qml2.models` classes). Helps if reductors are used in systems where Numba and Numpy try to parallelize simultaneously (see `QML2_AVOID_NUMBA_NUMPY_PARALLELIZATION`).
+
 ### Experimental

 `QML2_DEFAULT_JIT` - setting to `NUMBA` (default) or `TORCH` (both are case insensitive) determines whether Numba or TorchScript JIT compilation is used. Also see `jit_interfaces.set_default_jit`.
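The environment variables described in the README section above are set in the shell before launching a run, for example:

```shell
# Keep Numba from parallelizing routines that already parallelize via Numpy
# (variable names are taken from the README text above; the commented-out
# script name is hypothetical).
export QML2_AVOID_NUMBA_NUMPY_PARALLELIZATION=1
export QML2_AVOID_SORF_NUMBA_PARALLELIZATION=1
# python run_training.py
```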

Lines changed: 51 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.models.loss_functions import MAE
from qml2.models.sorf import SORFModel

xyzs = []
energies = []

training_set_size = 2001
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

# NOTE: shift_quantities=True shifts the labels by their mean; since the labels
# in this example are extensive, this is not guaranteed to improve the results.
model = SORFModel(shift_quantities=True)

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))

Lines changed: 51 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.models.krr import KRRModel
from qml2.models.loss_functions import MAE

xyzs = []
energies = []

training_set_size = 2001
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

# NOTE: we could use shift_quantities=True to shift the labels by their mean,
# but since the labels in this example are extensive, that is not likely to
# improve the results.
model = KRRModel(shift_quantities=False)

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))

Lines changed: 51 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.models.loss_functions import MAE
from qml2.models.sorf import SORFLocalModel

xyzs = []
energies = []

training_set_size = 501
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

# Using shift_quantities=True enables the dressed-atom approach; it requires
# defining `possible_nuclear_charges`, though.
model = SORFLocalModel(shift_quantities=True, possible_nuclear_charges=np.array([1, 6, 7, 8, 16]))

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))

examples/models/ex_FCHL19_model.py

Lines changed: 51 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.models.krr import KRRLocalModel
from qml2.models.loss_functions import MAE

xyzs = []
energies = []

training_set_size = 501
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

# Using shift_quantities=True enables the dressed-atom approach; it requires
# defining `possible_nuclear_charges`, though.
model = KRRLocalModel(shift_quantities=True, possible_nuclear_charges=np.array([1, 6, 7, 8, 16]))

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))

Lines changed: 68 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.kernels import local_dn_matern_kernel, local_dn_matern_kernel_symmetric
from qml2.models.krr import KRRLocalModel
from qml2.models.loss_functions import MAE
from qml2.representations.calculators import SLATMCalculator
from qml2.utils import get_sorted_elements

xyzs = []
energies = []

training_set_size = 501
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)
all_nuclear_charges = []

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)
        all_nuclear_charges.append(comp.nuclear_charges)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

slatm_calculator = SLATMCalculator(all_nuclear_charges)
possible_nuclear_charges = get_sorted_elements(np.concatenate(all_nuclear_charges))
print("Nuclear charges found:", possible_nuclear_charges)

# Using shift_quantities=True enables the dressed-atom approach; it requires
# defining `possible_nuclear_charges`, though.
model = KRRLocalModel(
    shift_quantities=True,
    possible_nuclear_charges=possible_nuclear_charges,
    representation_function=slatm_calculator,
    rep_kwargs={"local": True},
    kernel_kwargs={"order": 0, "metric": "l2"},
    kernel_function=local_dn_matern_kernel,
    kernel_function_symmetric=local_dn_matern_kernel_symmetric,
)

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))

Lines changed: 67 additions & 0 deletions

import csv
import random
import tarfile

import numpy as np

from qml2 import Compound
from qml2.models.loss_functions import MAE
from qml2.models.sorf import SORFLocalModel
from qml2.representations.calculators import SLATMCalculator
from qml2.utils import get_sorted_elements

xyzs = []
energies = []

training_set_size = 501
test_set_size = 1000
num_mols = training_set_size + test_set_size

with open("../../tests/test_data/hof_qm7.txt") as csvfile:
    reader = csv.reader(csvfile, delimiter=" ")
    all_rows = list(reader)
random.shuffle(all_rows)
for row in all_rows[:num_mols]:
    xyzs.append(row[0])
    energies.append(float(row[1]))

energies = np.array(energies)
all_nuclear_charges = []

compounds = []
with tarfile.open("../../tests/test_data/qm7.tar.gz") as tar:
    for xyz_name in xyzs:
        xyz = tar.extractfile(xyz_name)
        comp = Compound(xyz=xyz)
        compounds.append(comp)
        all_nuclear_charges.append(comp.nuclear_charges)

train_compounds = compounds[:training_set_size]
test_compounds = compounds[training_set_size:]

train_quantities = energies[:training_set_size]
test_quantities = energies[training_set_size:]

slatm_calculator = SLATMCalculator(all_nuclear_charges)
possible_nuclear_charges = get_sorted_elements(np.concatenate(all_nuclear_charges))
print("Nuclear charges found:", possible_nuclear_charges)

# Using shift_quantities=True enables the dressed-atom approach; it requires
# defining `possible_nuclear_charges`, though.
# NOTE: nfeatures might need to be increased if the aSLATM representation is too large.
model = SORFLocalModel(
    shift_quantities=True,
    possible_nuclear_charges=possible_nuclear_charges,
    representation_function=slatm_calculator,
    rep_kwargs={"local": True},
    nfeatures=32768,
    ntransforms=3,
)

model.train(training_compounds=train_compounds, training_quantities=train_quantities)

print("Optimized sigma:", model.sigma)
print("Optimized l2reg divided by average kernel element:", model.l2reg_diag_ratio)

predictions = model.predict_from_compounds(test_compounds)
print("Prediction MAE:", MAE()(predictions - test_quantities))
print("Test set quantity STD:", np.std(test_quantities))
