This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Equi(Kit)Script for workflows that compute, transform, join representations and estimate properties #10

Draft · wants to merge 17 commits into base: main
Changes from 1 commit
34 changes: 17 additions & 17 deletions src/equisolve/numpy/models/linear_model.py
@@ -30,8 +30,8 @@ class Ridge:
        regularization strength.

    :param parameter_keys: Parameters to perform the regression for.
-        Examples are ``"values"``, ``"positions"`` or
-        ``"cell"``.
+        Examples are ``"values"``, ``"positions"``,
+        ``"cell"`` or a combination of these.
    """

    def __init__(
@@ -45,7 +45,7 @@ def __init__(
        else:
            self.parameter_keys = parameter_keys

-        self._coef = None
+        self._weights = None

    def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
        """Validates :class:`equistore.TensorBlock`'s for the usage in models.
@@ -141,15 +141,15 @@ def fit(

        :param X: training data
        :param y: target values
-        :param alpha: Constant :math:`λ` that multiplies the L2 term, controlling
-            regularization strength. Values must be a non-negative floats
-            i.e. in [0, inf). :math:`λ` can be different for each column in ``X``
+        :param alpha: Constant α that multiplies the L2 term, controlling
+            regularization strength. Values must be non-negative floats
+            i.e. in [0, inf). α can be different for each column in ``X``
            to regularize each property differently.
        :param sample_weight: sample weights
        :param rcond: Cut-off ratio for small singular values during the fit. For
            the purposes of rank determination, singular values are treated as
            zero if they are smaller than ``rcond`` times the largest singular
-            value in "coefficient" matrix.
+            value in "weightsficient" matrix.

Contributor commented: i see now find->coef; replace->weights

        """

if type(alpha) is float:
@@ -204,19 +204,19 @@
            )
            weights_blocks.append(weights_block)

-        # convert coefs to dictionary allowing dump of an instance in a pickle file
-        self._coef = tensor_map_to_dict(TensorMap(X.keys, coef_blocks))
+        # convert weightsficients to a dictionary allowing pickle dump of an instance
+        self._weights = tensor_map_to_dict(TensorMap(X.keys, weights_blocks))

Contributor commented: magnificent

        return self

    @property
-    def coef(self) -> TensorMap:
+    def weights(self) -> TensorMap:
        """``TensorMap`` containing the weights of the provided training data."""

-        if self._coef is None:
+        if self._weights is None:
            raise ValueError("No weights. Call fit method first.")

-        return dict_to_tensor_map(self._coef)
+        return dict_to_tensor_map(self._weights)
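
Editor's note: the point of `tensor_map_to_dict` is pickling. A hedged sketch, assuming `clf` is the fitted `Ridge` from the sketches above: since `_weights` is a plain dict after `fit()`, the whole estimator round-trips through pickle, and the `weights` property rebuilds the `TensorMap` on access.

```python
import pickle

restored = pickle.loads(pickle.dumps(clf))  # works because _weights is a plain dict
assert restored.weights.block().values.shape == clf.weights.block().values.shape
```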

    def predict(self, X: TensorMap) -> TensorMap:
        """
@@ -225,14 +225,14 @@ def predict(self, X: TensorMap) -> TensorMap:
        :param X: samples
        :returns: predicted values
        """
-        return dot(X, self.coef)
+        return dot(X, self.weights)

-    def score(self, X: TensorMap, y: TensorMap, parameter_keys: Union[List[str], str] = None) -> float:
-        """Return the coefficient of determination of the prediction.
+    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> float:
+        """Return the weights of determination of the prediction.

        :param X: Test samples
-        :param y: True values for `X`.
-        :param parameter_keys: Parameter to score for. Examples are ``"values"``,
+        :param y: True values for ``X``.
+        :param parameter_key: Parameter to score for. Examples are ``"values"``,
            ``"positions"`` or ``"cell"``.

        :returns score: :math:`RMSE` for each block in ``self.predict(X)`` with
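
Editor's note: an inference sketch for the merged API, continuing from above. `predict()` contracts `X` with the stored weights via `dot`, and `score()` now takes a single `parameter_key` string; `clf`, `X`, and `y` are assumed from the earlier sketches.

```python
y_pred = clf.predict(X)                          # TensorMap: dot(X, clf.weights)
rmse = clf.score(X, y, parameter_key="values")   # RMSE per block, per this docstring
```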
68 changes: 60 additions & 8 deletions tests/numpy/models/test_linear_model.py
@@ -100,9 +100,15 @@ def test_ridge(self, num_properties, num_targets):
        clf = Ridge(parameter_keys="values")
        clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)

+<<<<<<< HEAD

Contributor commented: mmmmm

        assert len(clf.coef) == 2
        assert clf.coef.block(0).values.shape[1] == num_properties
        assert clf.coef.block(1).values.shape[1] == num_properties
+=======
+        assert len(clf.weights) == 2
+        assert clf.weights.block(0).values.shape[1] == num_properties
+        assert clf.weights.block(1).values.shape[1] == num_properties
+>>>>>>> main
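
Editor's note: for reference, how the block above would read once the conflict is resolved in favour of main's coef -> weights rename (an assumption on the intended resolution; this commit still carries the markers):

```python
# resolution keeping main's side of the conflict (assumed)
assert len(clf.weights) == 2
assert clf.weights.block(0).values.shape[1] == num_properties
assert clf.weights.block(1).values.shape[1] == num_properties
```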

def test_double_fit_call(self):
"""Test if regression works properly if fit method is called multiple times.
@@ -125,7 +131,11 @@ def test_double_fit_call(self):
        clf.fit(X=X, y=y, alpha=alpha)
        clf.fit(X=X, y=y, alpha=alpha)

+<<<<<<< HEAD
        assert len(clf.coef) == num_blocks
+=======
+        assert len(clf.weights) == num_blocks
+>>>>>>> main

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
@@ -145,7 +155,11 @@ def test_exact_no_regularization(self, num_properties, num_targets, mean):
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
+<<<<<<< HEAD
        w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_exact, atol=1e-13, rtol=1e-10)
@@ -177,7 +191,11 @@ def test_exact(self, num_properties, num_targets, mean, regularization):
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
+<<<<<<< HEAD
        w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main
w_exact_with_regularization = self.numpy_solver(X, y, sample_w, property_w)

# Check that the two approaches yield the same result
@@ -200,7 +218,11 @@ def test_predict(self, num_properties, num_targets, mean):
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
+<<<<<<< HEAD
        w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main

# Generate new data
X_validation = self.rng.normal(mean, 1, size=(50, num_properties))
@@ -238,7 +260,11 @@ def test_infinite_regularization(
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
+<<<<<<< HEAD
        w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main
w_zeros = np.zeros((num_properties,))

# Check that the two approaches yield the same result
@@ -265,11 +291,11 @@ def test_consistent_weights_scaling(
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
-        w_ref = ridge_class.coef.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
        ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
            X, y, scaling * property_w, scaling * sample_w
        )
-        w_scaled = ridge_class_scaled.coef.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_scaled, w_ref, atol=1e-15, rtol=1e-8)
@@ -295,11 +321,11 @@ def test_consistent_target_scaling(
        ridge_class = self.equisolve_solver_from_numpy_arrays(
            X, y, property_w, sample_w
        )
-        w_ref = ridge_class.coef.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
        ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
            scaling * X, scaling * y, property_w, scaling * sample_w
        )
-        w_scaled = ridge_class_scaled.coef.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_scaled, w_ref, atol=1e-11, rtol=1e-8)
@@ -324,13 +350,35 @@ def test_sample_weights(self):
        # TODO
        pass

-    def test_alpha_flota(self):
-        """Test that Ridge takes also a float."""
-        pass
+    def test_alpha_float(self):
+        """Test float alpha"""
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])
+        alpha_arr = 2 * np.ones([1, 1, 10])
+
+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)
+        alpha = tensor_to_tensormap(alpha_arr)
+
+        clf = Ridge(parameter_keys="values")
+
+        weights_arr = clf.fit(X=X, y=y, alpha=alpha).weights
+        weights_float = clf.fit(X=X, y=y, alpha=2.0).weights
+
+        assert_equal(weights_float.block().values, weights_arr.block().values)

    def test_alpha_wrong_type(self):
        """Test error raise if alpha is neither a float nor a TensorMap."""
-        pass
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])
+
+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)
+
+        clf = Ridge(parameter_keys="values")
+
+        with pytest.raises(ValueError, match="alpha must either be a float or"):
+            clf.fit(X=X, y=y, alpha="foo")

@pytest.mark.parametrize(
"parameter_keys", [("values"), ("values", "positions"), ("positions")]
@@ -393,7 +441,11 @@ def test_parameter_keys(self, parameter_keys):
        clf.fit(X=X, y=y, alpha=alpha)

        assert_allclose(
+<<<<<<< HEAD
            clf.coef.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
+=======
+            clf.weights.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
+>>>>>>> main
        )

        # Test prediction

# Test prediction
You are viewing a condensed version of this merge commit.