This repository has been archived by the owner on Apr 24, 2024. It is now read-only.
Equi(Kit)Script for workflows that compute, transform, join representations and estimate properties #10
Draft: agoscinski wants to merge 17 commits into main from equikit.
Changes shown below are from 1 of the 17 commits in this pull request.

Commits:
0a5dba6  first draft (agoscinski)
d7ee21c  Slight restructure (PicoCentauri)
423c0e4  Allow floats for Ridge (PicoCentauri)
1c07eda  a lot of fixes (agoscinski)
b59a769  hack for scoring (agoscinski)
8e4e93b  tuples -> dict; hack for scoring (agoscinski)
ce69b11  transforming has to havppen after moving keys (agoscinski)
9e172a1  adding partially working example (agoscinski)
e931335  added md calculator (agoscinski)
97cf6aa  fix typos (agoscinski)
61cd5d9  change example to something working with i-pi (agoscinski)
4f04538  fixing stuff (agoscinski)
b9f03fb  Merge branch 'main' into equikit (PicoCentauri)
d8dae73  Merge branch 'main' into equikit (PicoCentauri)
1a35b9b  fixing the multi_spectra_script notebook for thorben (agoscinski)
bccd644  updates required to do CV with thes script module (agoscinski)
82aa2d5  fix for forces hist (agoscinski)
First changed file (the Ridge linear model):
@@ -30,8 +30,8 @@ class Ridge:
         regularization strength.

     :param parameter_keys: Parameters to perform the regression for.
-        Examples are ``"values"``, ``"positions"`` or
-        ``"cell"``.
+        Examples are ``"values"``, ``"positions"``,
+        ``"cell"`` or a combination of these.
     """

     def __init__(

@@ -45,7 +45,7 @@ def __init__(
         else:
             self.parameter_keys = parameter_keys

-        self._coef = None
+        self._weights = None

     def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
         """Validates :class:`equistore.TensorBlock`'s for the usage in models.

@@ -141,15 +141,15 @@ def fit(
         :param X: training data
         :param y: target values
-        :param alpha: Constant :math:`λ` that multiplies the L2 term, controlling
-            regularization strength. Values must be a non-negative floats
-            i.e. in [0, inf). :math:`λ` can be different for each column in ``X``
+        :param alpha: Constant α that multiplies the L2 term, controlling
+            regularization strength. Values must be non-negative floats
+            i.e. in [0, inf). α can be different for each column in ``X``
            to regulerize each property differently.
         :param sample_weight: sample weights
         :param rcond: Cut-off ratio for small singular values during the fit. For
             the purposes of rank determination, singular values are treated as
             zero if they are smaller than ``rcond`` times the largest singular
-            value in "coefficient" matrix.
+            value in "weightsficient" matrix.
         """

        if type(alpha) is float:
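For context, the ``alpha`` and ``rcond`` parameters documented above describe an ordinary ridge problem with one penalty per property (column of ``X``). A minimal plain-numpy sketch of such a fit, assuming nothing about the equisolve internals (the helper name ``ridge_fit`` is invented for this illustration):

import numpy as np

# Sketch only: solve min_w ||X w - y||^2 + sum_j alpha_j * w_j^2 by augmenting
# the least-squares system, so that numpy's lstsq (with its rcond cut-off for
# small singular values) does the work. This mirrors the docstring above, not
# the actual equisolve code.
def ridge_fit(X, y, alpha, rcond=None):
    n_properties = X.shape[1]
    alpha = np.broadcast_to(np.asarray(alpha, dtype=float), (n_properties,))
    X_aug = np.vstack([X, np.diag(np.sqrt(alpha))])    # one sqrt(alpha_j) * e_j row per property
    y_aug = np.concatenate([y, np.zeros(n_properties)])
    w, *_ = np.linalg.lstsq(X_aug, y_aug, rcond=rcond)
    return w

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
y = X @ np.array([1.0, -2.0, 0.5])
w_uniform = ridge_fit(X, y, alpha=2.0)              # scalar alpha, same penalty per property
w_mixed = ridge_fit(X, y, alpha=[0.1, 1.0, 10.0])   # per-property penalties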
@@ -204,19 +204,19 @@ def fit(
         )
         weights_blocks.append(weights_block)

-        # convert coefs to dictionary allowing dump of an instance in a pickle file
-        self._coef = tensor_map_to_dict(TensorMap(X.keys, coef_blocks))
+        # convert weightsficients to a dictionary allowing pickle dump of an instance

[Review comment on the line above: "magnificent"]

+        self._weights = tensor_map_to_dict(TensorMap(X.keys, weights_blocks))

         return self

     @property
-    def coef(self) -> TensorMap:
+    def weights(self) -> TensorMap:
         """``Tensormap`` containing the weights of the provided training data."""

-        if self._coef is None:
+        if self._weights is None:
             raise ValueError("No weights. Call fit method first.")

-        return dict_to_tensor_map(self._coef)
+        return dict_to_tensor_map(self._weights)

     def predict(self, X: TensorMap) -> TensorMap:
         """

@@ -225,14 +225,14 @@ def predict(self, X: TensorMap) -> TensorMap:
         :param X: samples
         :returns: predicted values
         """
-        return dot(X, self.coef)
+        return dot(X, self.weights)

-    def score(self, X: TensorMap, y: TensorMap, parameter_keys: Union[List[str], str] = None) -> float:
-        """Return the coefficient of determination of the prediction.
+    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> float:
+        """Return the weights of determination of the prediction.

         :param X: Test samples
-        :param y: True values for `X`.
-        :param parameter_keys: Parameter to score for. Examples are ``"values"``,
+        :param y: True values for ``X``.
+        :param parameter_key: Parameter to score for. Examples are ``"values"``,
            ``"positions"`` or ``"cell"``.

         :returns score: :math:`RMSE` for each block in ``self.predict(X)`` with
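After the rename, ``predict`` is a dot product of ``X`` with the fitted ``weights``, and ``score`` reports an RMSE per block, as the docstring above states. A hedged plain-numpy sketch of those two operations, with the TensorMap bookkeeping omitted and the helper names invented for this illustration:

import numpy as np

# Illustrative only: per-block prediction is X @ w, and the score described in
# the docstring above is the root-mean-square error against the reference y.
def predict_block(X, w):
    return X @ w

def rmse_score(X, y, w):
    y_pred = predict_block(X, w)
    return np.sqrt(np.mean((y_pred - y) ** 2))

rng = np.random.default_rng(1)
X = rng.normal(size=(50, 4))
w = rng.normal(size=4)
y = X @ w + 0.05 * rng.normal(size=50)
print(rmse_score(X, y, w))   # small value, dominated by the added noise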
Second changed file (the Ridge test module):
@@ -100,9 +100,15 @@ def test_ridge(self, num_properties, num_targets):
         clf = Ridge(parameter_keys="values")
         clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)

+<<<<<<< HEAD

[Review comment on the line above: "mmmmm"]

         assert len(clf.coef) == 2
         assert clf.coef.block(0).values.shape[1] == num_properties
         assert clf.coef.block(1).values.shape[1] == num_properties
+=======
+        assert len(clf.weights) == 2
+        assert clf.weights.block(0).values.shape[1] == num_properties
+        assert clf.weights.block(1).values.shape[1] == num_properties
+>>>>>>> main

     def test_double_fit_call(self):
         """Test if regression works properly if fit method is called multiple times.

@@ -125,7 +131,11 @@ def test_double_fit_call(self):
         clf.fit(X=X, y=y, alpha=alpha)
         clf.fit(X=X, y=y, alpha=alpha)

+<<<<<<< HEAD
         assert len(clf.coef) == num_blocks
+=======
+        assert len(clf.weights) == num_blocks
+>>>>>>> main

     @pytest.mark.parametrize("num_properties", num_properties)
     @pytest.mark.parametrize("num_targets", num_targets)
@@ -145,7 +155,11 @@ def test_exact_no_regularization(self, num_properties, num_targets, mean):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
+<<<<<<< HEAD
         w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main

         # Check that the two approaches yield the same result
         assert_allclose(w_solver, w_exact, atol=1e-13, rtol=1e-10)

@@ -177,7 +191,11 @@ def test_exact(self, num_properties, num_targets, mean, regularization):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
+<<<<<<< HEAD
         w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main
         w_exact_with_regularization = self.numpy_solver(X, y, sample_w, property_w)

         # Check that the two approaches yield the same result

@@ -200,7 +218,11 @@ def test_predict(self, num_properties, num_targets, mean):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
+<<<<<<< HEAD
         w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main

         # Generate new data
         X_validation = self.rng.normal(mean, 1, size=(50, num_properties))

@@ -238,7 +260,11 @@ def test_infinite_regularization(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
+<<<<<<< HEAD
         w_solver = ridge_class.coef.block().values[0, :]
+=======
+        w_solver = ridge_class.weights.block().values[0, :]
+>>>>>>> main
         w_zeros = np.zeros((num_properties,))

         # Check that the two approaches yield the same result

@@ -265,11 +291,11 @@ def test_consistent_weights_scaling(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_ref = ridge_class.coef.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
         ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
             X, y, scaling * property_w, scaling * sample_w
         )
-        w_scaled = ridge_class_scaled.coef.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]

         # Check that the two approaches yield the same result
         assert_allclose(w_scaled, w_ref, atol=1e-15, rtol=1e-8)

@@ -295,11 +321,11 @@ def test_consistent_target_scaling(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_ref = ridge_class.coef.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
         ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
             scaling * X, scaling * y, property_w, scaling * sample_w
         )
-        w_scaled = ridge_class_scaled.coef.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]

         # Check that the two approaches yield the same result
         assert_allclose(w_scaled, w_ref, atol=1e-11, rtol=1e-8)
@@ -324,13 +350,35 @@ def test_sample_weights(self):
         # TODO
         pass

-    def test_alpha_flota(self):
-        """Test that Ridge takes also a float."""
-        pass
+    def test_alpha_float(self):
+        """Test float alpha"""
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])
+        alpha_arr = 2 * np.ones([1, 1, 10])

+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)
+        alpha = tensor_to_tensormap(alpha_arr)

+        clf = Ridge(parameter_keys="values")

+        weights_arr = clf.fit(X=X, y=y, alpha=alpha).weights
+        weights_float = clf.fit(X=X, y=y, alpha=2.0).weights

+        assert_equal(weights_float.block().values, weights_arr.block().values)

     def test_alpha_wrong_type(self):
         """Test error raise if alpha is neither a float nor a TensorMap."""
-        pass
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])

+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)

+        clf = Ridge(parameter_keys="values")

+        with pytest.raises(ValueError, match="alpha must either be a float or"):
+            clf.fit(X=X, y=y, alpha="foo")

     @pytest.mark.parametrize(
         "parameter_keys", [("values"), ("values", "positions"), ("positions")]
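The new ``test_alpha_float`` checks that a scalar ``alpha`` behaves like a constant per-property ``alpha`` array. The same invariance, illustrated with a plain-numpy closed-form ridge solution rather than the equisolve test code (``ridge_fit`` here is an ad-hoc helper, not part of the library):

import numpy as np

# Sketch: the closed-form ridge solution with a scalar penalty equals the one
# with a constant per-property penalty vector of the same value.
def ridge_fit(X, y, alpha):
    alpha = np.broadcast_to(np.asarray(alpha, dtype=float), (X.shape[1],))
    return np.linalg.solve(X.T @ X + np.diag(alpha), X.T @ y)

rng = np.random.default_rng(0)
X = rng.random((10, 10))
y = rng.random(10)

np.testing.assert_allclose(
    ridge_fit(X, y, 2.0),
    ridge_fit(X, y, 2 * np.ones(10)),
)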
@@ -393,7 +441,11 @@ def test_parameter_keys(self, parameter_keys):
         clf.fit(X=X, y=y, alpha=alpha)

         assert_allclose(
+<<<<<<< HEAD
             clf.coef.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
+=======
+            clf.weights.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
+>>>>>>> main
         )

         # Test prediction
You are viewing a condensed version of this merge commit.
Review comment: "i see now find->coef; replace->weights"