From 3015941b5c61b686161701887a8618f5f77044bb Mon Sep 17 00:00:00 2001
From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 20 Apr 2021 12:16:39 -0600
Subject: Option to keep parameters reference in `ExponentialMovingAverage`
 (#5)

---
 tests/test_ema.py | 64 ++++++++++++++++++++++++++++++++++++++----------
 torch_ema/ema.py  | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 113 insertions(+), 24 deletions(-)

diff --git a/tests/test_ema.py b/tests/test_ema.py
index 6d7e43e..ad6ee37 100644
--- a/tests/test_ema.py
+++ b/tests/test_ema.py
@@ -7,7 +7,8 @@ from torch_ema import ExponentialMovingAverage
 
 @pytest.mark.parametrize("decay", [0.995, 0.9])
 @pytest.mark.parametrize("use_num_updates", [True, False])
-def test_val_error(decay, use_num_updates):
+@pytest.mark.parametrize("explicit_params", [True, False])
+def test_val_error(decay, use_num_updates, explicit_params):
     """Confirm that EMA validation error is lower than raw validation error."""
     torch.manual_seed(0)
     x_train = torch.rand((100, 10))
@@ -30,27 +31,37 @@ def test_val_error(decay, use_num_updates):
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-        ema.update(model.parameters())
+        if explicit_params:
+            ema.update(model.parameters())
+        else:
+            ema.update()
 
     # Validation: original
     model.eval()
     logits = model(x_val)
     loss_orig = torch.nn.functional.cross_entropy(logits, y_val)
-    print(f"Original loss: {loss_orig}")
 
     # Validation: with EMA
     # First save original parameters before replacing with EMA version
-    ema.store(model.parameters())
+    if explicit_params:
+        ema.store(model.parameters())
+    else:
+        ema.store()
     # Copy EMA parameters to model
-    ema.copy_to(model.parameters())
+    if explicit_params:
+        ema.copy_to(model.parameters())
+    else:
+        ema.copy_to()
     logits = model(x_val)
     loss_ema = torch.nn.functional.cross_entropy(logits, y_val)
 
-    print(f"EMA loss: {loss_ema}")
     assert loss_ema < loss_orig, "EMA loss wasn't lower"
 
     # Test restore
-    ema.restore(model.parameters())
+    if explicit_params:
+        ema.restore(model.parameters())
+    else:
+        ema.restore()
     model.eval()
     logits = model(x_val)
     loss_orig2 = torch.nn.functional.cross_entropy(logits, y_val)
@@ -60,7 +71,8 @@ def test_val_error(decay, use_num_updates):
 
 @pytest.mark.parametrize("decay", [0.995, 0.9, 0.0, 1.0])
 @pytest.mark.parametrize("use_num_updates", [True, False])
-def test_store_restore(decay, use_num_updates):
+@pytest.mark.parametrize("explicit_params", [True, False])
+def test_store_restore(decay, use_num_updates, explicit_params):
     model = torch.nn.Linear(10, 2)
     ema = ExponentialMovingAverage(
         model.parameters(),
@@ -68,15 +80,22 @@ def test_store_restore(decay, use_num_updates):
         use_num_updates=use_num_updates
     )
     orig_weight = model.weight.clone().detach()
-    ema.store(model.parameters())
+    if explicit_params:
+        ema.store(model.parameters())
+    else:
+        ema.store()
     with torch.no_grad():
         model.weight.uniform_(0.0, 1.0)
-    ema.restore(model.parameters())
+    if explicit_params:
+        ema.restore(model.parameters())
+    else:
+        ema.restore()
     assert torch.all(model.weight == orig_weight)
 
 
 @pytest.mark.parametrize("decay", [0.995, 0.9, 0.0, 1.0])
-def test_update(decay):
+@pytest.mark.parametrize("explicit_params", [True, False])
+def test_update(decay, explicit_params):
     model = torch.nn.Linear(10, 2, bias=False)
     with torch.no_grad():
         model.weight.fill_(0.0)
@@ -87,10 +106,29 @@ def test_update(decay):
     )
     with torch.no_grad():
         model.weight.fill_(1.0)
-    ema.update(model.parameters())
+    if explicit_params:
+        ema.update(model.parameters())
+    else:
+        ema.update()
     assert torch.all(model.weight == 1.0), "ema.update changed model weights"
-    ema.copy_to(model.parameters())
+    if explicit_params:
+        ema.copy_to(model.parameters())
+    else:
+        ema.copy_to()
     assert torch.allclose(
         model.weight,
         torch.full(size=(1,), fill_value=(1.0 - decay))
     ), "average was wrong"
+
+
+def test_explicit_params():
+    model = torch.nn.Linear(10, 2)
+    with torch.no_grad():
+        model.weight.fill_(0.0)
+    ema = ExponentialMovingAverage(model.parameters(), decay=0.9)
+    model2 = torch.nn.Linear(10, 2)
+    with torch.no_grad():
+        model2.weight.fill_(1.0)
+    ema.update(model2.parameters())
+    ema.copy_to()
+    assert not torch.all(model.weight == 0.0)
\ No newline at end of file
diff --git a/torch_ema/ema.py b/torch_ema/ema.py
index 0233c78..2e8eb6f 100644
--- a/torch_ema/ema.py
+++ b/torch_ema/ema.py
@@ -1,7 +1,8 @@
 from __future__ import division
 from __future__ import unicode_literals
 
-from typing import Iterable
+from typing import Iterable, Optional
+import weakref
 
 import torch
 
@@ -13,8 +14,8 @@ class ExponentialMovingAverage:
     Maintains (exponential) moving average of a set of parameters.
 
     Args:
-        parameters: Iterable of `torch.nn.Parameter`; usually the result of
-            `model.parameters()`.
+        parameters: Iterable of `torch.nn.Parameter` (typically from
+            `model.parameters()`).
         decay: The exponential decay.
         use_num_updates: Whether to use number of updates when computing
             averages.
@@ -29,11 +30,40 @@ class ExponentialMovingAverage:
             raise ValueError('Decay must be between 0 and 1')
         self.decay = decay
         self.num_updates = 0 if use_num_updates else None
+        parameters = list(parameters)
         self.shadow_params = [p.clone().detach()
                               for p in parameters if p.requires_grad]
         self.collected_params = []
+        # By maintaining only a weakref to each parameter,
+        # we maintain the old GC behaviour of ExponentialMovingAverage:
+        # if the model goes out of scope but the ExponentialMovingAverage
+        # is kept, no references to the model or its parameters will be
+        # maintained, and the model will be cleaned up.
+        self._params_refs = [weakref.ref(p) for p in parameters]
 
-    def update(self, parameters: Iterable[torch.nn.Parameter]) -> None:
+    def _get_parameters(
+        self,
+        parameters: Optional[Iterable[torch.nn.Parameter]]
+    ) -> Iterable[torch.nn.Parameter]:
+        if parameters is None:
+            parameters = [p() for p in self._params_refs]
+            if any(p is None for p in parameters):
+                raise ValueError(
+                    "(One of) the parameters with which this "
+                    "ExponentialMovingAverage "
+                    "was initialized no longer exists (was garbage collected);"
+                    " please either provide `parameters` explicitly or keep "
+                    "the model to which they belong from being garbage "
+                    "collected."
+                )
+            return parameters
+        else:
+            return parameters
+
+    def update(
+        self,
+        parameters: Optional[Iterable[torch.nn.Parameter]] = None
+    ) -> None:
         """
         Update currently maintained parameters.
 
@@ -42,8 +72,11 @@ class ExponentialMovingAverage:
 
         Args:
           parameters: Iterable of `torch.nn.Parameter`; usually the same set of
-            parameters used to initialize this object.
+            parameters used to initialize this object. If `None`, the
+            parameters with which this `ExponentialMovingAverage` was
+            initialized will be used.
         """
+        parameters = self._get_parameters(parameters)
         decay = self.decay
         if self.num_updates is not None:
             self.num_updates += 1
@@ -60,31 +93,46 @@ class ExponentialMovingAverage:
                 tmp.mul_(one_minus_decay)
                 s_param.sub_(tmp)
 
-    def copy_to(self, parameters: Iterable[torch.nn.Parameter]) -> None:
+    def copy_to(
+        self,
+        parameters: Optional[Iterable[torch.nn.Parameter]] = None
+    ) -> None:
         """
         Copy current parameters into given collection of parameters.
 
         Args:
           parameters: Iterable of `torch.nn.Parameter`; the parameters to be
-            updated with the stored moving averages.
+            updated with the stored moving averages. If `None`, the
+            parameters with which this `ExponentialMovingAverage` was
+            initialized will be used.
         """
+        parameters = self._get_parameters(parameters)
         for s_param, param in zip(self.shadow_params, parameters):
             if param.requires_grad:
                 param.data.copy_(s_param.data)
 
-    def store(self, parameters: Iterable[torch.nn.Parameter]) -> None:
+    def store(
+        self,
+        parameters: Optional[Iterable[torch.nn.Parameter]] = None
+    ) -> None:
         """
         Save the current parameters for restoring later.
 
         Args:
           parameters: Iterable of `torch.nn.Parameter`; the parameters to be
-            temporarily stored.
+            temporarily stored. If `None`, the parameters with which this
+            `ExponentialMovingAverage` was initialized will be used.
         """
+        parameters = self._get_parameters(parameters)
         self.collected_params = [param.clone()
                                  for param in parameters
                                  if param.requires_grad]
 
-    def restore(self, parameters: Iterable[torch.nn.Parameter]) -> None:
+
+    def restore(
+        self,
+        parameters: Optional[Iterable[torch.nn.Parameter]] = None
+    ) -> None:
         """
         Restore the parameters stored with the `store` method.
         Useful to validate the model with EMA parameters without affecting the
@@ -94,8 +142,11 @@ class ExponentialMovingAverage:
 
         Args:
           parameters: Iterable of `torch.nn.Parameter`; the parameters to be
-            updated with the stored parameters.
+            updated with the stored parameters. If `None`, the
+            parameters with which this `ExponentialMovingAverage` was
+            initialized will be used.
         """
+        parameters = self._get_parameters(parameters)
         for c_param, param in zip(self.collected_params, parameters):
             if param.requires_grad:
                 param.data.copy_(c_param.data)
-- 
cgit v1.2.3