from typing import Dict, List

import torch
from torch.cuda.amp import GradScaler

try:
    from torch._six import container_abcs  # available in older PyTorch releases
except ImportError:
    # torch._six was removed from newer PyTorch; the stdlib module is a drop-in replacement here.
    import collections.abc as container_abcs

class _MultiDeviceReplicator(object):
    """
    Lazily serves copies of a tensor to requested devices. Copies are cached per-device.
    """

    def __init__(self, master_tensor: torch.Tensor) -> None:
        assert master_tensor.is_cuda
        self.master = master_tensor
        self._per_device_tensors: Dict[torch.device, torch.Tensor] = {}

    def get(self, device) -> torch.Tensor:
        retval = self._per_device_tensors.get(device, None)
        if retval is None:
            retval = self.master.to(device=device, non_blocking=True, copy=True)
            self._per_device_tensors[device] = retval
        return retval

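# Illustrative sketch of how _MultiDeviceReplicator behaves (the tensor name below is
# a hypothetical placeholder, not part of this module):
#
#     rep = _MultiDeviceReplicator(scale_tensor)   # scale_tensor must live on a CUDA device
#     rep.get(torch.device("cuda:0"))              # first call copies the tensor and caches it
#     rep.get(torch.device("cuda:0"))              # subsequent calls reuse the cached copy
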

class MaxClipGradScaler(GradScaler):
    def __init__(self, init_scale, max_scale: float, growth_interval=100):
        GradScaler.__init__(self, init_scale=init_scale, growth_interval=growth_interval)
        self.max_scale = max_scale

    def scale_clip(self):
        # Keep the loss scale from growing past ``max_scale``: freeze growth once the
        # cap is reached, keep doubling while below it, and clamp back down if the
        # scale has overshot the cap.
        if self.get_scale() == self.max_scale:
            self.set_growth_factor(1)
        elif self.get_scale() < self.max_scale:
            self.set_growth_factor(2)
        elif self.get_scale() > self.max_scale:
            self._scale.fill_(self.max_scale)
            self.set_growth_factor(1)

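    # Worked example (illustrative numbers, not defaults from this module): with
    # init_scale=2**14, max_scale=2**16 and growth_interval=100, the scale doubles
    # after every 100 overflow-free steps (2**14 -> 2**15 -> 2**16) and then stays
    # pinned at 2**16, because the growth factor is reset to 1 once the cap is hit.
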
    def scale(self, outputs):
        """
        Multiplies ('scales') a tensor or list of tensors by the scale factor.

        Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled,
        outputs are returned unmodified.

        Arguments:
            outputs (Tensor or iterable of Tensors): Outputs to scale.
        """
        if not self._enabled:
            return outputs
        self.scale_clip()
        # Short-circuit for the common case.
        if isinstance(outputs, torch.Tensor):
            assert outputs.is_cuda
            if self._scale is None:
                self._lazy_init_scale_growth_tracker(outputs.device)
            assert self._scale is not None
            return outputs * self._scale.to(device=outputs.device, non_blocking=True)

        # Invoke the more complex machinery only if we're treating multiple outputs.
        stash: List[_MultiDeviceReplicator] = []  # holds a reference that can be overwritten by apply_scale

        def apply_scale(val):
            if isinstance(val, torch.Tensor):
                assert val.is_cuda
                if len(stash) == 0:
                    if self._scale is None:
                        self._lazy_init_scale_growth_tracker(val.device)
                    assert self._scale is not None
                    stash.append(_MultiDeviceReplicator(self._scale))
                return val * stash[0].get(val.device)
            elif isinstance(val, container_abcs.Iterable):
                iterable = map(apply_scale, val)
                if isinstance(val, (list, tuple)):
                    return type(val)(iterable)
                else:
                    return iterable
            else:
                raise ValueError("outputs must be a Tensor or an iterable of Tensors")

        return apply_scale(outputs)
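

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The model, optimizer, and data below
# are hypothetical placeholders; only MaxClipGradScaler and the standard
# GradScaler API (scale / step / update) come from this module and PyTorch.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    if torch.cuda.is_available():
        model = torch.nn.Linear(8, 2).cuda()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        # Example values: start at 2**14 and never let the scale grow beyond 2**16.
        scaler = MaxClipGradScaler(init_scale=2 ** 14, max_scale=2 ** 16, growth_interval=100)

        inputs = torch.randn(4, 8, device="cuda")
        targets = torch.randn(4, 2, device="cuda")

        with torch.cuda.amp.autocast():
            loss = torch.nn.functional.mse_loss(model(inputs), targets)

        optimizer.zero_grad()
        scaler.scale(loss).backward()  # scale() also clips the scale via scale_clip()
        scaler.step(optimizer)         # unscales gradients, then runs optimizer.step()
        scaler.update()                # adjusts the scale for the next iteration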