Memory-efficient one_hot implementation (#7005)

rusty1s · web-flow · commit 5f4a21c96e91 · 2023-03-22T12:14:59.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added a memory-efficient `utils.one_hot` implementation ([#7005](https://github.com/pyg-team/pytorch_geometric/pull/7005))
 - Added `HeteroDictLinear` and an optimized `FastHGTConv` module ([#6178](https://github.com/pyg-team/pytorch_geometric/pull/6178), [#6998](https://github.com/pyg-team/pytorch_geometric/pull/6998))
 - Added the `DenseGATConv` module ([#6928](https://github.com/pyg-team/pytorch_geometric/pull/6928))
 - Added `trim_to_layer` utility function for more efficient `NeighborLoader` use-cases ([#6661](https://github.com/pyg-team/pytorch_geometric/pull/6661))
diff --git a/test/nn/conv/test_wl_conv.py b/test/nn/conv/test_wl_conv.py
@@ -1,13 +1,13 @@
 import torch
-import torch.nn.functional as F
 from torch_sparse import SparseTensor
 
 from torch_geometric.nn import WLConv
+from torch_geometric.utils import one_hot
 
 
 def test_wl_conv():
     x1 = torch.tensor([1, 0, 0, 1])
-    x2 = F.one_hot(x1).to(torch.float)
+    x2 = one_hot(x1)
     edge_index = torch.tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]])
     adj1 = SparseTensor.from_edge_index(edge_index)
     adj2 = adj1.to_torch_sparse_csc_tensor()
diff --git a/test/utils/test_one_hot.py b/test/utils/test_one_hot.py
@@ -0,0 +1,17 @@
+import torch
+
+from torch_geometric.utils import one_hot
+
+
+def test_one_hot():
+    index = torch.tensor([0, 1, 2])
+
+    out = one_hot(index)
+    assert out.size() == (3, 3)
+    assert out.dtype == torch.float
+    assert out.tolist() == [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
+
+    out = one_hot(index, num_classes=4, dtype=torch.long)
+    assert out.size() == (3, 4)
+    assert out.dtype == torch.long
+    assert out.tolist() == [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]]
diff --git a/torch_geometric/datasets/ged_dataset.py b/torch_geometric/datasets/ged_dataset.py
@@ -5,7 +5,6 @@
 from typing import Callable, List, Optional
 
 import torch
-import torch.nn.functional as F
 
 from torch_geometric.data import (
     Data,
@@ -14,7 +13,7 @@
     extract_tar,
     extract_zip,
 )
-from torch_geometric.utils import to_undirected
+from torch_geometric.utils import one_hot, to_undirected
 
 
 class GEDDataset(InMemoryDataset):
@@ -201,8 +200,7 @@ def process(self):
                     x = torch.zeros(data.num_nodes, dtype=torch.long)
                     for node, info in G.nodes(data=True):
                         x[int(node)] = self.types.index(info['type'])
-                    data.x = F.one_hot(x, num_classes=len(self.types)).to(
-                        torch.float)
+                    data.x = one_hot(x, num_classes=len(self.types))
 
                 if self.pre_filter is not None and not self.pre_filter(data):
                     continue
diff --git a/torch_geometric/datasets/linkx_dataset.py b/torch_geometric/datasets/linkx_dataset.py
@@ -3,9 +3,9 @@
 
 import numpy as np
 import torch
-import torch.nn.functional as F
 
 from torch_geometric.data import Data, InMemoryDataset, download_url
+from torch_geometric.utils import one_hot
 
 
 class LINKXDataset(InMemoryDataset):
@@ -132,7 +132,7 @@ def _process_facebook(self):
         x = torch.cat([metadata[:, :1], metadata[:, 2:]], dim=-1)
         for i in range(x.size(1)):
             _, out = x[:, i].unique(return_inverse=True)
-            xs.append(F.one_hot(out).to(torch.float))
+            xs.append(one_hot(out))
         x = torch.cat(xs, dim=-1)
 
         data = Data(x=x, edge_index=edge_index, y=y)
diff --git a/torch_geometric/datasets/qm9.py b/torch_geometric/datasets/qm9.py
@@ -4,7 +4,6 @@
 from typing import Callable, List, Optional
 
 import torch
-import torch.nn.functional as F
 from tqdm import tqdm
 
 from torch_geometric.data import (
@@ -13,7 +12,7 @@
     download_url,
     extract_zip,
 )
-from torch_geometric.utils import scatter
+from torch_geometric.utils import one_hot, scatter
 
 HAR2EV = 27.211386246
 KCALMOL2EV = 0.04336414
@@ -271,8 +270,7 @@ def process(self):
 
             edge_index = torch.tensor([row, col], dtype=torch.long)
             edge_type = torch.tensor(edge_type, dtype=torch.long)
-            edge_attr = F.one_hot(edge_type,
-                                  num_classes=len(bonds)).to(torch.float)
+            edge_attr = one_hot(edge_type, num_classes=len(bonds))
 
             perm = (edge_index[0] * N + edge_index[1]).argsort()
             edge_index = edge_index[:, perm]
@@ -283,10 +281,10 @@ def process(self):
             hs = (z == 1).to(torch.float)
             num_hs = scatter(hs[row], col, dim_size=N, reduce='sum').tolist()
 
-            x1 = F.one_hot(torch.tensor(type_idx), num_classes=len(types))
+            x1 = one_hot(torch.tensor(type_idx), num_classes=len(types))
             x2 = torch.tensor([atomic_number, aromatic, sp, sp2, sp3, num_hs],
                               dtype=torch.float).t().contiguous()
-            x = torch.cat([x1.to(torch.float), x2], dim=-1)
+            x = torch.cat([x1, x2], dim=-1)
 
             y = target[i].unsqueeze(0)
             name = mol.GetProp('_Name')
diff --git a/torch_geometric/io/sdf.py b/torch_geometric/io/sdf.py
@@ -1,9 +1,8 @@
 import torch
-import torch.nn.functional as F
 
 from torch_geometric.data import Data
 from torch_geometric.io import parse_txt_array
-from torch_geometric.utils import coalesce
+from torch_geometric.utils import coalesce, one_hot
 
 elems = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4}
 
@@ -15,7 +14,7 @@ def parse_sdf(src):
     atom_block = src[1:num_atoms + 1]
     pos = parse_txt_array(atom_block, end=3)
     x = torch.tensor([elems[item.split()[3]] for item in atom_block])
-    x = F.one_hot(x, num_classes=len(elems))
+    x = one_hot(x, num_classes=len(elems))
 
     bond_block = src[1 + num_atoms:1 + num_atoms + num_bonds]
     row, col = parse_txt_array(bond_block, end=2, dtype=torch.long).t() - 1
diff --git a/torch_geometric/io/tu.py b/torch_geometric/io/tu.py
@@ -4,11 +4,10 @@
 
 import numpy as np
 import torch
-import torch.nn.functional as F
 
 from torch_geometric.data import Data
 from torch_geometric.io import read_txt_array
-from torch_geometric.utils import coalesce, remove_self_loops
+from torch_geometric.utils import coalesce, one_hot, remove_self_loops
 
 names = [
     'A', 'graph_indicator', 'node_labels', 'node_attributes'
@@ -36,8 +35,11 @@ def read_tu_data(folder, prefix):
             node_labels = node_labels.unsqueeze(-1)
         node_labels = node_labels - node_labels.min(dim=0)[0]
         node_labels = node_labels.unbind(dim=-1)
-        node_labels = [F.one_hot(x, num_classes=-1) for x in node_labels]
-        node_labels = torch.cat(node_labels, dim=-1).to(torch.float)
+        node_labels = [one_hot(x) for x in node_labels]
+        if len(node_labels) == 1:
+            node_labels = node_labels[0]
+        else:
+            node_labels = torch.cat(node_labels, dim=-1)
 
     edge_attributes = torch.empty((edge_index.size(1), 0))
     if 'edge_attributes' in names:
@@ -52,8 +54,11 @@ def read_tu_data(folder, prefix):
             edge_labels = edge_labels.unsqueeze(-1)
         edge_labels = edge_labels - edge_labels.min(dim=0)[0]
         edge_labels = edge_labels.unbind(dim=-1)
-        edge_labels = [F.one_hot(e, num_classes=-1) for e in edge_labels]
-        edge_labels = torch.cat(edge_labels, dim=-1).to(torch.float)
+        edge_labels = [one_hot(e) for e in edge_labels]
+        if len(edge_labels) == 1:
+            edge_labels = edge_labels[0]
+        else:
+            edge_labels = torch.cat(edge_labels, dim=-1)
 
     x = cat([node_attributes, node_labels])
     edge_attr = cat([edge_attributes, edge_labels])
diff --git a/torch_geometric/nn/conv/rgcn_conv.py b/torch_geometric/nn/conv/rgcn_conv.py
@@ -1,7 +1,6 @@
 from typing import Optional, Tuple, Union
 
 import torch
-import torch.nn.functional as F
 from torch import Tensor
 from torch.nn import Parameter
 from torch.nn import Parameter as Param
@@ -16,7 +15,7 @@
     pyg_lib,
     torch_sparse,
 )
-from torch_geometric.utils import index_sort, scatter, spmm
+from torch_geometric.utils import index_sort, one_hot, scatter, spmm
 from torch_geometric.utils.sparse import index2ptr
 
 
@@ -351,7 +350,7 @@ def aggregate(self, inputs: Tensor, edge_type: Tensor, index: Tensor,
 
         # Compute normalization in separation for each `edge_type`.
         if self.aggr == 'mean':
-            norm = F.one_hot(edge_type, self.num_relations).to(torch.float)
+            norm = one_hot(edge_type, self.num_relations, dtype=inputs.dtype)
             norm = scatter(norm, index, dim=0, dim_size=dim_size)[index]
             norm = torch.gather(norm, 1, edge_type.view(-1, 1))
             norm = 1. / norm.clamp_(1.)
diff --git a/torch_geometric/nn/models/correct_and_smooth.py b/torch_geometric/nn/models/correct_and_smooth.py
@@ -1,9 +1,9 @@
 import torch
-import torch.nn.functional as F
 from torch import Tensor
 
 from torch_geometric.nn.models import LabelPropagation
 from torch_geometric.typing import Adj, OptTensor
+from torch_geometric.utils import one_hot
 
 
 class CorrectAndSmooth(torch.nn.Module):
@@ -97,8 +97,8 @@ def correct(self, y_soft: Tensor, y_true: Tensor, mask: Tensor,
         assert y_true.size(0) == numel
 
         if y_true.dtype == torch.long and y_true.size(0) == y_true.numel():
-            y_true = F.one_hot(y_true.view(-1), y_soft.size(-1))
-            y_true = y_true.to(y_soft.dtype)
+            y_true = one_hot(y_true.view(-1), num_classes=y_soft.size(-1),
+                             dtype=y_soft.dtype)
 
         error = torch.zeros_like(y_soft)
         error[mask] = y_true - y_soft[mask]
@@ -141,8 +141,8 @@ def smooth(self, y_soft: Tensor, y_true: Tensor, mask: Tensor,
         assert y_true.size(0) == numel
 
         if y_true.dtype == torch.long and y_true.size(0) == y_true.numel():
-            y_true = F.one_hot(y_true.view(-1), y_soft.size(-1))
-            y_true = y_true.to(y_soft.dtype)
+            y_true = one_hot(y_true.view(-1), num_classes=y_soft.size(-1),
+                             dtype=y_soft.dtype)
 
         y_soft = y_soft.clone()
         y_soft[mask] = y_true
diff --git a/torch_geometric/nn/models/label_prop.py b/torch_geometric/nn/models/label_prop.py
@@ -1,13 +1,12 @@
 from typing import Callable, Optional
 
 import torch
-import torch.nn.functional as F
 from torch import Tensor
 
 from torch_geometric.nn.conv import MessagePassing
 from torch_geometric.nn.conv.gcn_conv import gcn_norm
 from torch_geometric.typing import Adj, OptTensor, SparseTensor
-from torch_geometric.utils import spmm
+from torch_geometric.utils import one_hot, spmm
 
 
 class LabelPropagation(MessagePassing):
@@ -62,7 +61,7 @@ def forward(
                 (default: :obj:`None`)
         """
         if y.dtype == torch.long and y.size(0) == y.numel():
-            y = F.one_hot(y.view(-1)).to(torch.float)
+            y = one_hot(y.view(-1))
 
         out = y
         if mask is not None:
diff --git a/torch_geometric/transforms/grid_sampling.py b/torch_geometric/transforms/grid_sampling.py
@@ -2,14 +2,13 @@
 from typing import List, Optional, Union
 
 import torch
-import torch.nn.functional as F
 from torch import Tensor
 
 import torch_geometric
 from torch_geometric.data import Data
 from torch_geometric.data.datapipes import functional_transform
 from torch_geometric.transforms import BaseTransform
-from torch_geometric.utils import scatter
+from torch_geometric.utils import one_hot, scatter
 
 
 @functional_transform('grid_sampling')
@@ -53,8 +52,7 @@ def __call__(self, data: Data) -> Data:
 
             if torch.is_tensor(item) and item.size(0) == num_nodes:
                 if key == 'y':
-                    item = F.one_hot(item)
-                    item = scatter(item, c, dim=0, reduce='sum')
+                    item = scatter(one_hot(item), c, dim=0, reduce='sum')
                     data[key] = item.argmax(dim=-1)
                 elif key == 'batch':
                     data[key] = item[perm]
diff --git a/torch_geometric/transforms/one_hot_degree.py b/torch_geometric/transforms/one_hot_degree.py
@@ -1,10 +1,9 @@
 import torch
-import torch.nn.functional as F
 
 from torch_geometric.data import Data
 from torch_geometric.data.datapipes import functional_transform
 from torch_geometric.transforms import BaseTransform
-from torch_geometric.utils import degree
+from torch_geometric.utils import degree, one_hot
 
 
 @functional_transform('one_hot_degree')
@@ -33,7 +32,7 @@ def __init__(
     def __call__(self, data: Data) -> Data:
         idx, x = data.edge_index[1 if self.in_degree else 0], data.x
         deg = degree(idx, data.num_nodes, dtype=torch.long)
-        deg = F.one_hot(deg, num_classes=self.max_degree + 1).to(torch.float)
+        deg = one_hot(deg, num_classes=self.max_degree + 1)
 
         if x is not None and self.cat:
             x = x.view(-1, 1) if x.dim() == 1 else x
diff --git a/torch_geometric/utils/__init__.py b/torch_geometric/utils/__init__.py
@@ -30,6 +30,7 @@
                      to_torch_csc_tensor, to_edge_index)
 from .spmm import spmm
 from .unbatch import unbatch, unbatch_edge_index
+from .one_hot import one_hot
 from .normalized_cut import normalized_cut
 from .grid import grid
 from .geodesic import geodesic_distance
@@ -101,6 +102,7 @@
     'spmm',
     'unbatch',
     'unbatch_edge_index',
+    'one_hot',
     'normalized_cut',
     'grid',
     'geodesic_distance',
diff --git a/torch_geometric/utils/one_hot.py b/torch_geometric/utils/one_hot.py
@@ -0,0 +1,38 @@
+from typing import Optional
+
+import torch
+from torch import Tensor
+
+
+def one_hot(
+    index: Tensor,
+    num_classes: Optional[int] = None,
+    dtype: Optional[torch.dtype] = None,
+) -> Tensor:
+    r"""Takes a one-dimensional :obj:`index` tensor and returns a one-hot
+    encoded representation of it with shape :obj:`[*, num_classes]` that has
+    zeros everywhere except where the index of last dimension matches the
+    corresponding value of the input tensor, in which case it will be :obj:`1`.
+
+    .. note::
+        This is a more memory-efficient version of
+        :meth:`torch.nn.functional.one_hot` as you can customize the output
+        :obj:`dtype`.
+
+    Args:
+        index (torch.Tensor): The one-dimensional input tensor.
+        num_classes (int, optional): The total number of classes. If set to
+            :obj:`None`, the number of classes will be inferred as one greater
+            than the largest class value in the input tensor.
+            (default: :obj:`None`)
+        dtype (torch.dtype, optional): The :obj:`dtype` of the output tensor.
+    """
+    if index.dim() != 1:
+        raise ValueError("'index' tensor needs to be one-dimensional")
+
+    if num_classes is None:
+        num_classes = int(index.max()) + 1
+
+    out = torch.zeros((index.size(0), num_classes), dtype=dtype,
+                      device=index.device)
+    return out.scatter_(1, index.unsqueeze(1), 1)