Fixing concatenation error for fp16 distributed training

5 years ago · df4a466af2
--- a/distributed.py
+++ b/distributed.py
@@ -140,7 +140,7 @@ def apply_gradient_allreduce(module):
                buckets = {}
                for param in module.parameters():
                    if param.requires_grad and param.grad is not None:
                        tp = type(param.data)
                        tp = param.data.dtype
                        if tp not in buckets:
                            buckets[tp] = []
                        buckets[tp].append(param)