Tracking the memory allocations of a PyTorch Module during training

Goal: track how memory is allocated while a PyTorch Module is being trained.

Method:
1. Use module pre-hooks to mark the boundaries between modules.
2. Intercept every aten operator with __torch_dispatch__ and sum the memory of the tensors newly created inside that operator.
3. Deduplicate tensors by tensor.data_ptr(), so that each distinct block of memory is counted only once.

A minimal sketch of the dispatch-interception idea is shown below; the full code follows it.
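The sketch below only illustrates points 2 and 3, assuming a PyTorch version that provides TorchDispatchMode in torch.utils._python_dispatch (the same import used by the full code). The class name MemSketch and the tiny demo at the end are invented for this illustration, and unlike the full tool it does not filter by device or attribute allocations to modules.

import torch
import torch.utils._pytree as pytree
from torch.utils._python_dispatch import TorchDispatchMode

class MemSketch(TorchDispatchMode):  # illustrative name, not part of the full code below
    def __init__(self):
        super().__init__()
        self.seen = set()  # data_ptr() values that have already been counted

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        ret = func(*args, **(kwargs or {}))
        new_bytes = 0
        # Walk the op's inputs and outputs; count only storages not seen before.
        for t in pytree.tree_flatten((ret, args, kwargs))[0]:
            if isinstance(t, torch.Tensor) and t.data_ptr() not in self.seen:
                self.seen.add(t.data_ptr())
                new_bytes += t.numel() * t.element_size()
        if new_bytes > 0:
            print(f"{func.__name__}: {new_bytes} new bytes")
        return ret

with MemSketch():
    x = torch.randn(4, 4)  # counted when aten.randn produces it
    y = x @ x              # only the mm output is new at this point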
Code

import numpy as np
import torch
from torch.nn import Module, Linear
import torch.nn as nn
from torch.optim import Adam,SGD
from torch.utils._python_dispatch import TorchDispatchMode
from dataclasses import dataclass
from typing import Any
import time

@dataclass
class _ProfilerState:
    cls: Any
    object: Any = None

current_module = None
tensor_cache = set()  # data_ptr() values that have already been counted

def get_current_mem():
    # Flush and print the memory attributed to the current module scope
    global current_module
    print(f"[INFO]{current_module['name']}:{np.sum(current_module['size'])}")
    current_module = None

class InputDescriptor:
    def __init__(self) -> None:
        self.total_input_size = 0

    def _save_var(self, v):
        class_name = v.__class__.__name__
        if class_name in ["Tensor", "Parameter"]:
            global tensor_cache
            tensorid = v.data_ptr()
            if v.device.type != "cuda":
                return
            if tensorid not in tensor_cache:
                tensor_cache.add(tensorid)
                sz = v.numel() * v.element_size()
                print(v.shape, v.dtype)
                self.total_input_size += sz
            if class_name == "Parameter" and v.grad is not None:
                tensorid = v.grad.data_ptr()
                if tensorid not in tensor_cache:
                    tensor_cache.add(tensorid)
                    sz = v.grad.numel() * v.grad.element_size()
                    print("grad", v.grad.shape, v.grad.dtype)
                    self.total_input_size += sz
        elif class_name in ["list", "tuple"]:
            for t in v:
                self._save_var(t)
        else:
            pass

    def save_vars(self, ret, *args, **kwargs):
        # Record every tensor reachable from the op's inputs and output
        for arg in args:
            self._save_var(arg)
        for k, v in kwargs.items():
            self._save_var(v)
        self._save_var(ret)
        global current_module
        if current_module is None:
            current_module = {"name": "Other", "size": []}
        current_module["size"].append(self.total_input_size)

# Caches for objects and class names
object_cache = {}
class_name_count = {}

def get_unique_name(class_name, obj_id):
    # Generate a unique, human-readable name for an object
    if class_name not in class_name_count:
        class_name_count[class_name] = 0
    uid = f"{class_name}_{obj_id}"
    if uid not in object_cache:
        class_name_count[class_name] += 1
        object_cache[uid] = {"idx": class_name_count[class_name]}
    return f"{class_name}-{object_cache[uid]['idx']}"

def initialize_module_attributes(module):
    # Initialize the bookkeeping attributes of a module
    if not hasattr(module, "uuid"):
        module.uuid = get_unique_name(module.__class__.__name__, id(module))
    if not hasattr(module, "backward_mem"):
        module.backward_mem = []
    if not hasattr(module, "forward_mem"):
        module.forward_mem = []

def pre_backward_hook(module, grad_input):
    # Hook executed before the module's backward pass
    initialize_module_attributes(module)
    global current_module
    if current_module is not None and np.sum(current_module["size"]) > 0:
        print(f"[INFO]{current_module['name']}:{np.sum(current_module['size'])}")
    module.backward_mem.clear()
    current_module = {"name": f"backward-{module.uuid}", "size": module.backward_mem}

def post_backward_hook(module, grad_input, grad_output):
    # Hook executed after the module's backward pass
    initialize_module_attributes(module)

def pre_forward_hook(module, input):
    # Hook executed before the module's forward pass
    initialize_module_attributes(module)
    global current_module
    if current_module is not None and np.sum(current_module["size"]) > 0:
        print(f"[INFO]{current_module['name']}:{np.sum(current_module['size'])}")
    module.forward_mem.clear()
    current_module = {"name": f"forward-{module.uuid}", "size": module.forward_mem}

def post_forward_hook(module, input, output):
    # Hook executed after the module's forward pass
    initialize_module_attributes(module)

def register_forward_hooks(module):
    # Register the forward hooks
    module.register_forward_pre_hook(pre_forward_hook)
    module.register_forward_hook(post_forward_hook)

def register_backward_hooks(module):
    # Register the backward hooks
    module.register_full_backward_pre_hook(pre_backward_hook)
    module.register_full_backward_hook(post_backward_hook)

class HookModel(object):
    def __init__(self, model):
        output_dict = {}
        self.get_submodule_recursive(model, "", output_dict)
        for name, module in output_dict.items():
            if name.endswith("Sequential"):
                continue
            register_forward_hooks(module)
            register_backward_hooks(module)

    def get_submodule_recursive(self, module, prefix, output_dict):
        prefix = prefix + "/" + type(module).__name__
        output_dict[prefix] = module
        for name, submodule in module.named_children():
            self.get_submodule_recursive(submodule, f"{prefix}[{name}]", output_dict)

class TorchDumpDispatchMode(TorchDispatchMode):
    def __init__(self, parent):
        super().__init__()
        self.parent = parent

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        ret = func(*args, **kwargs)
        desc = InputDescriptor()
        desc.save_vars(ret, *args, **kwargs)
        if desc.total_input_size > 0:
            print(f"{func.__name__}:{desc.total_input_size}")
        return ret

class TorchDebugDumper:
    _CURRENT_Dumper = None

    def __init__(self):
        self.p = _ProfilerState(TorchDumpDispatchMode)

    def __enter__(self):
        assert TorchDebugDumper._CURRENT_Dumper is None
        TorchDebugDumper._CURRENT_Dumper = self
        if self.p.object is None:
            o = self.p.cls(self)
            o.__enter__()
            self.p.object = o
        else:
            self.p.object.step()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        TorchDebugDumper._CURRENT_Dumper = None
        if self.p.object is not None:
            self.p.object.__exit__(exc_type, exc_val, exc_tb)
            del self.p.object

class FeedForward(Module):
    def __init__(self, hidden_size, ffn_size):
        super().__init__()
        self.fc = nn.Sequential(
            Linear(in_features=hidden_size, out_features=ffn_size, bias=False),
            nn.ReLU(),
            Linear(in_features=ffn_size, out_features=ffn_size * 2, bias=False),
            nn.Dropout(0.5),
            Linear(in_features=ffn_size * 2, out_features=hidden_size, bias=False),
        )
        self.norm = nn.LayerNorm(normalized_shape=hidden_size, elementwise_affine=False)

    def forward(self, x):
        return x + self.fc(self.norm(x))
def main():
    model = FeedForward(100, 128)
    model = model.float().cuda()
    model.train()
    obj = HookModel(model)
    global current_module
    with TorchDebugDumper():
        opt = Adam(model.parameters(), lr=0.001)
        input = torch.randn(1, 100).float().cuda()
        output = model(input)
        get_current_mem()
        loss = -torch.log(output.sum())
        opt.zero_grad()
        loss.backward()
        get_current_mem()
        current_module = None
        opt.step()
    get_current_mem()
    num_model_params = sum(p.numel() for p in model.parameters())
    print(f"[INFO]Number of model parameters: {num_model_params}")
main()

Output
torch.Size([1, 100]) torch.float32
_to_copy.default:400
[INFO]Other:400
torch.Size([1, 100]) torch.float32
torch.Size([1, 1]) torch.float32
torch.Size([1, 1]) torch.float32
native_layer_norm.default:408
[INFO]forward-LayerNorm-1:408
torch.Size([128, 100]) torch.float32
t.default:51200
[INFO]forward-Linear-1:51200
torch.Size([256, 128]) torch.float32
t.default:131072
torch.Size([1, 256]) torch.float32
mm.default:1024
[INFO]forward-Linear-2:132096
torch.Size([1, 256]) torch.float32
native_dropout.default:1024
[INFO]forward-Dropout-1:1024
torch.Size([100, 256]) torch.float32
t.default:102400
torch.Size([1, 100]) torch.float32
add.Tensor:400
[INFO]forward-Linear-3:102800
torch.Size([]) torch.float32
log.default:4
torch.Size([]) torch.float32
neg.default:4
torch.Size([]) torch.float32
neg.default:4
torch.Size([]) torch.float32
div.Tensor:4
[INFO]Other:16
torch.Size([100, 256]) torch.float32
mm.default:102400
torch.Size([1, 256]) torch.float32
mm.default:1024
[INFO]backward-Linear-3:103424
torch.Size([128, 100]) torch.float32
mm.default:51200
[INFO]backward-Linear-1:51200
torch.Size([128, 100]) torch.float32
zeros_like.default:51200
torch.Size([128, 100]) torch.float32
zeros_like.default:51200
torch.Size([256, 128]) torch.float32
zeros_like.default:131072
torch.Size([256, 128]) torch.float32
zeros_like.default:131072
torch.Size([100, 256]) torch.float32
zeros_like.default:102400
torch.Size([100, 256]) torch.float32
zeros_like.default:102400
torch.Size([128, 100]) torch.float32
torch.Size([256, 128]) torch.float32
torch.Size([100, 256]) torch.float32
_foreach_sqrt.default:284672
[INFO]Other:854016
[INFO]Number of model parameters: 71168
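As a quick sanity check on the log, the parameter count and the size of the final "Other" block (the state Adam allocates inside opt.step(), presumably exp_avg and exp_avg_sq plus the _foreach_sqrt temporary) can be reproduced from the three bias-free Linear layers. The short calculation below uses only numbers already visible above.

# FeedForward(100, 128): Linear(100->128), Linear(128->256), Linear(256->100), all without bias
params = 128 * 100 + 256 * 128 + 100 * 256   # 71168, matches "[INFO]Number of model parameters"
param_bytes = params * 4                      # float32 -> 284672 bytes
# opt.step(): two zeros_like buffers per weight plus one _foreach_sqrt temporary
adam_step_bytes = 2 * param_bytes + param_bytes
print(params, param_bytes, adam_step_bytes)   # 71168 284672 854016, matches "[INFO]Other:854016"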