Created November 25, 2024 23:39
Save yushangdi/cea2d213ad011ded7735f340b7c55c4e to your computer and use it in GitHub Desktop.
XLMRModel bug repro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# XLMRModel AOTInductor bug repro, generated by the torch.compile minifier.
#
# The exported program loaded below contains two torch.ops.higher_order.cond
# calls (one per encoder layer); the traced graph that produced it is kept in
# the comment block below for reference only -- it is never executed here.
#
# from torch.nn import *
# class Repro(torch.nn.Module):
#     def __init__(self) -> None:
#         super().__init__()
#         self.true_graph_3 = GraphModule()
#         self.false_graph_3 = GraphModule()
#         self.true_graph_4 = GraphModule()
#         self.false_graph_4 = GraphModule()
#
#     def forward(self, encoder_sentence_encoder_layers_3_self_attn_q_proj_bias, encoder_sentence_encoder_layers_3_self_attn_k_proj_bias, encoder_sentence_encoder_layers_3_self_attn_v_proj_bias, encoder_sentence_encoder_layers_3_self_attn_out_proj_weight, encoder_sentence_encoder_layers_3_self_attn_out_proj_bias, encoder_sentence_encoder_layers_3_self_attn_q_proj_weight, encoder_sentence_encoder_layers_3_self_attn_k_proj_weight, encoder_sentence_encoder_layers_3_self_attn_v_proj_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_3_fc1_weight, encoder_sentence_encoder_layers_3_fc1_bias, encoder_sentence_encoder_layers_3_fc2_weight, encoder_sentence_encoder_layers_3_fc2_bias, encoder_sentence_encoder_layers_3_final_layer_norm_weight, encoder_sentence_encoder_layers_3_final_layer_norm_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_bias, encoder_sentence_encoder_layers_4_self_attn_k_proj_bias, encoder_sentence_encoder_layers_4_self_attn_v_proj_bias, encoder_sentence_encoder_layers_4_self_attn_out_proj_weight, encoder_sentence_encoder_layers_4_self_attn_out_proj_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_weight, encoder_sentence_encoder_layers_4_self_attn_k_proj_weight, encoder_sentence_encoder_layers_4_self_attn_v_proj_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_4_fc1_weight, encoder_sentence_encoder_layers_4_fc1_bias, encoder_sentence_encoder_layers_4_fc2_weight, encoder_sentence_encoder_layers_4_fc2_bias, encoder_sentence_encoder_layers_4_final_layer_norm_weight, encoder_sentence_encoder_layers_4_final_layer_norm_bias, eq, logical_or, getitem_2):
#         true_graph_3 = self.true_graph_3
#         false_graph_3 = self.false_graph_3
#         cond_3 = torch.ops.higher_order.cond(logical_or, true_graph_3, false_graph_3, [encoder_sentence_encoder_layers_3_fc1_bias, encoder_sentence_encoder_layers_3_fc1_weight, encoder_sentence_encoder_layers_3_fc2_bias, encoder_sentence_encoder_layers_3_fc2_weight, encoder_sentence_encoder_layers_3_final_layer_norm_bias, encoder_sentence_encoder_layers_3_final_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_k_proj_bias, encoder_sentence_encoder_layers_3_self_attn_k_proj_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_out_proj_bias, encoder_sentence_encoder_layers_3_self_attn_out_proj_weight, encoder_sentence_encoder_layers_3_self_attn_q_proj_bias, encoder_sentence_encoder_layers_3_self_attn_q_proj_weight, encoder_sentence_encoder_layers_3_self_attn_v_proj_bias, encoder_sentence_encoder_layers_3_self_attn_v_proj_weight, getitem_2, eq]); true_graph_3 = false_graph_3 = encoder_sentence_encoder_layers_3_fc1_bias = encoder_sentence_encoder_layers_3_fc1_weight = encoder_sentence_encoder_layers_3_fc2_bias = encoder_sentence_encoder_layers_3_fc2_weight = encoder_sentence_encoder_layers_3_final_layer_norm_bias = encoder_sentence_encoder_layers_3_final_layer_norm_weight = encoder_sentence_encoder_layers_3_self_attn_k_proj_bias = encoder_sentence_encoder_layers_3_self_attn_k_proj_weight = encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias = encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight = encoder_sentence_encoder_layers_3_self_attn_out_proj_bias = encoder_sentence_encoder_layers_3_self_attn_out_proj_weight = encoder_sentence_encoder_layers_3_self_attn_q_proj_bias = encoder_sentence_encoder_layers_3_self_attn_q_proj_weight = encoder_sentence_encoder_layers_3_self_attn_v_proj_bias = encoder_sentence_encoder_layers_3_self_attn_v_proj_weight = getitem_2 = None
#         getitem_3 = cond_3[0]; cond_3 = None
#         true_graph_4 = self.true_graph_4
#         false_graph_4 = self.false_graph_4
#         cond_4 = torch.ops.higher_order.cond(logical_or, true_graph_4, false_graph_4, [encoder_sentence_encoder_layers_4_fc1_bias, encoder_sentence_encoder_layers_4_fc1_weight, encoder_sentence_encoder_layers_4_fc2_bias, encoder_sentence_encoder_layers_4_fc2_weight, encoder_sentence_encoder_layers_4_final_layer_norm_bias, encoder_sentence_encoder_layers_4_final_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_k_proj_bias, encoder_sentence_encoder_layers_4_self_attn_k_proj_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_out_proj_bias, encoder_sentence_encoder_layers_4_self_attn_out_proj_weight, encoder_sentence_encoder_layers_4_self_attn_q_proj_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_weight, encoder_sentence_encoder_layers_4_self_attn_v_proj_bias, encoder_sentence_encoder_layers_4_self_attn_v_proj_weight, getitem_3, eq]); logical_or = true_graph_4 = false_graph_4 = encoder_sentence_encoder_layers_4_fc1_bias = encoder_sentence_encoder_layers_4_fc1_weight = encoder_sentence_encoder_layers_4_fc2_bias = encoder_sentence_encoder_layers_4_fc2_weight = encoder_sentence_encoder_layers_4_final_layer_norm_bias = encoder_sentence_encoder_layers_4_final_layer_norm_weight = encoder_sentence_encoder_layers_4_self_attn_k_proj_bias = encoder_sentence_encoder_layers_4_self_attn_k_proj_weight = encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias = encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight = encoder_sentence_encoder_layers_4_self_attn_out_proj_bias = encoder_sentence_encoder_layers_4_self_attn_out_proj_weight = encoder_sentence_encoder_layers_4_self_attn_q_proj_bias = encoder_sentence_encoder_layers_4_self_attn_q_proj_weight = encoder_sentence_encoder_layers_4_self_attn_v_proj_bias = encoder_sentence_encoder_layers_4_self_attn_v_proj_weight = getitem_3 = eq = None
#         return (cond_4,)

import torch
import torch._inductor.inductor_prims
import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config

# Config snapshot captured by the minifier: replicates the dynamo / inductor /
# functorch settings that were active when the failure was observed. Do not
# edit these values -- they are part of the repro.
torch._dynamo.config.cache_size_limit = 8
torch._dynamo.config.accumulated_cache_size_limit = 256
torch._dynamo.config.specialize_int = False
torch._dynamo.config.specialize_float = True
torch._dynamo.config.assume_static_by_default = True
torch._dynamo.config.automatic_dynamic_shapes = True
torch._dynamo.config.traceable_tensor_subclasses = set()
torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch.distributions', 'torch._decomp', 'torch._prims', 'torch._refs', 'torch.testing'}
torch._dynamo.config.capture_scalar_outputs = False
torch._dynamo.config.capture_dynamic_output_shape_ops = False
torch._dynamo.config.prefer_deferred_runtime_asserts_over_guards = False
torch._dynamo.config.allow_complex_guards_as_runtime_asserts = False
torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
torch._dynamo.config.allow_rnn = False
torch._dynamo.config._save_config_ignore = {'repro_level', 'constant_functions', 'skipfiles_inline_module_allowlist', 'repro_after'}
torch._dynamo.config.reorderable_logging_functions = set()
torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
torch._dynamo.config.compiled_autograd_kwargs_override = {}
torch._inductor.config.cpp_wrapper = False
torch._inductor.config.pre_grad_fusion_options = {}
torch._inductor.config.post_grad_fusion_options = {}
torch._inductor.config.fx_passes_numeric_check = {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}
torch._inductor.config.reorder_for_compute_comm_overlap_passes = ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']
torch._inductor.config._fuse_ddp_communication_passes = ['fuse_ddp_with_concat_op', 'schedule_comm_wait']
torch._inductor.config.generate_intermediate_hooks = True
torch._inductor.config.triton.cudagraphs = False
torch._inductor.config.triton.autotune_cublasLt = True
torch._inductor.config.triton.autotune_at_compile_time = None
torch._inductor.config.triton.store_cubin = False
torch._inductor.config.aot_inductor.output_path = ''
torch._inductor.config.aot_inductor.serialized_in_spec = ''
torch._inductor.config.aot_inductor.serialized_out_spec = ''
torch._inductor.config.aot_inductor.package = False
torch._inductor.config.aot_inductor.metadata = {'AOTI_DEVICE_KEY': 'cuda'}
torch._inductor.config.aot_inductor.presets = {}
torch._inductor.config.rocm.arch = []
torch._inductor.config.rocm.ck_supported_arch = ['gfx90a', 'gfx940', 'gfx941', 'gfx942']
torch._inductor.config._save_config_ignore = ['trace.upload_tar', 'joint_custom_pre_pass', 'joint_custom_post_pass', 'pre_grad_custom_pass']
torch._inductor.config._cache_config_ignore_prefix = ['trace', 'cuda.cutlass_dir', 'worker_start_method', 'compile_threads', 'post_grad_custom_post_pass', 'post_grad_custom_pre_pass']
torch._inductor.config.external_matmul = []
torch._functorch.config.functionalize_rng_ops = False
torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
torch._functorch.config.unlift_effect_tokens = False

isolate_fails_code_str = None

# Environment the failure was captured on (informational):
# torch version: 2.6.0a0+gite6065fc
# torch cuda version: 12.0
# torch git version: e6065fce42730763450167bca558593bbdbdddbb
# CUDA Info:
# nvcc: NVIDIA (R) Cuda compiler driver
# Copyright (c) 2005-2024 NVIDIA Corporation
# Built on Thu_Mar_28_02:18:24_PDT_2024
# Cuda compilation tools, release 12.4, V12.4.131
# Build cuda_12.4.r12.4/compiler.34097967_0
# GPU Hardware Info:
# NVIDIA PG509-210 : 8

# Minifier checkpoint produced alongside this script; the path is specific to
# the machine the minifier ran on -- adjust it if the checkpoints move.
exported_program = torch.export.load(
    '/data/users/shangdiy/pytorch/torch_compile_debug/run_2024_11_25_15_19_05_153775-pid_800279/minifier/checkpoints/exported_program.pt2'
)
# print(exported_program.graph)

# Reproduces the failure under AOTInductor packaging mode.
config_patches = {'aot_inductor.package': True}

if __name__ == '__main__':
    from torch._dynamo.repro.aoti import run_repro
    # Inference-only repro: no gradients are needed to trigger the bug.
    with torch.no_grad():
        run_repro(
            exported_program,
            config_patches=config_patches,
            accuracy=False,
            command='run',
            save_dir='/data/users/shangdiy/pytorch/torch_compile_debug/run_2024_11_25_15_19_05_153775-pid_800279/minifier/checkpoints',
            check_str=None,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.