Skip to content

Instantly share code, notes, and snippets.

@yushangdi
Created November 25, 2024 23:39
Show Gist options
  • Select an option

  • Save yushangdi/cea2d213ad011ded7735f340b7c55c4e to your computer and use it in GitHub Desktop.

Select an option

Save yushangdi/cea2d213ad011ded7735f340b7c55c4e to your computer and use it in GitHub Desktop.
XLMRModel bug repro
# from torch.nn import *
# class Repro(torch.nn.Module):
# def __init__(self) -> None:
# super().__init__()
# self.true_graph_3 = GraphModule()
# self.false_graph_3 = GraphModule()
# self.true_graph_4 = GraphModule()
# self.false_graph_4 = GraphModule()
# def forward(self, encoder_sentence_encoder_layers_3_self_attn_q_proj_bias, encoder_sentence_encoder_layers_3_self_attn_k_proj_bias, encoder_sentence_encoder_layers_3_self_attn_v_proj_bias, encoder_sentence_encoder_layers_3_self_attn_out_proj_weight, encoder_sentence_encoder_layers_3_self_attn_out_proj_bias, encoder_sentence_encoder_layers_3_self_attn_q_proj_weight, encoder_sentence_encoder_layers_3_self_attn_k_proj_weight, encoder_sentence_encoder_layers_3_self_attn_v_proj_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_3_fc1_weight, encoder_sentence_encoder_layers_3_fc1_bias, encoder_sentence_encoder_layers_3_fc2_weight, encoder_sentence_encoder_layers_3_fc2_bias, encoder_sentence_encoder_layers_3_final_layer_norm_weight, encoder_sentence_encoder_layers_3_final_layer_norm_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_bias, encoder_sentence_encoder_layers_4_self_attn_k_proj_bias, encoder_sentence_encoder_layers_4_self_attn_v_proj_bias, encoder_sentence_encoder_layers_4_self_attn_out_proj_weight, encoder_sentence_encoder_layers_4_self_attn_out_proj_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_weight, encoder_sentence_encoder_layers_4_self_attn_k_proj_weight, encoder_sentence_encoder_layers_4_self_attn_v_proj_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_4_fc1_weight, encoder_sentence_encoder_layers_4_fc1_bias, encoder_sentence_encoder_layers_4_fc2_weight, encoder_sentence_encoder_layers_4_fc2_bias, encoder_sentence_encoder_layers_4_final_layer_norm_weight, encoder_sentence_encoder_layers_4_final_layer_norm_bias, eq, logical_or, getitem_2):
# true_graph_3 = self.true_graph_3
# false_graph_3 = self.false_graph_3
# cond_3 = torch.ops.higher_order.cond(logical_or, true_graph_3, false_graph_3, [encoder_sentence_encoder_layers_3_fc1_bias, encoder_sentence_encoder_layers_3_fc1_weight, encoder_sentence_encoder_layers_3_fc2_bias, encoder_sentence_encoder_layers_3_fc2_weight, encoder_sentence_encoder_layers_3_final_layer_norm_bias, encoder_sentence_encoder_layers_3_final_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_k_proj_bias, encoder_sentence_encoder_layers_3_self_attn_k_proj_weight, encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_3_self_attn_out_proj_bias, encoder_sentence_encoder_layers_3_self_attn_out_proj_weight, encoder_sentence_encoder_layers_3_self_attn_q_proj_bias, encoder_sentence_encoder_layers_3_self_attn_q_proj_weight, encoder_sentence_encoder_layers_3_self_attn_v_proj_bias, encoder_sentence_encoder_layers_3_self_attn_v_proj_weight, getitem_2, eq]); true_graph_3 = false_graph_3 = encoder_sentence_encoder_layers_3_fc1_bias = encoder_sentence_encoder_layers_3_fc1_weight = encoder_sentence_encoder_layers_3_fc2_bias = encoder_sentence_encoder_layers_3_fc2_weight = encoder_sentence_encoder_layers_3_final_layer_norm_bias = encoder_sentence_encoder_layers_3_final_layer_norm_weight = encoder_sentence_encoder_layers_3_self_attn_k_proj_bias = encoder_sentence_encoder_layers_3_self_attn_k_proj_weight = encoder_sentence_encoder_layers_3_self_attn_layer_norm_bias = encoder_sentence_encoder_layers_3_self_attn_layer_norm_weight = encoder_sentence_encoder_layers_3_self_attn_out_proj_bias = encoder_sentence_encoder_layers_3_self_attn_out_proj_weight = encoder_sentence_encoder_layers_3_self_attn_q_proj_bias = encoder_sentence_encoder_layers_3_self_attn_q_proj_weight = encoder_sentence_encoder_layers_3_self_attn_v_proj_bias = encoder_sentence_encoder_layers_3_self_attn_v_proj_weight = getitem_2 = None
# getitem_3 = cond_3[0]; cond_3 = None
# true_graph_4 = self.true_graph_4
# false_graph_4 = self.false_graph_4
# cond_4 = torch.ops.higher_order.cond(logical_or, true_graph_4, false_graph_4, [encoder_sentence_encoder_layers_4_fc1_bias, encoder_sentence_encoder_layers_4_fc1_weight, encoder_sentence_encoder_layers_4_fc2_bias, encoder_sentence_encoder_layers_4_fc2_weight, encoder_sentence_encoder_layers_4_final_layer_norm_bias, encoder_sentence_encoder_layers_4_final_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_k_proj_bias, encoder_sentence_encoder_layers_4_self_attn_k_proj_weight, encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias, encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight, encoder_sentence_encoder_layers_4_self_attn_out_proj_bias, encoder_sentence_encoder_layers_4_self_attn_out_proj_weight, encoder_sentence_encoder_layers_4_self_attn_q_proj_bias, encoder_sentence_encoder_layers_4_self_attn_q_proj_weight, encoder_sentence_encoder_layers_4_self_attn_v_proj_bias, encoder_sentence_encoder_layers_4_self_attn_v_proj_weight, getitem_3, eq]); logical_or = true_graph_4 = false_graph_4 = encoder_sentence_encoder_layers_4_fc1_bias = encoder_sentence_encoder_layers_4_fc1_weight = encoder_sentence_encoder_layers_4_fc2_bias = encoder_sentence_encoder_layers_4_fc2_weight = encoder_sentence_encoder_layers_4_final_layer_norm_bias = encoder_sentence_encoder_layers_4_final_layer_norm_weight = encoder_sentence_encoder_layers_4_self_attn_k_proj_bias = encoder_sentence_encoder_layers_4_self_attn_k_proj_weight = encoder_sentence_encoder_layers_4_self_attn_layer_norm_bias = encoder_sentence_encoder_layers_4_self_attn_layer_norm_weight = encoder_sentence_encoder_layers_4_self_attn_out_proj_bias = encoder_sentence_encoder_layers_4_self_attn_out_proj_weight = encoder_sentence_encoder_layers_4_self_attn_q_proj_bias = encoder_sentence_encoder_layers_4_self_attn_q_proj_weight = encoder_sentence_encoder_layers_4_self_attn_v_proj_bias = encoder_sentence_encoder_layers_4_self_attn_v_proj_weight = getitem_3 = eq = None
# return (cond_4,)
import torch
import torch._inductor.inductor_prims
import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
# ---------------------------------------------------------------------------
# Configuration snapshot applied before the repro runs.
# NOTE(review): these values look machine-generated (presumably dumped by the
# minifier that produced this file) -- confirm before hand-editing any of them.
# Every assignment mutates process-global torch config at import time.
# ---------------------------------------------------------------------------

# --- torch._dynamo settings ---
torch._dynamo.config.cache_size_limit = 8
torch._dynamo.config.accumulated_cache_size_limit = 256
torch._dynamo.config.specialize_int = False
torch._dynamo.config.specialize_float = True
torch._dynamo.config.assume_static_by_default = True
torch._dynamo.config.automatic_dynamic_shapes = True
torch._dynamo.config.traceable_tensor_subclasses = set()
torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch.distributions', 'torch._decomp', 'torch._prims', 'torch._refs', 'torch.testing'}
torch._dynamo.config.capture_scalar_outputs = False
torch._dynamo.config.capture_dynamic_output_shape_ops = False
torch._dynamo.config.prefer_deferred_runtime_asserts_over_guards = False
torch._dynamo.config.allow_complex_guards_as_runtime_asserts = False
torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
torch._dynamo.config.allow_rnn = False
torch._dynamo.config._save_config_ignore = {'repro_level', 'constant_functions', 'skipfiles_inline_module_allowlist', 'repro_after'}
torch._dynamo.config.reorderable_logging_functions = set()
torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
torch._dynamo.config.compiled_autograd_kwargs_override = {}

# --- torch._inductor settings (top level, then triton / aot_inductor / rocm) ---
torch._inductor.config.cpp_wrapper = False
torch._inductor.config.pre_grad_fusion_options = {}
torch._inductor.config.post_grad_fusion_options = {}
torch._inductor.config.fx_passes_numeric_check = {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}
torch._inductor.config.reorder_for_compute_comm_overlap_passes = ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']
torch._inductor.config._fuse_ddp_communication_passes = ['fuse_ddp_with_concat_op', 'schedule_comm_wait']
torch._inductor.config.generate_intermediate_hooks = True
torch._inductor.config.triton.cudagraphs = False
torch._inductor.config.triton.autotune_cublasLt = True
torch._inductor.config.triton.autotune_at_compile_time = None
torch._inductor.config.triton.store_cubin = False
torch._inductor.config.aot_inductor.output_path = ''
torch._inductor.config.aot_inductor.serialized_in_spec = ''
torch._inductor.config.aot_inductor.serialized_out_spec = ''
# 'package' is False here, but the script later passes
# config_patches={'aot_inductor.package': True} to run_repro.
# NOTE(review): presumably the patch takes precedence for the run -- confirm.
torch._inductor.config.aot_inductor.package = False
torch._inductor.config.aot_inductor.metadata = {'AOTI_DEVICE_KEY': 'cuda'}
torch._inductor.config.aot_inductor.presets = {}
torch._inductor.config.rocm.arch = []
torch._inductor.config.rocm.ck_supported_arch = ['gfx90a', 'gfx940', 'gfx941', 'gfx942']
torch._inductor.config._save_config_ignore = ['trace.upload_tar', 'joint_custom_pre_pass', 'joint_custom_post_pass', 'pre_grad_custom_pass']
torch._inductor.config._cache_config_ignore_prefix = ['trace', 'cuda.cutlass_dir', 'worker_start_method', 'compile_threads', 'post_grad_custom_post_pass', 'post_grad_custom_pre_pass']
torch._inductor.config.external_matmul = []

# --- torch._functorch settings ---
torch._functorch.config.functionalize_rng_ops = False
torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
torch._functorch.config.unlift_effect_tokens = False

# Not read anywhere else in the visible script; kept as part of the snapshot.
isolate_fails_code_str = None
# torch version: 2.6.0a0+gite6065fc
# torch cuda version: 12.0
# torch git version: e6065fce42730763450167bca558593bbdbdddbb
# CUDA Info:
# nvcc: NVIDIA (R) Cuda compiler driver
# Copyright (c) 2005-2024 NVIDIA Corporation
# Built on Thu_Mar_28_02:18:24_PDT_2024
# Cuda compilation tools, release 12.4, V12.4.131
# Build cuda_12.4.r12.4/compiler.34097967_0
# GPU Hardware Info:
# NVIDIA PG509-210 : 8
# Load the ExportedProgram checkpoint that accompanies this repro. This happens
# at import time, so the .pt2 file must exist at this path before the script
# (or anything importing it) runs.
exported_program = torch.export.load('/data/users/shangdiy/pytorch/torch_compile_debug/run_2024_11_25_15_19_05_153775-pid_800279/minifier/checkpoints/exported_program.pt2')
# print(exported_program.graph)

# Inductor config overrides handed to run_repro for this run.
config_patches = {'aot_inductor.package': True}

if __name__ == '__main__':
    # Imported here so the repro helper is only needed when run as a script.
    from torch._dynamo.repro.aoti import run_repro

    # Run the repro with gradient tracking disabled.
    with torch.no_grad():
        run_repro(
            exported_program,
            config_patches=config_patches,
            accuracy=False,
            command='run',
            save_dir='/data/users/shangdiy/pytorch/torch_compile_debug/run_2024_11_25_15_19_05_153775-pid_800279/minifier/checkpoints',
            check_str=None,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment