V0819 12:17:11.089000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__run_lpar_main__.py", 0]}
V0819 12:17:11.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/meta_only/bootstrap.py", 1]}
V0819 12:17:11.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/bootstrap.py", 2]}
V0819 12:17:11.091000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/runpy.py", 3]}
V0819 12:17:11.091000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/unittest.py", 4]}
V0819 12:17:11.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/base.py", 5]}
V0819 12:17:11.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/runner.py", 6]}
V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/suite.py", 7]}
V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/case.py", 8]}
V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/testing/_internal/common_utils.py", 9]}
V0819 12:17:11.094000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/contextlib.py", 10]}
V0819 12:17:11.094000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py", 11]}
V0819 12:17:11.095000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/__init__.py", 12]}
V0819 12:17:11.095000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/_trace.py", 13]}
V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/exported_program.py", 14]}
V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_export/non_strict_utils.py", 15]}
V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/utils/_pytree.py", 16]}
V0819 12:17:11.097000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_subclasses/fake_tensor.py", 17]}
V0819 12:17:11.097000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_subclasses/meta_utils.py", 18]}
V0819 12:17:11.098000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 0, "describer_id": 0, "size": 320}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.099000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [8, 10], "is_leaf": true, "stride": [10, 1], "storage": 0, "view_func": "_CustomViewFunc(func=<built-in method _view_func_unsafe of Tensor object at 0x7f8be5e5bce0>)", "describer_id": 0}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 
7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": 
"<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.100000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.102000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 1, "describer_id": 0, "size": 800}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.103000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 20], "is_leaf": true, "stride": [20, 1], "storage": 1, "view_func": "_CustomViewFunc(func=<built-in method _view_func_unsafe of Tensor object at 0x7f8be5e7cb80>)", "describer_id": 0}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 
7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": 
"<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.104000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 1, "source": "L['a']"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.106000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 2, "describer_id": 0, "size": 2400}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.107000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [20, 30], "is_leaf": true, "stride": [30, 1], "storage": 2, "view_func": "_CustomViewFunc(func=<built-in method _view_func_unsafe of Tensor object at 0x7f8bd693d850>)", "describer_id": 0}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 
7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": 
"<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.107000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 2, "source": "L['b']"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.109000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 3, "describer_id": 0, "size": 1200}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.110000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 30], "is_leaf": true, "stride": [30, 1], "storage": 3, "view_func": "_CustomViewFunc(func=<built-in method _view_func_unsafe of Tensor object at 0x7f8bd693d800>)", "describer_id": 0}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 
7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": 
"<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.111000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 3, "source": "L['c']"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "<genexpr>", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "<lambda>", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 
15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
V0819 12:17:11.212000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6aca4c2393e9ee762ae85ded683fe3e1"}
	{
	"name": "compile_fx_aot",
	"ts": 1755631031212288.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.215000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9fc9b4d8e207af26c84b91013d416c20"}
	{
	"name": "inductor_codecache_torch_key",
	"ts": 1755631031215360.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.217000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3cc7ee90f5aecb2fcf07b8816a341089"}
	{
	"name": "inductor_codecache_torch_key",
	"ts": 1755631031217169.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.224000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/__init__.py", 19]}
V0819 12:17:11.224000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/debug.py", 20]}
V0819 12:17:11.225000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/compile_fx.py", 21]}
V0819 12:17:11.227000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2223] {"artifact": {"name": "before_pre_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2223, "name": "compile_fx", "filename": 21, "loc": "trace_structured("}], "has_payload": "4a0a3d9a0a4da4e2c240ac1983842cbb"}
	class GraphModule(torch.nn.Module):
	    def forward(self, x: "f32[8, 10][10, 1]cuda:0", a: "f32[10, 20][20, 1]cuda:0", b: "f32[20, 30][30, 1]cuda:0", c: "f32[10, 30][30, 1]cuda:0"):
	        # No stacktrace found for following nodes
	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
	        linear: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.linear.default(x, fc1_weight, fc1_bias);  x = fc1_weight = fc1_bias = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(linear);  linear = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(a, 3.14);  a = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
	        addmm: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(c, mul, b);  c = mul = b = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
	        gelu: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.gelu.default(addmm);  addmm = None
	        return (sigmoid, gelu)
	        
	
	 # graph id: 140239416832880
V0819 12:17:11.228000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "feaae61087882c73fd487f611da2f3f6"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1755631031228600.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.248000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "c61bd9aba6cba7a9f938607528578adc"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1755631031248490.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.256000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2254] {"artifact": {"name": "after_pre_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2254, "name": "compile_fx", "filename": 21, "loc": "trace_structured("}], "has_payload": "4a0a3d9a0a4da4e2c240ac1983842cbb"}
	class GraphModule(torch.nn.Module):
	    def forward(self, x: "f32[8, 10][10, 1]cuda:0", a: "f32[10, 20][20, 1]cuda:0", b: "f32[20, 30][30, 1]cuda:0", c: "f32[10, 30][30, 1]cuda:0"):
	        # No stacktrace found for following nodes
	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
	        linear: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.linear.default(x, fc1_weight, fc1_bias);  x = fc1_weight = fc1_bias = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(linear);  linear = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(a, 3.14);  a = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
	        addmm: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(c, mul, b);  c = mul = b = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
	        gelu: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.gelu.default(addmm);  addmm = None
	        return (sigmoid, gelu)
	        
	
	 # graph id: 140239416832880
V0819 12:17:11.258000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "261b4a27f8afe5281619e6ce09acc6b1"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1755631031258767.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.263000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f899ec11974508bf54cfd2ddda8573a8"}
	{
	"name": "aot_collect_metadata",
	"ts": 1755631031263436.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.286000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f6de1051f01eecef01c84f1f8e6a074c"}
	{
	"name": "aot_collect_metadata",
	"ts": 1755631031285986.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.327000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/aot_autograd.py", 22]}
V0819 12:17:11.328000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/graph_compile.py", 23]}
V0819 12:17:11.328000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/graph_capture.py", 24]}
V0819 12:17:11.329000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_capture.py:301] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 
122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2458, "name": "compile_fx", "filename": 21, "loc": "gm, graph_signature = aot_export_module("}, {"line": 1444, "name": "aot_export_module", "filename": 22, "loc": "fx_g, metadata, in_spec, out_spec = _aot_export_function("}, {"line": 1703, "name": "_aot_export_function", "filename": 22, "loc": "aot_graph_capture = aot_stage1_graph_capture(aot_state, flat_fn)"}, {"line": 171, "name": "aot_stage1_graph_capture", "filename": 23, "loc": "aot_dispatch_base_graph(  # type: ignore[assignment]"}, {"line": 301, "name": "aot_dispatch_base_graph", "filename": 
24, "loc": "trace_structured("}], "has_payload": "4d5e8ee520aca9cec301c0adf555bcb8"}
	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=True,
	                                              keep_input_mutations=False),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=True,
	                                              keep_input_mutations=False),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=False),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=False),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=False),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=False)],
	                    output_info=[OutputAliasInfo(output_type=<OutputType.non_alias: 1>,
	                                                raw_type=<class 'torch._subclasses.functional_tensor.FunctionalTensor'>,
	                                                base_idx=None,
	                                                dynamic_dims=set(),
	                                                requires_grad=False,
	                                                functional_tensor=None),
	                                OutputAliasInfo(output_type=<OutputType.non_alias: 1>,
	                                                raw_type=<class 'torch._subclasses.functional_tensor.FunctionalTensor'>,
	                                                base_idx=None,
	                                                dynamic_dims=set(),
	                                                requires_grad=False,
	                                                functional_tensor=None)],
	                    num_intermediate_bases=0,
	                    keep_input_mutations=False,
	                    traced_tangents=[],
	                    traced_tangents_descs=[],
	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=1,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=2,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=3,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=4,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=5,
	                                                      memory_format=None)],
	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                               memory_format=None),
	                                               PlainTensorMeta(unwrapped_idx=1,
	                                                               memory_format=None)],
	                    subclass_tangent_meta=[],
	                    is_train=False,
	                    traced_tangent_metas=None,
	                    num_symints_saved_for_bw=None,
	                    grad_enabled_mutation=None,
	                    deterministic=False,
	                    static_input_indices=[],
	                    tokens={},
	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
	                    bw_donated_idxs=None,
	                    num_backward_tokens=0,
	                    num_graphsafe_rng_states=0,
	                    graphsafe_rng_state_index=None)
V0819 12:17:11.332000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_capture.py:319] {"aot_inference_graph": {}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": 
"test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2458, "name": "compile_fx", "filename": 21, "loc": "gm, graph_signature = aot_export_module("}, {"line": 1444, "name": "aot_export_module", "filename": 22, "loc": "fx_g, metadata, in_spec, out_spec = _aot_export_function("}, {"line": 1703, "name": "_aot_export_function", "filename": 22, "loc": "aot_graph_capture = aot_stage1_graph_capture(aot_state, flat_fn)"}, {"line": 171, "name": "aot_stage1_graph_capture", "filename": 23, "loc": "aot_dispatch_base_graph(  # type: ignore[assignment]"}, {"line": 319, "name": "aot_dispatch_base_graph", "filename": 24, "loc": "trace_structured("}], 
"has_payload": "b951429148939c22d26b0940141f8b77"}
	class <lambda>(torch.nn.Module):
	    def forward(
	        self,
	        arg0_1: "f32[16, 10][10, 1]cuda:0",  # PlainAOTInput(idx=0)
	        arg1_1: "f32[16][1]cuda:0",  # PlainAOTInput(idx=1)
	        arg2_1: "f32[8, 10][10, 1]cuda:0",  # PlainAOTInput(idx=2)
	        arg3_1: "f32[10, 20][20, 1]cuda:0",  # PlainAOTInput(idx=3)
	        arg4_1: "f32[20, 30][30, 1]cuda:0",  # PlainAOTInput(idx=4)
	        arg5_1: "f32[10, 30][30, 1]cuda:0",  # PlainAOTInput(idx=5)
	    ):
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(arg0_1, [1, 0]);  arg0_1 = None
	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(arg1_1, arg2_1, permute);  arg1_1 = arg2_1 = permute = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
	        return (
	            sigmoid,  # PlainAOTOutput(idx=0)
	            mul_3,  # PlainAOTOutput(idx=1)
	        )
	        
V0819 12:17:11.336000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7becc45412f1402ef7762c00ad5932f3"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1755631031336150.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.340000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ceef05227dc68692b0c2a5ca2505a5d2"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1755631031340807.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.341000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "daf6b8f1a3ef1f763d4b8047cb580bb0"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1755631031341705.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.573000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8b14f19de16b7539b26676fb7d25e657"}
	{
	"name": "pad_mm_benchmark",
	"ts": 1755631031573546.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.576000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "57000f52b79007b58fea7dadb68faec6"}
	{
	"name": "pad_mm_benchmark_get_do_bench",
	"ts": 1755631031575236.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:11.577000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "684ab6cc51e1a97fc83a2617d9193a89"}
	{
	"name": "pad_mm_benchmark_get_do_bench",
	"ts": 1755631031577450.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:13.574000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9e2289a2565d8614dbaf7f591a0c71a0"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631033574302.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:13.938000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8723cf30905af166a9f27e688633d534"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631033938727.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:13.940000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "782f5055c2285b1347238bb617a06264"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631033940683.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.082000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5c19ba3069bd0fc6c56053cc0ee17d21"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631034082677.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.085000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2d8af9f8b4561019e88ccdd4a7c79239"}
	{
	"name": "pad_mm_benchmark",
	"ts": 1755631034084961.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.088000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "1d53c73457381df48799916c32b9d6e0"}
	{
	"name": "pad_mm_benchmark",
	"ts": 1755631034088690.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.089000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "adc3f87a3f7d41f8df12f5b1d234d9b4"}
	{
	"name": "pad_mm_benchmark_get_do_bench",
	"ts": 1755631034089778.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9c9aa97686737676e9b28517c8d5592c"}
	{
	"name": "pad_mm_benchmark_get_do_bench",
	"ts": 1755631034090892.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "02668a64656757d7a10dd08691939306"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631034092335.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.204000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b4d4b07879507fe50a3dfea58faf9422"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631034204193.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.206000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9705a76501f21cbef7177d5ca3445fd0"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631034206306.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.334000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7b4dcaef7e0675dea4417a9f60dc57e2"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631034334180.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.336000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "98cf7a2fddbd6e5264c110037b4b9a10"}
	{
	"name": "pad_mm_benchmark",
	"ts": 1755631034336454.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.338000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b9ba61789c85cd982e45683b482ce64f"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1755631034338054.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.342000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bd1ad090c0cbcad5010ae1d99865a581"}
	{
	"name": "inductor_compile",
	"ts": 1755631034342007.8,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.354000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4c64e47ea44971bba809d1f5bf8a81e5"}
	{
	"name": "fx_codegen_and_compile",
	"ts": 1755631034353917.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.366000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/schemas.py", 25]}
V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/repro/after_aot.py", 26]}
V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/fb/utils.py", 27]}
V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1230] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", 
"filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1230, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "cb0ac49018c0e74516fc42058b037ae6"}
	
	import os
	os.environ['PYTORCH_TEST_FBCODE'] = '1'
	os.environ['TORCH_TRACE'] = '/home/shangdiy/my_trace_log_dir'
	os.environ['PYTORCH_TEST_REMOTE_GPU'] = '1'
	os.environ['PYTORCH_DDP_USE_SIDE_STREAM'] = '0'
	os.environ['TRITON_ALLOW_NON_CONSTEXPR_GLOBALS'] = '1'
	os.environ['TRITON_LIBHIP_PATH'] = '/usr/local/fbcode/platform010/lib/rocm-6.2.1/lib/libamdhip64.so'
	os.environ['TRITON_CUPTI_LIB_PATH'] = '/usr/local/fbcode/platform010/lib/libcupti.so'
	os.environ['TRITON_HOME'] = '/tmp/shangdiy'
	os.environ['TORCHINDUCTOR_CACHE_DIR'] = '/tmp/tmpspd28pc5'
	os.environ['TRITON_CACHE_DIR'] = '/tmp/tmpspd28pc5/triton'
	
	import torch
	from torch import tensor, device
	import torch.fx as fx
	from torch._dynamo.testing import rand_strided
	from math import inf
	import torch._inductor.inductor_prims
	
	
	
	import torch._dynamo.config
	import torch._inductor.config
	import torch._functorch.config
	import torch.fx.experimental._config
	torch._dynamo.config.specialize_int = False
	torch._dynamo.config.specialize_float = False
	torch._dynamo.config.assume_static_by_default = True
	torch._dynamo.config.automatic_dynamic_shapes = True
	torch._dynamo.config.suppress_errors = False
	torch._dynamo.config.capture_scalar_outputs = False
	torch._dynamo.config.capture_dynamic_output_shape_ops = False
	torch._dynamo.config.prefer_deferred_runtime_asserts_over_guards = False
	torch._dynamo.config.do_not_emit_runtime_asserts = False
	torch._dynamo.config.raise_on_ctx_manager_usage = True
	torch._dynamo.config.allow_rnn = False
	torch._dynamo.config.log_compilation_metrics = False
	torch._inductor.config.fx_graph_cache = True
	torch._inductor.config.cpp_wrapper = True
	torch._inductor.config.compile_threads = 32
	torch._inductor.config.triton.cudagraphs = False
	torch._inductor.config.triton.autotune_cublasLt = False
	torch._inductor.config.triton.autotune_at_compile_time = True
	torch._inductor.config.triton.store_cubin = True
	torch._inductor.config.aot_inductor.output_path = 'cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz'
	torch._inductor.config.aot_inductor.serialized_in_spec = '[1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}, {"type": "builtins.dict", "context": "[]", "children_spec": []}]}]'
	torch._inductor.config.aot_inductor.serialized_out_spec = '[1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}]'
	torch._inductor.config.aot_inductor.package = True
	torch._inductor.config.trace.provenance_tracking_level = 1
	torch._functorch.config.functionalize_rng_ops = False
	torch._functorch.config.enable_autograd_cache = True
	torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
	torch._functorch.config.unlift_effect_tokens = False
	
	
	
	isolate_fails_code_str = None
	
	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu")
	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators")
	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
	
	"""
	To run this script in fbcode:
	- Create a directory (//scripts/{your_unixname}/repro)
	- Put this file in scripts/{your_unixname}/repro/fx_graph_runnable.py
	- Add a TARGETS file that looks like the following
	- `buck2 run //scripts/{your_unixname}/repro:repro`
	
	NOTE: you may need additional deps to actually be able to run the script.
	```
	# Contents of TARGETS file
	load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
	
	python_binary(
	    name = "repro",
	    main_src = "fx_graph_runnable.py",
	    deps = [
	        "//caffe2:torch",
	        "//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu",
	        "//caffe2/torch/fb/sparsenn:sparsenn_operators",
	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu",
	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops",
	    ],
	)
	```
	"""
	
	# torch version: 2.9.0a0+fb
	# torch cuda version: 12.4.0
	# CUDA Info: 
	# nvcc: NVIDIA (R) Cuda compiler driver 
	# Copyright (c) 2005-2024 NVIDIA Corporation 
	# Built on Tue_Oct_29_23:50:19_PDT_2024 
	# Cuda compilation tools, release 12.6, V12.6.85 
	# Build cuda_12.6.r12.6/compiler.35059454_0 
	
	# GPU Hardware Info: 
	# NVIDIA PG509-210 : 8 
	
	
	from torch.nn import *
	class Repro(torch.nn.Module):
	    def __init__(self) -> None:
	        super().__init__()
	        self.fc1 = Module().cuda()
	
	    
	    
	    def forward(self):
	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
	        fc1_weight = self.fc1.weight
	        fc1_bias = self.fc1.bias
	        permute = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
	        addmm = torch.ops.aten.addmm.default(fc1_bias, arg2_1, permute);  fc1_bias = arg2_1 = permute = None
	        relu = torch.ops.aten.relu.default(addmm);  addmm = None
	        sigmoid = torch.ops.aten.sigmoid.default(relu);  relu = None
	        mul = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
	        addmm_1 = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
	        mul_1 = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
	        mul_2 = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
	        erf = torch.ops.aten.erf.default(mul_2);  mul_2 = None
	        add = torch.ops.aten.add.Tensor(erf, 1);  erf = None
	        mul_3 = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
	        return (sigmoid, mul_3)
	        
	def load_args(reader):
	    buf0 = reader.storage(None, 320, device=device(type='cuda', index=0))
	    reader.tensor(buf0, (8, 10), is_leaf=True)  # arg2_1
	    buf1 = reader.storage(None, 800, device=device(type='cuda', index=0))
	    reader.tensor(buf1, (10, 20), is_leaf=True)  # arg3_1
	    buf2 = reader.storage(None, 2400, device=device(type='cuda', index=0))
	    reader.tensor(buf2, (20, 30), is_leaf=True)  # arg4_1
	    buf3 = reader.storage(None, 1200, device=device(type='cuda', index=0))
	    reader.tensor(buf3, (10, 30), is_leaf=True)  # arg5_1
	load_args._version = 0
	mod = Repro()
	if __name__ == '__main__':
	    from torch._dynamo.repro.after_aot import run_repro
	    with torch.no_grad():
	        run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
	        # To run it separately, do 
	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
	        # mod(*args)
V0819 12:17:14.369000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ebf308296b1f8689a0d63c41a1db992a"}
	{
	"name": "additional_fake_tensor_prop",
	"ts": 1755631034368948.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.380000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "97d42b48ddb9905a491bdf3cba1c6527"}
	{
	"name": "additional_fake_tensor_prop",
	"ts": 1755631034380876.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.385000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1279] {"artifact": {"name": "before_post_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", 
"filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1279, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "40cdb2b1144299274a9c2e5a11a97ed8"}
	class <lambda>(torch.nn.Module):
	    def forward(self):
	        arg2_1: "f32[8, 10][10, 1]cuda:0"; arg3_1: "f32[10, 20][20, 1]cuda:0"; arg4_1: "f32[20, 30][30, 1]cuda:0"; arg5_1: "f32[10, 30][30, 1]cuda:0"; 
	    
	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
	        # No stacktrace found for following nodes
	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(fc1_bias, arg2_1, permute);  fc1_bias = arg2_1 = permute = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
	        return (sigmoid, mul_3)
	        
V0819 12:17:14.386000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "c2e1ab260461351e26f60feba0fbef35"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1755631034386761.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.439000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a663ba4038609b5264cc740a664dd102"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1755631034439715.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.444000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1317] {"artifact": {"name": "after_post_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", 
"filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1317, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "6eaa709538210c0772f354a3d61c2e68"}
	class <lambda>(torch.nn.Module):
	    def forward(self):
	        arg2_1: "f32[8, 10][10, 1]cuda:0"; arg3_1: "f32[10, 20][20, 1]cuda:0"; arg4_1: "f32[20, 30][30, 1]cuda:0"; arg5_1: "f32[10, 30][30, 1]cuda:0"; 
	    
	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
	        # No stacktrace found for following nodes
	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
	        mm_default_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mm.default(arg2_1, permute);  arg2_1 = permute = None
	        add_tensor_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_1, fc1_bias);  mm_default_1 = fc1_bias = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(add_tensor_1);  add_tensor_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
	        mm_default: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mm.default(mul, arg4_1);  mul = arg4_1 = None
	        add_tensor: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default, arg5_1);  mm_default = arg5_1 = None
	        
	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.5)
	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.7071067811865476);  add_tensor = None
	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
	        return (sigmoid, mul_3)
	        
V0819 12:17:14.450000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "84b2439db28331c02d0b456cf808b57c"}
	{
	"name": "GraphLowering.run",
	"ts": 1755631034450230.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.515000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2625ae8656c861528d7a57471a08c3fe"}
	{
	"name": "GraphLowering.run",
	"ts": 1755631034515895.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.516000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "12e90de2e22a17a64deac9d90988dc67"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1755631034516910.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.517000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "815bef5a99f93dedcc4f5c7586e01b49"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1755631034517755.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.920000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9bb19a2d03217feeb3f52c7df60d9cfa"}
	{
	"name": "Scheduler.__init__",
	"ts": 1755631034920225.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.952000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5ddae14ce346a1093e4c60c7519015a0"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1755631034951976.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.953000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a5473fc85fb86639239f35f450617eda"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1755631034953532.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.959000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "286c5594c9a995a35144acab005004e5"}
	{
	"name": "Scheduler.__init__",
	"ts": 1755631034959233.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:14.960000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "887c175a8f855c440d9be1e2cf0db86e"}
	{
	"name": "Scheduler.codegen",
	"ts": 1755631034960085.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.022000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e1212b2f0699fa5c4ebb2ad3f3edefc5"}
	{
	"name": "Scheduler.codegen",
	"ts": 1755631035022240.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.023000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ee9c787f272ac480a8d7673ef75615e1"}
	{
	"name": "CppWrapperGpu.generate",
	"ts": 1755631035023390.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.024000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8874e63300b9ea71451acc470c2fa525"}
	{
	"name": "CppWrapperCpu.generate",
	"ts": 1755631035024265.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.027000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4d6f15f1961f4086d9ef7d34973a3f0d"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1755631035027388.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.032000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e6a3888f177717a05d134fcc172b1813"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035032324.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.147000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "907b4e0c82e1b74115a5e22ffe08c050"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035146913.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.148000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "28dd1a6930067e5ef34c1467e2a6d60f"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035148481.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.200000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7b5aa873eb861796cef055859afaa37e"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035200462.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.205000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "500a2823cc070f4fd8266361346dc5a8"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035205102.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.385000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3ef3c138073641f2c2571b395e16f91a"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035385462.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.387000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e24be073aa4b7b88e8ea57265b65b8af"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035386995.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.389000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f64718dc768c55058ef38857f102d619"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035389500.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.393000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "fccd5234583c742aea7c7115daa1311f"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035393448.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.573000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "14b9d56f2a6ee3948b54a66902fcecec"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035572957.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.574000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8cf869e1a2d7513f7d1f051bc3752248"}
	{
	"name": "CachingAutotuner.synchronize",
	"ts": 1755631035574477.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.577000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "1520e9418c35f8b68a956c5a133d7103"}
	{
	"name": "async_compile.precompile",
	"ts": 1755631035576984.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.580000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a49e9a0416c8316f4d9c9c56b2c0ced0"}
	{
	"name": "async_compile.wait",
	"ts": 1755631035580767.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.581000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d58f64a9c8f0d510a46f58227488e6fb"}
	{
	"name": "async_compile.wait",
	"ts": 1755631035581827.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.585000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/graph.py", 28]}
V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/cpp_wrapper_gpu.py", 29]}
V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/cpp_wrapper_cpu.py", 30]}
V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/wrapper.py", 31]}
V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["<string>", 32]}
V0819 12:17:15.587000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/async_compile.py", 33]}
V0819 12:17:15.587000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/async_compile.py:117] {"artifact": {"name": "triton_kernel_info", "encoding": "json"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", 
"filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", 
"filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1465, "name": "codegen_and_compile", "filename": 21, "loc": "wrapper_code, kernel_code = graph.codegen_with_cpp_wrapper()"}, {"line": 2219, "name": "codegen_with_cpp_wrapper", "filename": 28, "loc": "return self.codegen()"}, {"line": 2270, "name": "codegen", "filename": 28, "loc": "result = self.wrapper_code.generate(self.is_inference)"}, {"line": 355, "name": "generate", "filename": 29, "loc": "return super().generate(is_inference)"}, {"line": 977, "name": "generate", "filename": 30, "loc": "return super().generate(is_inference)"}, {"line": 1465, "name": "generate", "filename": 31, "loc": "return self._generate(is_inference)"}, {"line": 1528, "name": "_generate", "filename": 31, "loc": "self.generate_and_run_autotune_block()"}, {"line": 1606, "name": "generate_and_run_autotune_block", "filename": 31, "loc": "exec(tuning_code, scope)"}, {"line": 115, "name": "<module>", "filename": 32, "loc": ""}, {"line": 583, "name": "wait", "filename": 33, "loc": "_compile_end()"}, {"line": 117, "name": "_compile_end", "filename": 33, "loc": "torch._logging.trace_structured("}], "has_payload": "64835d9f685ee082909facee8a5ee175"}
	{"triton_poi_fused_addmm_gelu_2": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 182513}, "triton_poi_fused_addmm_relu_sigmoid_0": {"autotune_cache_state": "only 1 config", "only_config": [["XBLOCK", 128], ["num_warps", 4], ["num_stages", 1]], "compile_time_us": 166812}, "triton_poi_fused_mul_1": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 183348}}
V0819 12:17:15.590000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ad4ab41028b8d64322890f1ea6e52020"}
	{
	"name": "CachingAutotuner.benchmark_all_configs",
	"ts": 1755631035590284.2,
	"args": {
	"kernel_name": "triton_poi_fused_mul_1",
	"is_backward": false,
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.591000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "15d8d0a903e4a45cdb7c6e2f083a30d3"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035591686.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.638000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d6ea5aba8c6809f793323c5c722ae78b"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035638345.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.639000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6012720b1ae40bd8f126bb4afa3194a6"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035639761.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.688000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "226d671a240d857cdd6131619ce51ff0"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035687937.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.688000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4bae60db312c469ca4ab3076f437d4b8"}
	{
	"name": "CachingAutotuner.benchmark_all_configs",
	"ts": 1755631035688898.0,
	"args": {
	"kernel_name": "triton_poi_fused_mul_1",
	"is_backward": false,
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.694000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8254544d8e0f9c83be896a08de9dd48c"}
	{
	"name": "CachingAutotuner.benchmark_all_configs",
	"ts": 1755631035694228.8,
	"args": {
	"kernel_name": "triton_poi_fused_addmm_gelu_2",
	"is_backward": false,
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.695000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bc095e74b312f69c84cf06b71d6f94ea"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035695570.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.755000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b803ca2ed15090323113e6f24e54e727"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035755541.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.757000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "acbb78661f81ed5b283e84a3e46f2237"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035757007.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.815000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4f44b57f9e263de22a3dfa9d7bb4686d"}
	{
	"name": "TritonBenchmarker.benchmark_gpu",
	"ts": 1755631035815819.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.816000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bd6eb280833425e5fd7a410c577cb8ec"}
	{
	"name": "CachingAutotuner.benchmark_all_configs",
	"ts": 1755631035816716.2,
	"args": {
	"kernel_name": "triton_poi_fused_addmm_gelu_2",
	"is_backward": false,
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.826000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d11d04c0060a6c5ab3111b49cfd51389"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1755631035826016.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.826000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d23f889cf870423c0766500f2436a532"}
	{
	"name": "CppWrapperCpu.generate",
	"ts": 1755631035826839.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.830000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "93f015aa33a6d3a9590a0b8aeb2de231"}
	{
	"name": "CppWrapperGpu.generate",
	"ts": 1755631035830160.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.833000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "466bf05776662915805ecd996f683c05"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1755631035833401.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.836000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6a85f95f036828ca68e6367ef8ccfd26"}
	{
	"name": "AotCodeCompiler.compile",
	"ts": 1755631035836648.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:15.846000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codecache.py", 34]}
V0819 12:17:15.846000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/codecache.py:1790] {"graph_dump": {"name": "inductor_aot_wrapper_code", "type": "cpp", "filename": "/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/c2zi7pbvbb6r2z2ilqqn22mpt7jxdy72w5fymrtjqrpewk5akujk.wrapper.cpp"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, 
"loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": 
"compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1490, "name": "codegen_and_compile", "filename": 21, "loc": "compiled_fn = AotCodeCompiler.compile("}, {"line": 1790, "name": "compile", "filename": 34, "loc": "trace_structured("}], "has_payload": "78b00c43b8ee3127faca1bd40f61ff57"}
	
	#include <torch/csrc/inductor/aoti_include/cuda.h>
	// Definition of AOTI runtime interface functions
	
	#include <torch/csrc/inductor/aoti_runtime/interface.h>
	#include <torch/csrc/inductor/aoti_runtime/model_container.h>
	
	#include <iostream>
	#include <vector>
	
	// Runs the statements passed as macro arguments inside a try block and ends
	// the enclosing function with a status code:
	//   - if the statements complete, the final statement of the expansion
	//     returns AOTI_RUNTIME_SUCCESS;
	//   - if a std::exception is thrown, its what() text is written to
	//     std::cerr and AOTI_RUNTIME_FAILURE is returned;
	//   - for any other exception a generic message is written to std::cerr
	//     and AOTI_RUNTIME_FAILURE is returned.
	// Because every path of the expansion ends in a return statement, the
	// macro is used as the last statement of a function body.
	#define CONVERT_EXCEPTION_TO_ERROR_CODE(...)      \
	  try {                                           \
	    __VA_ARGS__                                   \
	  } catch (const std::exception& e) {             \
	    std::cerr << "Error: " << e.what() << '\n';   \
	    return AOTI_RUNTIME_FAILURE;                  \
	  } catch (...) {                                 \
	    std::cerr << "Unknown exception occurred.\n"; \
	    return AOTI_RUNTIME_FAILURE;                  \
	  }                                               \
	  return AOTI_RUNTIME_SUCCESS;
	
	// Hands `actual_size == expected_size` to AOTI_RUNTIME_CHECK together with
	// a message of the form
	//   "expected <name> vector size to be <expected_size>, but got <actual_size>".
	// How a failed check is reported is decided by AOTI_RUNTIME_CHECK, which is
	// not defined in this part of the file.
	// Note that actual_size and expected_size are each expanded twice (once in
	// the comparison, once in the message) and are not parenthesized, so pass
	// simple, side-effect-free expressions.
	#define AOTI_VECTOR_SIZE_CHECK(actual_size, expected_size, name)  \
	  do {                                                            \
	    AOTI_RUNTIME_CHECK(                                           \
	        actual_size == expected_size,                             \
	        "expected " + std::string(name) + " vector size to be " + \
	            std::to_string(expected_size) + ", but got " +        \
	            std::to_string(actual_size));                         \
	  } while (0)
	
	// AOTInductor uses at::addmm_out, which doesn't support
	// arguments that require gradients. For this reason, we
	// enforce a no_grad context for the run APIs.
	//
	// A RAII, thread local (!) guard that enables or disables grad mode upon
	// construction, and sets it back to the original value upon destruction.
	struct AOTINoGradGuard {
	  // Grad mode that was active when the guard was created; the destructor
	  // puts this value back.
	  bool prev_mode;
	
	  // Captures the current grad mode first, then switches grad mode off.
	  AOTINoGradGuard() : prev_mode(aoti_torch_grad_mode_is_enabled()) {
	    aoti_torch_grad_mode_set_enabled(false);
	  }
	
	  // Restores the grad mode captured at construction.
	  ~AOTINoGradGuard() {
	    aoti_torch_grad_mode_set_enabled(prev_mode);
	  }
	
	  // A guard belongs to exactly one scope: copying and moving are disabled.
	  AOTINoGradGuard(const AOTINoGradGuard&) = delete;
	  AOTINoGradGuard(AOTINoGradGuard&&) noexcept = delete;
	  AOTINoGradGuard& operator=(const AOTINoGradGuard&) = delete;
	  AOTINoGradGuard& operator=(AOTINoGradGuard&&) noexcept = delete;
	};
	
	extern "C" {
	
	AOTIRuntimeError AOTInductorModelContainerCreate(
	    AOTInductorModelContainerHandle* container_handle,
	    size_t num_models,
	    bool is_cpu,
	    const char* cubin_dir) {
	  // Boolean-flag entry point: map is_cpu onto a device string ("cpu" or
	  // "cuda") and forward the remaining arguments unchanged to the
	  // device-string variant.
	  const char* device_name = is_cpu ? "cpu" : "cuda";
	  return AOTInductorModelContainerCreateWithDevice(
	      container_handle, num_models, device_name, cubin_dir);
	}
	
	AOTIRuntimeError AOTInductorModelContainerCreateWithDevice(
	    AOTInductorModelContainerHandle* container_handle,
	    size_t num_models,
	    const char* device_str,
	    const char* cubin_dir) {
	  // A container with zero models is rejected before anything is allocated.
	  if (num_models == 0) {
	    std::cerr << "Error: num_models must be positive, but got 0\n";
	    return AOTI_RUNTIME_FAILURE;
	  }
	  // Allocates the container and publishes it through *container_handle;
	  // exceptions raised on the way are turned into AOTI_RUNTIME_FAILURE.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    using Container = torch::aot_inductor::AOTInductorModelContainer;
	
	    // A null cubin_dir is forwarded as an empty optional.
	    std::optional<std::string> maybe_cubin_dir;
	    if (cubin_dir) {
	      maybe_cubin_dir = std::string(cubin_dir);
	    }
	
	    Container* created =
	        new Container(num_models, std::string(device_str), maybe_cubin_dir);
	    *container_handle =
	        reinterpret_cast<AOTInductorModelContainerHandle>(created);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerDelete(
	    AOTInductorModelContainerHandle container_handle) {
	  // Destroys the container behind the handle. An exception thrown while
	  // destroying it is reported as AOTI_RUNTIME_FAILURE.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    using Container = torch::aot_inductor::AOTInductorModelContainer;
	    delete reinterpret_cast<Container*>(container_handle);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerRun(
	    AOTInductorModelContainerHandle container_handle,
	    AtenTensorHandle* input_handles, // input AtenTensorHandle array; the
	                                     // handles are stolen, the array
	                                     // itself is only borrowed
	    size_t num_inputs,
	    AtenTensorHandle*
	        output_handles, // array that receives the output AtenTensorHandles;
	                        // the caller steals the handles, the array itself
	                        // is only borrowed
	    size_t num_outputs,
	    AOTInductorStreamHandle stream_handle,
	    AOTIProxyExecutorHandle proxy_executor_handle) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using Stream = torch::aot_inductor::DeviceStreamType;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	
	  // The caller-supplied array lengths must agree with the container's
	  // input/output counts.
	  // NOTE(review): these checks sit outside the try wrapper below -- confirm
	  // how AOTI_RUNTIME_CHECK reports a mismatch to the caller.
	  AOTI_VECTOR_SIZE_CHECK(num_inputs, model_container->num_inputs(), "inputs");
	  AOTI_VECTOR_SIZE_CHECK(num_outputs, model_container->num_outputs(), "outputs");
	
	  Stream device_stream = reinterpret_cast<Stream>(stream_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    // Grad mode stays disabled for the duration of the run.
	    AOTINoGradGuard no_grad;
	    model_container->run(
	        input_handles, output_handles, device_stream, proxy_executor_handle);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerRunSingleThreaded(
	    AOTInductorModelContainerHandle container_handle,
	    AtenTensorHandle* input_handles, // input AtenTensorHandle array; the
	                                     // handles are stolen, the array
	                                     // itself is only borrowed
	    size_t num_inputs,
	    AtenTensorHandle*
	        output_handles, // array that receives the output AtenTensorHandles;
	                        // the caller steals the handles, the array itself
	                        // is only borrowed
	    size_t num_outputs,
	    AOTInductorStreamHandle stream_handle,
	    AOTIProxyExecutorHandle proxy_executor_handle) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using Stream = torch::aot_inductor::DeviceStreamType;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	
	  // The caller-supplied array lengths must agree with the container's
	  // input/output counts.
	  // NOTE(review): these checks sit outside the try wrapper below -- confirm
	  // how AOTI_RUNTIME_CHECK reports a mismatch to the caller.
	  AOTI_VECTOR_SIZE_CHECK(num_inputs, model_container->num_inputs(), "inputs");
	  AOTI_VECTOR_SIZE_CHECK(num_outputs, model_container->num_outputs(), "outputs");
	
	  Stream device_stream = reinterpret_cast<Stream>(stream_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    // Same as the regular run entry point, but dispatches to the
	    // container's run_single_threaded() with grad mode disabled.
	    AOTINoGradGuard no_grad;
	    model_container->run_single_threaded(
	        input_handles, output_handles, device_stream, proxy_executor_handle);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetNumConstants(
	    AOTInductorModelContainerHandle container_handle,
	    size_t* num_constants) {
	  // Stores the container's num_constants() result in *num_constants.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *num_constants = model_container->num_constants();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantName(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    const char** name) {
	  // Stores the container's constant_name(idx) result in *name.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *name = model_container->constant_name(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantOriginalFQN(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    const char** original_fqn) {
	  // Stores the container's constant_original_fqn(idx) result in
	  // *original_fqn.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *original_fqn = model_container->constant_original_fqn(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantFromFolded(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    bool* from_folded) {
	  // Stores the container's constant_from_folded(idx) result in
	  // *from_folded.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *from_folded = model_container->constant_from_folded(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantType(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    int32_t* type) {
	  // Stores the container's constant_type(idx) result in *type.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *type = model_container->constant_type(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantDtype(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    int32_t* dtype) {
	  // Stores the container's constant_dtype(idx) result in *dtype.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *dtype = model_container->constant_dtype(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetConstantDataSize(
	    AOTInductorModelContainerHandle container_handle,
	    size_t idx,
	    size_t* data_size) {
	  // Stores the container's constant_data_size(idx) result in *data_size.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *data_size = model_container->constant_data_size(idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerExtractConstantsMap(
	    AOTInductorModelContainerHandle container_handle,
	    AOTInductorConstantMapHandle constant_map_handle,
	    bool use_inactive) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using HandleMap = std::unordered_map<std::string, AtenTensorHandle>;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  // The opaque map handle is treated as a name -> tensor-handle map that
	  // receives the extracted entries.
	  HandleMap* out_map = reinterpret_cast<HandleMap*>(constant_map_handle);
	
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    const auto extracted = model_container->extract_constants_map(use_inactive);
	    // emplace() leaves keys that already exist in the caller's map untouched.
	    for (const auto& entry : extracted) {
	      out_map->emplace(entry.first, entry.second);
	    }
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerUpdateUserManagedConstantBuffer(
	    AOTInductorModelContainerHandle container_handle,
	    AOTInductorConstantMapHandle constant_map_handle,
	    bool use_inactive,
	    bool validate_full_update) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using HandleMap = std::unordered_map<std::string, AtenTensorHandle>;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  HandleMap* new_constants = reinterpret_cast<HandleMap*>(constant_map_handle);
	
	  // Same container call as the plain update entry point, but with the
	  // user_managed argument set to true.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    model_container->update_constant_buffer(
	        *new_constants,
	        use_inactive,
	        validate_full_update,
	        /* user_managed = */ true);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerUpdateConstantBuffer(
	    AOTInductorModelContainerHandle container_handle,
	    AOTInductorConstantMapHandle constant_map_handle,
	    bool use_inactive,
	    bool validate_full_update) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using HandleMap = std::unordered_map<std::string, AtenTensorHandle>;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  HandleMap* new_constants = reinterpret_cast<HandleMap*>(constant_map_handle);
	
	  // Forwards the caller's map and both flags to the container's
	  // update_constant_buffer().
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    model_container->update_constant_buffer(
	        *new_constants, use_inactive, validate_full_update);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerUpdateInactiveConstantBuffer(
	    AOTInductorModelContainerHandle container_handle,
	    AOTInductorConstantMapHandle constant_map_handle) {
	  // Convenience wrapper: a constant-buffer update with both use_inactive
	  // and validate_full_update fixed to true.
	  const bool use_inactive = true;
	  const bool validate_full_update = true;
	  return AOTInductorModelContainerUpdateConstantBuffer(
	      container_handle, constant_map_handle, use_inactive, validate_full_update);
	}
	
	AOTIRuntimeError AOTInductorModelContainerFreeInactiveConstantBuffer(
	    AOTInductorModelContainerHandle container_handle) {
	  // Calls free_inactive_constant_buffer() on the container behind the
	  // handle.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    model_container->free_inactive_constant_buffer();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerRunConstantFolding(
	    AOTInductorModelContainerHandle container_handle,
	    bool use_inactive,
	    AOTInductorStreamHandle stream_handle,
	    AOTIProxyExecutorHandle proxy_executor_handle) {
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  using Stream = torch::aot_inductor::DeviceStreamType;
	
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  Stream device_stream = reinterpret_cast<Stream>(stream_handle);
	
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    // Constant folding runs with grad mode disabled, like the run APIs.
	    AOTINoGradGuard no_grad;
	    model_container->run_const_fold(
	        use_inactive, device_stream, proxy_executor_handle);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerSwapConstantBuffer(
	    AOTInductorModelContainerHandle container_handle) {
	  // Calls swap_constant_buffer() on the container behind the handle.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    model_container->swap_constant_buffer();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetNumInputs(
	    AOTInductorModelContainerHandle container_handle,
	    size_t* ret_num_inputs) {
	  // Stores the container's num_inputs() result in *ret_num_inputs.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *ret_num_inputs = model_container->num_inputs();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetInputName(
	    AOTInductorModelContainerHandle container_handle,
	    size_t input_idx,
	    const char** ret_input_names) {
	  // Stores the container's input_name(input_idx) result in
	  // *ret_input_names.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *ret_input_names = model_container->input_name(input_idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetNumOutputs(
	    AOTInductorModelContainerHandle container_handle,
	    size_t* ret_num_outputs) {
	  // Stores the container's num_outputs() result in *ret_num_outputs.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *ret_num_outputs = model_container->num_outputs();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetOutputName(
	    AOTInductorModelContainerHandle container_handle,
	    size_t output_idx,
	    const char** ret_output_names) {
	  // Stores the container's output_name(output_idx) result in
	  // *ret_output_names.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *ret_output_names = model_container->output_name(output_idx);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelContainerGetCallSpec(
	    AOTInductorModelContainerHandle container_handle,
	    const char** in_spec,
	    const char** out_spec) {
	  // Stores the container's get_in_spec() and get_out_spec() results in
	  // *in_spec and *out_spec, in that order.
	  using Container = torch::aot_inductor::AOTInductorModelContainer;
	  Container* model_container = reinterpret_cast<Container*>(container_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    *in_spec = model_container->get_in_spec();
	    *out_spec = model_container->get_out_spec();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelCreate(
	    AOTInductorModelHandle* model_handle,
	    AOTInductorConstantMapHandle constant_map_handle) {
	  // Creates a standalone model with device string "cpu" and publishes it
	  // through *model_handle.
	  //
	  // constant_map_handle is treated as a pointer to a
	  // name -> AtenTensorHandle map. When it is non-null its entries are
	  // copied into the model's constant map; when it is null the model's
	  // load_constants() is called instead.
	  //
	  // Returns AOTI_RUNTIME_SUCCESS on success and AOTI_RUNTIME_FAILURE if any
	  // step throws; on failure *model_handle is left untouched.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    auto constant_map = std::make_shared<torch::aot_inductor::ConstantMap>();
	    auto constant_array =
	        std::make_shared<std::vector<torch::aot_inductor::ConstantHandle>>();
	    auto input_map =
	        reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(
	            constant_map_handle);
	
	    // Hold the model in a unique_ptr until setup has finished, so that it
	    // is destroyed (not leaked) if copying or loading the constants throws.
	    std::unique_ptr<torch::aot_inductor::AOTInductorModel> model(
	        new torch::aot_inductor::AOTInductorModel(
	            constant_map,
	            constant_array,
	            "cpu", // device_str is hardcoded, as AOTInductorModelCreate is
	                   // only used for CPU models
	            ""));
	
	    if (input_map) {
	      for (auto const& kv : *input_map) {
	        constant_map->emplace(kv.first, kv.second);
	      }
	    } else {
	      model->load_constants();
	    }
	
	    // Setup succeeded: ownership passes to the caller through the handle.
	    *model_handle = reinterpret_cast<AOTInductorModelHandle>(model.release());
	  })
	}
	
	AOTIRuntimeError AOTInductorModelRun(
	    AOTInductorModelHandle model_handle,
	    AtenTensorHandle* input_handles,
	    AtenTensorHandle* output_handles) {
	  using Model = torch::aot_inductor::AOTInductorModel;
	  Model* target_model = reinterpret_cast<Model*>(model_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    // Grad mode is disabled while the model executes.
	    AOTINoGradGuard no_grad;
	    // This entry point supplies neither a stream nor a proxy executor.
	    auto null_stream = (torch::aot_inductor::DeviceStreamType) nullptr;
	    target_model->run_impl(
	        input_handles, output_handles, null_stream, nullptr);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelDelete(AOTInductorModelHandle model_handle) {
	  // Destroys the model behind the handle; an exception thrown during
	  // destruction is reported as AOTI_RUNTIME_FAILURE.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    using Model = torch::aot_inductor::AOTInductorModel;
	    delete reinterpret_cast<Model*>(model_handle);
	  })
	}
	
	AOTIRuntimeError AOTInductorModelGetNumOutputs(
	    AOTInductorModelHandle model_handle,
	    size_t* ret_num_outputs) {
	  // Stores the model's num_outputs() result in *ret_num_outputs.
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    using Model = torch::aot_inductor::AOTInductorModel;
	    Model* target_model = reinterpret_cast<Model*>(model_handle);
	    *ret_num_outputs = target_model->num_outputs();
	  })
	}
	
	AOTIRuntimeError AOTInductorModelUpdateConstantsMap(
	    AOTInductorModelHandle model_handle,
	    AOTInductorConstantMapHandle constant_map_handle) {
	  using Model = torch::aot_inductor::AOTInductorModel;
	  using HandleMap = std::unordered_map<std::string, AtenTensorHandle>;
	
	  Model* target_model = reinterpret_cast<Model*>(model_handle);
	  CONVERT_EXCEPTION_TO_ERROR_CODE({
	    // Build a fresh ConstantMap from the caller's entries and hand it to
	    // the model.
	    auto fresh_map = std::make_shared<torch::aot_inductor::ConstantMap>();
	    HandleMap* supplied = reinterpret_cast<HandleMap*>(constant_map_handle);
	    for (const auto& entry : *supplied) {
	      fresh_map->emplace(entry.first, entry.second);
	    }
	    target_model->update_constants_map(std::move(fresh_map));
	  })
	}
	
	} // extern "C"
	
	
	// Evaluates a CUDA driver API expression and throws std::runtime_error when
	// it does not succeed.
	//
	// The result code is first passed to cuGetErrorString. If that lookup
	// itself fails, the thrown message is
	// "CUDA driver error: invalid error code!". Otherwise, a result other than
	// CUDA_SUCCESS throws "CUDA driver error: " followed by the driver's
	// error string.
	//
	// The expansion declares locals named `code`, `msg` and `code_get_error`
	// inside its own do/while scope, so EXPR should not refer to variables
	// with those names.
	//
	// NOTE(review): the definition ends in `while (0);`, i.e. the macro brings
	// its own trailing semicolon. Some call sites in this file are written
	// without a semicolon and depend on that, so the macro and those call
	// sites would have to be changed together.
	#define CUDA_DRIVER_CHECK(EXPR)                    \
	do {                                               \
	    CUresult code = EXPR;                          \
	    const char *msg;                               \
	    CUresult code_get_error = cuGetErrorString(code, &msg); \
	    if (code_get_error != CUDA_SUCCESS) {          \
	        throw std::runtime_error(                  \
	            std::string("CUDA driver error: ") +   \
	            std::string("invalid error code!"));   \
	    }                                              \
	    if (code != CUDA_SUCCESS) {                    \
	        throw std::runtime_error(                  \
	            std::string("CUDA driver error: ") +   \
	            std::string(msg));                     \
	    }                                              \
	} while (0);
	
	// Loads a CUDA module from a file and returns the function named funcName.
	//
	// When cubinDir is set, only the file-name component of filePath is kept
	// and the file is looked up inside cubinDir instead. When sharedMemBytes
	// is non-zero, the function's maximum dynamic shared memory size attribute
	// is set to that value. Driver failures surface as exceptions thrown by
	// CUDA_DRIVER_CHECK.
	static inline CUfunction loadKernel(
	        std::string filePath,
	        const std::string &funcName,
	        uint32_t sharedMemBytes,
	        const std::optional<std::string> &cubinDir = std::nullopt) {
	    if (cubinDir.has_value()) {
	        const std::filesystem::path baseDir{cubinDir.value()};
	        const std::filesystem::path originalPath{filePath};
	        filePath = (baseDir / originalPath.filename()).string();
	    }
	
	    CUmodule loadedModule;
	    CUDA_DRIVER_CHECK(cuModuleLoad(&loadedModule, filePath.c_str()));
	
	    CUfunction kernel;
	    CUDA_DRIVER_CHECK(cuModuleGetFunction(&kernel, loadedModule, funcName.c_str()));
	
	    // The attribute is only touched when dynamic shared memory is requested.
	    if (sharedMemBytes > 0) {
	        CUDA_DRIVER_CHECK(cuFuncSetAttribute(
	            kernel,
	            CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
	            sharedMemBytes
	        ));
	    }
	    return kernel;
	}
	
	// Overload that loads the module from an in-memory image starting at
	// `start` (via cuModuleLoadData) and returns the function named funcName.
	// When sharedMemBytes is non-zero, the function's maximum dynamic shared
	// memory size attribute is set to that value.
	static inline CUfunction loadKernel(const void* start, const std::string &funcName, uint32_t sharedMemBytes) {
	    CUmodule loadedModule;
	    CUDA_DRIVER_CHECK(cuModuleLoadData(&loadedModule, start));
	
	    CUfunction kernel;
	    CUDA_DRIVER_CHECK(cuModuleGetFunction(&kernel, loadedModule, funcName.c_str()));
	
	    // The attribute is only touched when dynamic shared memory is requested.
	    if (sharedMemBytes > 0) {
	        CUDA_DRIVER_CHECK(cuFuncSetAttribute(
	            kernel,
	            CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
	            sharedMemBytes
	        ));
	    }
	    return kernel;
	}
	
	// Launches `func` on `stream` with a (gridX, gridY, gridZ) grid and a
	// one-dimensional block of 32 * numWarps threads, passing sharedMemBytes
	// and the argument-pointer array straight through to cuLaunchKernel.
	// Driver failures surface as exceptions thrown by CUDA_DRIVER_CHECK.
	static inline void launchKernel(
	        CUfunction func,
	        uint32_t gridX,
	        uint32_t gridY,
	        uint32_t gridZ,
	        uint32_t numWarps,
	        uint32_t sharedMemBytes,
	        void* args[],
	        cudaStream_t stream) {
	    const uint32_t threadsPerBlock = 32 * numWarps;
	    CUDA_DRIVER_CHECK(cuLaunchKernel(
	        func,
	        gridX, gridY, gridZ,
	        threadsPerBlock, 1, 1,
	        sharedMemBytes,
	        stream,
	        args,
	        nullptr
	    ));
	}
	// File-scope cache declarations for the dtype, device and layout used by
	// this model.
	// NOTE(review): presumably these macros define the cached_torch_dtype_float32
	// and cached_torch_layout_strided values read by the AOTInductorModel
	// constructor below -- confirm against the macro definitions, which are
	// not in this part of the file.
	CACHE_TORCH_DTYPE(float32);
	CACHE_TORCH_DEVICE(cuda);
	CACHE_TORCH_LAYOUT(strided);
	namespace torch::aot_inductor {
	namespace {
	// Per-model table of CUDA function handles, one per generated Triton
	// kernel. Every handle starts out null; the call_triton_* launchers fill
	// in a handle the first time the corresponding kernel is needed.
	class AOTInductorModelKernels : public AOTInductorModelKernelsBase {
	  public:
	    CUfunction triton_poi_fused_addmm_gelu_2 = nullptr;
	    CUfunction triton_poi_fused_addmm_relu_sigmoid_0 = nullptr;
	    CUfunction triton_poi_fused_mul_1 = nullptr;
	};
	}  // namespace
	
	
	
	// Builds the description of this compiled model: names for 4 inputs and
	// 2 outputs, metadata for 2 constants (fc1.weight and fc1.bias), the
	// serialized input/output specs, and an empty kernel-handle table.
	//
	// constants_map / constants_array are handed to update_constants_map() and
	// update_constants_array(); device_str and cubin_dir are forwarded to the
	// AOTInductorModelBase constructor.
	AOTInductorModel::AOTInductorModel(std::shared_ptr<ConstantMap> constants_map,
	                                   std::shared_ptr<std::vector<ConstantHandle>> constants_array,
	                                   const std::string& device_str,
	                                   std::optional<std::string> cubin_dir)
	    // NOTE(review): 4, 2, 2 line up with the 4 inputs_info_, 2 outputs_info_
	    // and 2 constants_info_ entries filled in below, so they are presumably
	    // the counts -- confirm the argument order and the meaning of the
	    // trailing `true` against AOTInductorModelBase.
	    : AOTInductorModelBase(4,
	                           2,
	                           2,
	                           device_str,
	                           std::move(cubin_dir),
	                           true) {
	    // Input names, in positional order.
	    inputs_info_[0].name = "arg2_1";
	    inputs_info_[1].name = "arg3_1";
	    inputs_info_[2].name = "arg4_1";
	    inputs_info_[3].name = "arg5_1";
	    // Constant 0: fc1.weight, float32, shape {16, 10}, 640 bytes
	    // (16 * 10 elements * 4 bytes).
	    constants_info_[0].name = "fc1_weight";
	    constants_info_[0].dtype = static_cast<int32_t>(cached_torch_dtype_float32);
	    constants_info_[0].offset = 0;
	    constants_info_[0].data_size = 640;
	    constants_info_[0].from_folded = false;
	    constants_info_[0].type = static_cast<int32_t>(torch::aot_inductor::ConstantType::Parameter);
	    constants_info_[0].shape = {16, 10};
	    constants_info_[0].stride = {10, 1};
	    constants_info_[0].layout = static_cast<int32_t>(cached_torch_layout_strided);
	    constants_info_[0].original_fqn = "fc1.weight";
	    // Constant 1: fc1.bias, float32, shape {16}, 64 bytes
	    // (16 elements * 4 bytes).
	    // NOTE(review): both constants record offset 0 -- confirm what `offset`
	    // is relative to.
	    constants_info_[1].name = "fc1_bias";
	    constants_info_[1].dtype = static_cast<int32_t>(cached_torch_dtype_float32);
	    constants_info_[1].offset = 0;
	    constants_info_[1].data_size = 64;
	    constants_info_[1].from_folded = false;
	    constants_info_[1].type = static_cast<int32_t>(torch::aot_inductor::ConstantType::Parameter);
	    constants_info_[1].shape = {16};
	    constants_info_[1].stride = {1};
	    constants_info_[1].layout = static_cast<int32_t>(cached_torch_layout_strided);
	    constants_info_[1].original_fqn = "fc1.bias";
	    update_constants_map(std::move(constants_map));
	    update_constants_array(std::move(constants_array));
	    // JSON-encoded specs for the call inputs and outputs, stored as raw
	    // string literals.
	    in_spec_ = R"([1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}, {"type": "builtins.dict", "context": "[]", "children_spec": []}]}])";
	    out_spec_ = R"([1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}])";
	    // Output names, in positional order.
	    outputs_info_[0].name = "output0";
	    outputs_info_[1].name = "output1";
	    // Fresh kernel table; all function handles start out null.
	    this->kernels_ = std::make_unique<AOTInductorModelKernels>();
	}
	
	// Constant-folding entry point for a model that was compiled without
	// runtime constant folding: it always returns an empty map. When it is
	// called outside of initialization, a warning is written to std::cerr
	// first. The stream and proxy_executor arguments are not used.
	std::unordered_map<std::string, AtenTensorHandle> AOTInductorModel::const_run_impl(
	    DeviceStreamType stream,
	    AOTIProxyExecutorHandle proxy_executor,
	    bool initialization
	) {
	    if (initialization) {
	        return {};
	    }
	
	    std::cerr << "[WARNING] Calling constant_folding in model, but compiled with config: "
	              << "aot_inductor.use_runtime_constant_folding=False\n";
	    return {};
	}
	} // namespace torch::aot_inductor
	using namespace torch::aot_inductor;
	
	// Host-side launcher for the Triton kernel
	// triton_poi_fused_addmm_relu_sigmoid_0 (kernel source reproduced in the
	// block comment below).
	//
	// in_out_ptr0 / in_ptr0: objects whose data_ptr() yields the device
	//     addresses passed to the kernel.
	// xnumel: element count; determines the grid size and is passed to the
	//     kernel narrowed to int.
	// device_idx_: not referenced in this function body.
	// stream_: CUDA stream the kernel is launched on.
	// kernels_: kernel-handle table; the handle for this kernel is loaded on
	//     first use and stored back into it.
	// cubin_dir_: optional directory forwarded to loadKernel.
	template <typename in_out_ptr0_type_, typename in_ptr0_type_, typename kernels_type_>
	static inline void call_triton_poi_fused_addmm_relu_sigmoid_0(
	    const in_out_ptr0_type_& in_out_ptr0,
	    const in_ptr0_type_& in_ptr0,
	    int64_t xnumel,
	    int32_t device_idx_,
	    cudaStream_t stream_,
	    kernels_type_& kernels_,
	    const std::optional<std::string>& cubin_dir_ = std::nullopt
	){
	    /*
	    async_compile.triton('triton_poi_fused_addmm_relu_sigmoid_0', '''
	    import triton
	    import triton.language as tl
	
	    from torch._inductor.runtime import triton_helpers, triton_heuristics
	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	    triton_helpers.set_driver_to_gpu()
	
	    @triton_heuristics.pointwise(
	        size_hints={'x': 128}, 
	        filename=__file__,
	        triton_meta={'signature': {'in_out_ptr0': '*fp32', 'in_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]], (2,): [['tt.divisibility', 16]]}]},
	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_relu_sigmoid_0', 'mutated_arg_names': ['in_out_ptr0'], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
	        min_elem_per_thread=0
	    )
	    @triton.jit
	    def triton_poi_fused_addmm_relu_sigmoid_0(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr):
	        xnumel = 128
	        xoffset = tl.program_id(0) * XBLOCK
	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
	        xmask = xindex < xnumel
	        x2 = xindex
	        x0 = (xindex % 16)
	        tmp0 = tl.load(in_out_ptr0 + (x2), xmask)
	        tmp1 = tl.load(in_ptr0 + (x0), xmask, eviction_policy='evict_last')
	        tmp2 = tmp0 + tmp1
	        tmp3 = tl.full([1], 0, tl.int32)
	        tmp4 = triton_helpers.maximum(tmp3, tmp2)
	        tmp5 = tl.sigmoid(tmp4)
	        tl.store(in_out_ptr0 + (x2), tmp5, xmask)
	    ''', device_str='cuda')
	    */
	    // One-dimensional grid: ceil(xnumel / 128) blocks along x.
	    uint32_t grid_0 = ((xnumel + (128 - 1)) / (128));
	    uint32_t grid_1 = 1;
	    uint32_t grid_2 = 1;
	    // Nothing to launch when any grid dimension is zero.
	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
	    // Load the kernel on first use and keep the handle in kernels_.
	    if (kernels_.triton_poi_fused_addmm_relu_sigmoid_0 == nullptr) {
	        kernels_.triton_poi_fused_addmm_relu_sigmoid_0 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/clccbkkoi5xmntp42dsrk4vjjdegwzvxj2dkuqgrmyblionocbn2.cubin", "triton_poi_fused_addmm_relu_sigmoid_0", 0, cubin_dir_); 
	    }
	    // Kernel arguments, in the order they are passed: the two device
	    // pointers, the element count as int, and a zero global-scratch pointer.
	    CUdeviceptr var_0 = reinterpret_cast<CUdeviceptr>(in_out_ptr0.data_ptr());
	    CUdeviceptr var_1 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
	    int var_2 = xnumel;
	    CUdeviceptr global_scratch_scratch_3 = 0;
	    void* kernel_args_[] = {&var_0, &var_1, &var_2, &global_scratch_scratch_3};
	    // Launch with numWarps = 4 and sharedMemBytes = 0.
	    launchKernel(kernels_.triton_poi_fused_addmm_relu_sigmoid_0, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
	}
	
	// Host-side launcher for the Triton kernel triton_poi_fused_mul_1 (kernel
	// source reproduced in the block comment below).
	//
	// in_ptr0 / out_ptr0: objects whose data_ptr() yields the device addresses
	//     passed to the kernel.
	// xnumel: element count; determines the grid size and is passed to the
	//     kernel narrowed to int.
	// device_idx_: not referenced in this function body.
	// stream_: CUDA stream the kernel is launched on.
	// kernels_: kernel-handle table; the handle for this kernel is loaded on
	//     first use and stored back into it.
	// cubin_dir_: optional directory forwarded to loadKernel.
	template <typename in_ptr0_type_, typename out_ptr0_type_, typename kernels_type_>
	static inline void call_triton_poi_fused_mul_1(
	    const in_ptr0_type_& in_ptr0,
	    const out_ptr0_type_& out_ptr0,
	    int64_t xnumel,
	    int32_t device_idx_,
	    cudaStream_t stream_,
	    kernels_type_& kernels_,
	    const std::optional<std::string>& cubin_dir_ = std::nullopt
	){
	    /*
	    async_compile.triton('triton_poi_fused_mul_1', '''
	    import triton
	    import triton.language as tl
	
	    from torch._inductor.runtime import triton_helpers, triton_heuristics
	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	    triton_helpers.set_driver_to_gpu()
	
	    @triton_heuristics.pointwise(
	        size_hints={'x': 256}, 
	        filename=__file__,
	        triton_meta={'signature': {'in_ptr0': '*fp32', 'out_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_mul_1', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 1, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
	        min_elem_per_thread=0
	    )
	    @triton.jit
	    def triton_poi_fused_mul_1(in_ptr0, out_ptr0, xnumel, XBLOCK : tl.constexpr):
	        xnumel = 200
	        xoffset = tl.program_id(0) * XBLOCK
	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
	        xmask = xindex < xnumel
	        x0 = xindex
	        tmp0 = tl.load(in_ptr0 + (x0), xmask)
	        tmp1 = 3.14
	        tmp2 = tmp0 * tmp1
	        tl.store(out_ptr0 + (x0), tmp2, xmask)
	    ''', device_str='cuda')
	    */
	    // One-dimensional grid: ceil(xnumel / 256) blocks along x.
	    uint32_t grid_0 = ((xnumel + (256 - 1)) / (256));
	    uint32_t grid_1 = 1;
	    uint32_t grid_2 = 1;
	    // Nothing to launch when any grid dimension is zero.
	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
	    // Load the kernel on first use and keep the handle in kernels_.
	    if (kernels_.triton_poi_fused_mul_1 == nullptr) {
	        kernels_.triton_poi_fused_mul_1 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/cwu4klxzdejby66dnoubpn5xgs6wb5eomanc62dcz5a42lgsz7uz.cubin", "triton_poi_fused_mul_1", 0, cubin_dir_); 
	    }
	    // Kernel arguments, in the order they are passed: the two device
	    // pointers, the element count as int, and a zero global-scratch pointer.
	    CUdeviceptr var_4 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
	    CUdeviceptr var_5 = reinterpret_cast<CUdeviceptr>(out_ptr0.data_ptr());
	    int var_6 = xnumel;
	    CUdeviceptr global_scratch_scratch_7 = 0;
	    void* kernel_args_[] = {&var_4, &var_5, &var_6, &global_scratch_scratch_7};
	    // Launch with numWarps = 4 and sharedMemBytes = 0.
	    launchKernel(kernels_.triton_poi_fused_mul_1, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
	}
	
	// Host-side launcher for the Triton kernel triton_poi_fused_addmm_gelu_2
	// (kernel source reproduced in the block comment below).
	//
	// in_out_ptr0 / in_ptr0: objects whose data_ptr() yields the device
	//     addresses passed to the kernel.
	// xnumel: element count; determines the grid size and is passed to the
	//     kernel narrowed to int.
	// device_idx_: not referenced in this function body.
	// stream_: CUDA stream the kernel is launched on.
	// kernels_: kernel-handle table; the handle for this kernel is loaded on
	//     first use and stored back into it.
	// cubin_dir_: optional directory forwarded to loadKernel.
	template <typename in_out_ptr0_type_, typename in_ptr0_type_, typename kernels_type_>
	static inline void call_triton_poi_fused_addmm_gelu_2(
	    const in_out_ptr0_type_& in_out_ptr0,
	    const in_ptr0_type_& in_ptr0,
	    int64_t xnumel,
	    int32_t device_idx_,
	    cudaStream_t stream_,
	    kernels_type_& kernels_,
	    const std::optional<std::string>& cubin_dir_ = std::nullopt
	){
	    /*
	    async_compile.triton('triton_poi_fused_addmm_gelu_2', '''
	    import triton
	    import triton.language as tl
	
	    from torch._inductor.runtime import triton_helpers, triton_heuristics
	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	    triton_helpers.set_driver_to_gpu()
	
	    @triton_heuristics.pointwise(
	        size_hints={'x': 512}, 
	        filename=__file__,
	        triton_meta={'signature': {'in_out_ptr0': '*fp32', 'in_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_gelu_2', 'mutated_arg_names': ['in_out_ptr0'], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
	        min_elem_per_thread=0
	    )
	    @triton.jit
	    def triton_poi_fused_addmm_gelu_2(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr):
	        xnumel = 300
	        xoffset = tl.program_id(0) * XBLOCK
	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
	        xmask = xindex < xnumel
	        x0 = xindex
	        tmp0 = tl.load(in_out_ptr0 + (x0), xmask)
	        tmp1 = tl.load(in_ptr0 + (x0), xmask)
	        tmp2 = tmp0 + tmp1
	        tmp3 = 0.5
	        tmp4 = tmp2 * tmp3
	        tmp5 = 0.7071067811865476
	        tmp6 = tmp2 * tmp5
	        tmp7 = libdevice.erf(tmp6)
	        tmp8 = 1.0
	        tmp9 = tmp7 + tmp8
	        tmp10 = tmp4 * tmp9
	        tl.store(in_out_ptr0 + (x0), tmp10, xmask)
	    ''', device_str='cuda')
	    */
	    // One-dimensional grid: ceil(xnumel / 256) blocks along x.
	    uint32_t grid_0 = ((xnumel + (256 - 1)) / (256));
	    uint32_t grid_1 = 1;
	    uint32_t grid_2 = 1;
	    // Nothing to launch when any grid dimension is zero.
	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
	    // Load the kernel on first use and keep the handle in kernels_.
	    if (kernels_.triton_poi_fused_addmm_gelu_2 == nullptr) {
	        kernels_.triton_poi_fused_addmm_gelu_2 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/ccwrkckegnvy2eonhehywcr42tj5q645p2oguulvb3gphpowfpp3.cubin", "triton_poi_fused_addmm_gelu_2", 0, cubin_dir_); 
	    }
	    // Kernel arguments, in the order they are passed: the two device
	    // pointers, the element count as int, and a zero global-scratch pointer.
	    CUdeviceptr var_8 = reinterpret_cast<CUdeviceptr>(in_out_ptr0.data_ptr());
	    CUdeviceptr var_9 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
	    int var_10 = xnumel;
	    CUdeviceptr global_scratch_scratch_11 = 0;
	    void* kernel_args_[] = {&var_8, &var_9, &var_10, &global_scratch_scratch_11};
	    // Launch with numWarps = 4 and sharedMemBytes = 0.
	    launchKernel(kernels_.triton_poi_fused_addmm_gelu_2, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
	}
	
	namespace torch::aot_inductor {
	
	void AOTInductorModel::_const_run_impl(
	    std::vector<AtenTensorHandle>& output_handles,
	    DeviceStreamType stream,
	    AOTIProxyExecutorHandle proxy_executor
	) {}
	
	AOTI_NOINLINE static void check_input_0(
	    AtenTensorHandle* input_handles
	) {
	    ConstantHandle arg2_1 = ConstantHandle(input_handles[0]);
	    int32_t arg2_1_dtype;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg2_1, &arg2_1_dtype));
	
	    int32_t arg2_1_expected_dtype = aoti_torch_dtype_float32();
	    if (arg2_1_expected_dtype != arg2_1_dtype) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched dtype, "
	           << "expected: " << arg2_1_expected_dtype << "(at::kFloat), "
	           << "but got: " << arg2_1_dtype << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg2_1_size = arg2_1.sizes();
	
	    if (8 != arg2_1_size[0]) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched dim value at 0, "
	           << "expected: 8, " << "but got: " << arg2_1_size[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (10 != arg2_1_size[1]) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched dim value at 1, "
	           << "expected: 10, " << "but got: " << arg2_1_size[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg2_1_stride = arg2_1.strides();
	
	    if (10 != arg2_1_stride[0]) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched stride value at 0, "
	           << "expected: 10, " << "but got: " << arg2_1_stride[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (1 != arg2_1_stride[1]) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched stride value at 1, "
	           << "expected: 1, " << "but got: " << arg2_1_stride[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    int32_t arg2_1_device_type;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg2_1, &arg2_1_device_type));
	
	    int32_t arg2_1_expected_device_type = 1;
	    if (arg2_1_expected_device_type != arg2_1_device_type) {
	        std::stringstream ss;
	        ss << "input_handles[0]: unmatched device type, "
	        << "expected: " << arg2_1_expected_device_type << "1(cuda), "
	        << "but got: " << arg2_1_device_type << "\n";
	        throw std::runtime_error(ss.str());
	    }
	}
	
	AOTI_NOINLINE static void check_input_1(
	    AtenTensorHandle* input_handles
	) {
	    ConstantHandle arg3_1 = ConstantHandle(input_handles[1]);
	    int32_t arg3_1_dtype;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg3_1, &arg3_1_dtype));
	
	    int32_t arg3_1_expected_dtype = aoti_torch_dtype_float32();
	    if (arg3_1_expected_dtype != arg3_1_dtype) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched dtype, "
	           << "expected: " << arg3_1_expected_dtype << "(at::kFloat), "
	           << "but got: " << arg3_1_dtype << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg3_1_size = arg3_1.sizes();
	
	    if (10 != arg3_1_size[0]) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched dim value at 0, "
	           << "expected: 10, " << "but got: " << arg3_1_size[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (20 != arg3_1_size[1]) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched dim value at 1, "
	           << "expected: 20, " << "but got: " << arg3_1_size[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg3_1_stride = arg3_1.strides();
	
	    if (20 != arg3_1_stride[0]) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched stride value at 0, "
	           << "expected: 20, " << "but got: " << arg3_1_stride[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (1 != arg3_1_stride[1]) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched stride value at 1, "
	           << "expected: 1, " << "but got: " << arg3_1_stride[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    int32_t arg3_1_device_type;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg3_1, &arg3_1_device_type));
	
	    int32_t arg3_1_expected_device_type = 1;
	    if (arg3_1_expected_device_type != arg3_1_device_type) {
	        std::stringstream ss;
	        ss << "input_handles[1]: unmatched device type, "
	        << "expected: " << arg3_1_expected_device_type << "1(cuda), "
	        << "but got: " << arg3_1_device_type << "\n";
	        throw std::runtime_error(ss.str());
	    }
	}
	
	AOTI_NOINLINE static void check_input_2(
	    AtenTensorHandle* input_handles
	) {
	    ConstantHandle arg4_1 = ConstantHandle(input_handles[2]);
	    int32_t arg4_1_dtype;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg4_1, &arg4_1_dtype));
	
	    int32_t arg4_1_expected_dtype = aoti_torch_dtype_float32();
	    if (arg4_1_expected_dtype != arg4_1_dtype) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched dtype, "
	           << "expected: " << arg4_1_expected_dtype << "(at::kFloat), "
	           << "but got: " << arg4_1_dtype << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg4_1_size = arg4_1.sizes();
	
	    if (20 != arg4_1_size[0]) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched dim value at 0, "
	           << "expected: 20, " << "but got: " << arg4_1_size[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (30 != arg4_1_size[1]) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched dim value at 1, "
	           << "expected: 30, " << "but got: " << arg4_1_size[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg4_1_stride = arg4_1.strides();
	
	    if (30 != arg4_1_stride[0]) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched stride value at 0, "
	           << "expected: 30, " << "but got: " << arg4_1_stride[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (1 != arg4_1_stride[1]) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched stride value at 1, "
	           << "expected: 1, " << "but got: " << arg4_1_stride[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    int32_t arg4_1_device_type;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg4_1, &arg4_1_device_type));
	
	    int32_t arg4_1_expected_device_type = 1;
	    if (arg4_1_expected_device_type != arg4_1_device_type) {
	        std::stringstream ss;
	        ss << "input_handles[2]: unmatched device type, "
	        << "expected: " << arg4_1_expected_device_type << "1(cuda), "
	        << "but got: " << arg4_1_device_type << "\n";
	        throw std::runtime_error(ss.str());
	    }
	}
	
	AOTI_NOINLINE static void check_input_3(
	    AtenTensorHandle* input_handles
	) {
	    ConstantHandle arg5_1 = ConstantHandle(input_handles[3]);
	    int32_t arg5_1_dtype;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg5_1, &arg5_1_dtype));
	
	    int32_t arg5_1_expected_dtype = aoti_torch_dtype_float32();
	    if (arg5_1_expected_dtype != arg5_1_dtype) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched dtype, "
	           << "expected: " << arg5_1_expected_dtype << "(at::kFloat), "
	           << "but got: " << arg5_1_dtype << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg5_1_size = arg5_1.sizes();
	
	    if (10 != arg5_1_size[0]) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched dim value at 0, "
	           << "expected: 10, " << "but got: " << arg5_1_size[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (30 != arg5_1_size[1]) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched dim value at 1, "
	           << "expected: 30, " << "but got: " << arg5_1_size[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    auto arg5_1_stride = arg5_1.strides();
	
	    if (30 != arg5_1_stride[0]) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched stride value at 0, "
	           << "expected: 30, " << "but got: " << arg5_1_stride[0]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	
	    if (1 != arg5_1_stride[1]) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched stride value at 1, "
	           << "expected: 1, " << "but got: " << arg5_1_stride[1]
	           << "\n";
	        throw std::runtime_error(ss.str());
	    }
	    int32_t arg5_1_device_type;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg5_1, &arg5_1_device_type));
	
	    int32_t arg5_1_expected_device_type = 1;
	    if (arg5_1_expected_device_type != arg5_1_device_type) {
	        std::stringstream ss;
	        ss << "input_handles[3]: unmatched device type, "
	        << "expected: " << arg5_1_expected_device_type << "1(cuda), "
	        << "but got: " << arg5_1_device_type << "\n";
	        throw std::runtime_error(ss.str());
	    }
	}
	
	static bool _check_aoti_runtime_check_inputs_env() {
	    const static char* env_var_value = getenv("AOTI_RUNTIME_CHECK_INPUTS");
	    const static bool result = env_var_value != nullptr && env_var_value[0] != '0';
	    return result;
	}
	
	AOTI_NOINLINE static void __check_inputs_outputs(
	    AtenTensorHandle* input_handles,
	    AtenTensorHandle* output_handles) {
	    if (!_check_aoti_runtime_check_inputs_env()){
	        return;
	    }
	    check_input_0(input_handles);
	    check_input_1(input_handles);
	    check_input_2(input_handles);
	    check_input_3(input_handles);
	}
	
	void AOTInductorModel::run_impl(
	    AtenTensorHandle*
	        input_handles, // array of input AtenTensorHandle; handles
	                        // are stolen; the array itself is borrowed
	    AtenTensorHandle*
	        output_handles, // array for writing output AtenTensorHandle; handles
	                        // will be stolen by the caller; the array itself is
	                        // borrowed
	    DeviceStreamType stream,
	    AOTIProxyExecutorHandle proxy_executor
	) {
	    __check_inputs_outputs(input_handles, output_handles);
	
	    auto inputs = steal_from_raw_handles_to_raii_handles(input_handles, 4);
	    auto arg2_1 = std::move(inputs[0]);
	    auto arg3_1 = std::move(inputs[1]);
	    auto arg4_1 = std::move(inputs[2]);
	    auto arg5_1 = std::move(inputs[3]);
	    [[maybe_unused]] auto& fc1_weight = constants_->at(0);
	    [[maybe_unused]] auto& fc1_bias = constants_->at(1);
	    inputs.clear();
	    [[maybe_unused]] auto& kernels = static_cast<AOTInductorModelKernels&>(*this->kernels_.get());
	
	    AOTICudaStreamGuard stream_guard(stream, this->device_idx_);
	    static constexpr int64_t int_array_0[] = {8L, 16L};
	    static constexpr int64_t int_array_1[] = {16L, 1L};
	    AtenTensorHandle buf0_handle;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_0, int_array_1, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf0_handle));
	    RAIIAtenTensorHandle buf0(buf0_handle);
	    // Topologically Sorted Source Nodes: [linear, ], Original ATen: [aten.t, aten.addmm]
	    static constexpr int64_t int_array_2[] = {10L, 16L};
	    static constexpr int64_t int_array_3[] = {1L, 10L};
	    // [Provenance debug handles] aoti_torch_cuda_mm_out:4
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_cuda_mm_out(buf0, arg2_1, wrap_with_raii_handle_if_needed(reinterpret_tensor_wrapper(fc1_weight, 2, int_array_2, int_array_3, 0L))));
	    arg2_1.reset();
	    auto buf1 = std::move(buf0);  // reuse
	    // Topologically Sorted Source Nodes: [, relu, sigmoid], Original ATen: [aten.addmm, aten.relu, aten.sigmoid]
	    // [Provenance debug handles] triton_poi_fused_addmm_relu_sigmoid_0:1
	    call_triton_poi_fused_addmm_relu_sigmoid_0(buf1, fc1_bias, 128L, this->device_idx_, stream, kernels, this->cubin_dir_);
	    static constexpr int64_t int_array_4[] = {10L, 20L};
	    static constexpr int64_t int_array_5[] = {20L, 1L};
	    AtenTensorHandle buf2_handle;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_4, int_array_5, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf2_handle));
	    RAIIAtenTensorHandle buf2(buf2_handle);
	    // Topologically Sorted Source Nodes: [mul], Original ATen: [aten.mul]
	    // [Provenance debug handles] triton_poi_fused_mul_1:2
	    call_triton_poi_fused_mul_1(arg3_1, buf2, 200L, this->device_idx_, stream, kernels, this->cubin_dir_);
	    arg3_1.reset();
	    static constexpr int64_t int_array_6[] = {10L, 30L};
	    static constexpr int64_t int_array_7[] = {30L, 1L};
	    AtenTensorHandle buf3_handle;
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_6, int_array_7, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf3_handle));
	    RAIIAtenTensorHandle buf3(buf3_handle);
	    // Topologically Sorted Source Nodes: [mul, ], Original ATen: [aten.mul, aten.addmm]
	    // [Provenance debug handles] aoti_torch_cuda_mm_out:5
	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_cuda_mm_out(buf3, buf2, arg4_1));
	    arg4_1.reset();
	    buf2.reset();
	    auto buf4 = std::move(buf3);  // reuse
	    // Topologically Sorted Source Nodes: [, gelu], Original ATen: [aten.addmm, aten.gelu]
	    // [Provenance debug handles] triton_poi_fused_addmm_gelu_2:3
	    call_triton_poi_fused_addmm_gelu_2(buf4, arg5_1, 300L, this->device_idx_, stream, kernels, this->cubin_dir_);
	    arg5_1.reset();
	    output_handles[0] = buf1.release();
	    output_handles[1] = buf4.release();
	} // AOTInductorModel::run_impl
	} // namespace torch::aot_inductor
	
	
	
	
V0819 12:17:15.848000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/codecache.py:1799] {"graph_dump": {"name": "inductor_aot_kernel_code", "type": "cpp", "filename": "/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/cb4ashn4alx6bnx7tb5oh4tbrnwu3vng2clhvxxaf45plbd7wmsn.kernel.cpp"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, 
"loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": 
"compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1490, "name": "codegen_and_compile", "filename": 21, "loc": "compiled_fn = AotCodeCompiler.compile("}, {"line": 1799, "name": "compile", "filename": 34, "loc": "trace_structured("}], "has_payload": "b01447e33a18a899a90703e2f10b6675"}
	// Triton kernels are embedded as comments in /tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/c2zi7pbvbb6r2z2ilqqn22mpt7jxdy72w5fymrtjqrpewk5akujk.wrapper.cpp
	
V0819 12:17:15.856000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2902a73f3334687482e4d85513a4e5b8"}
	{
	"name": "compile_file",
	"ts": 1755631035856672.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.379000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8368a694a2061e76b9bf8bd3681297c6"}
	{
	"name": "compile_file",
	"ts": 1755631040379538.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.381000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3cfc6d83d56b375202e1981736f37720"}
	{
	"name": "compile_file",
	"ts": 1755631040381197.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.412000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "737dac716e47f7382fdcf8719e6285c7"}
	{
	"name": "compile_file",
	"ts": 1755631040412038.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.416000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5db9b489851c626c900408e3a2b39535"}
	{
	"name": "compile_file",
	"ts": 1755631040416577.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.445000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d1f583a80da80193320f0b83cee5115e"}
	{
	"name": "compile_file",
	"ts": 1755631040445140.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.449000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8cd5c84d309cccb04e67b5c8640f9d2a"}
	{
	"name": "compile_file",
	"ts": 1755631040449043.0,
	"args": {
	"compile_id": "None"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.486000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ef79323064969b3cdaeb047d0bab0fcf"}
	{
	"name": "compile_file",
	"ts": 1755631040485729.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.487000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3d30c5d871af47f489170e67717d1543"}
	{
	"name": "AotCodeCompiler.compile",
	"ts": 1755631040487847.2,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.491000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6d0e402e60e25eb6033cef1f2ed7ec09"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1755631040491533.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.496000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/utils.py", 35]}
V0819 12:17:20.496000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1985] {"chromium_event": {}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, 
"name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = 
compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1025, "name": "_compile_fx_inner", "filename": 21, "loc": "CompileEventLogger.instant("}, {"line": 616, "name": "instant", "filename": 35, "loc": "CompileEventLogger.log_instant_event("}, {"line": 410, "name": "log_instant_event", "filename": 35, "loc": "chromium_log.log_instant_event("}, {"line": 1985, "name": "log_instant_event", "filename": 35, "loc": "torch._logging.trace_structured("}], "has_payload": "899dba35067fad12333b53168351cea7"}
	{
	"name": "fx_graph_cache_disabled",
	"ts": 1755631034354989.8,
	"args": {
	"compile_id": "None"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V0819 12:17:20.497000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5dc602214c67c8b4c3fd5121c69f7566"}
	{
	"name": "fx_codegen_and_compile",
	"ts": 1755631040497217.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.500000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1063] {"artifact": {"name": "inductor_provenance_tracking_node_mappings", "encoding": "json"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, 
"name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": 
"debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1063, "name": "_compile_fx_inner", "filename": 21, "loc": "trace_structured("}], "has_payload": "589a4aa33cba4db6a5e0f1e82b52c551"}
	{"preToPost": {"linear": ["permute", "mm_default_1", "add_tensor_1"], "relu": ["relu"], "sigmoid": ["sigmoid"], "mul": ["mul"], "addmm": ["mm_default", "add_tensor"], "gelu": ["mul_1", "mul_2", "erf", "add", "mul_3"]}, "postToPre": {"permute": ["linear"], "mm_default_1": ["linear"], "add_tensor_1": ["linear"], "relu": ["relu"], "sigmoid": ["sigmoid"], "mul": ["mul"], "mm_default": ["addmm"], "add_tensor": ["addmm"], "mul_1": ["gelu"], "mul_2": ["gelu"], "erf": ["gelu"], "add": ["gelu"], "mul_3": ["gelu"]}, "cppCodeToPost": {"triton_poi_fused_addmm_relu_sigmoid_0:1": ["sigmoid", "relu", "add_tensor_1"], "triton_poi_fused_mul_1:2": ["mul"], "triton_poi_fused_addmm_gelu_2:3": ["mul_3", "mul_1", "add_tensor", "add", "erf", "mul_2"], "aoti_torch_cuda_mm_out:4": ["mm_default_1"], "aoti_torch_cuda_mm_out:5": ["mm_default"]}, "postToCppCode": {"sigmoid": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "relu": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "add_tensor_1": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "mul": ["triton_poi_fused_mul_1:2"], "mul_3": ["triton_poi_fused_addmm_gelu_2:3"], "mul_1": ["triton_poi_fused_addmm_gelu_2:3"], "add_tensor": ["triton_poi_fused_addmm_gelu_2:3"], "add": ["triton_poi_fused_addmm_gelu_2:3"], "erf": ["triton_poi_fused_addmm_gelu_2:3"], "mul_2": ["triton_poi_fused_addmm_gelu_2:3"], "mm_default_1": ["aoti_torch_cuda_mm_out:4"], "mm_default": ["aoti_torch_cuda_mm_out:5"]}, "version": 2.0}
V0819 12:17:20.501000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1073] {"artifact": {"name": "inductor_provenance_tracking_kernel_stack_traces", "encoding": "json"}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 
122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, 
"name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1073, "name": "_compile_fx_inner", "filename": 21, "loc": "trace_structured("}], "has_payload": "2542f0a704bc078bccd4359742da5bd6"}
	{"triton_poi_fused_addmm_relu_sigmoid_0:1": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 81, in forward\n    x = self.sigmoid(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py\", line 359, in forward\n    return torch.sigmoid(input)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 80, in forward\n    x = self.relu(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py\", line 144, in forward\n    return F.relu(input, inplace=self.inplace)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py\", line 134, in forward\n    return F.linear(input, self.weight, self.bias)"], "triton_poi_fused_mul_1:2": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 82, in forward\n    d = a * 3.14"], "triton_poi_fused_addmm_gelu_2:3": ["File 
\"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 84, in forward\n    z = torch.nn.functional.gelu(y)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)"], "aoti_torch_cuda_mm_out:4": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py\", line 134, in forward\n    return F.linear(input, self.weight, self.bias)"], "aoti_torch_cuda_mm_out:5": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)"]}
V0819 12:17:20.503000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "80f190ba3c221310881910c65dc1036b"}
	{
	"name": "inductor_compile",
	"ts": 1755631040502942.2,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "None",
	"is_backward": false,
	"cache_state": "disabled",
	"cache_event_time": 1755631034354989637,
	"key": null,
	"components": null,
	"cache_bypass_reason": "cache not enabled",
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.506000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "cbf3c70c8c3726d682875cddbf996f29"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1755631040506393.5,
	"args": {
	"compile_id": "None"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V0819 12:17:20.510000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/metrics_context.py", 36]}
V0819 12:17:20.511000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1641] {"compilation_metrics": {"compile_id": null, "frame_key": null, "co_name": null, "co_filename": null, "co_firstlineno": null, "cache_size": null, "accumulated_cache_size": null, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "start_time": 1755631031.215075, "entire_frame_compile_time_s": null, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": null, "compliant_custom_ops": null, "restart_reasons": null, "dynamo_time_before_restart_s": null, "stack_trace": null, "graph_node_shapes": null, "has_guarded_code": null, "remote_cache_time_saved_s": null, "structured_logging_overhead_s": null, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": null, "dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, 
\"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", \"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, 
\"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, \"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}", "is_forward": null, "num_triton_bundles": null, "remote_fx_graph_cache_get_time_ms": null, "remote_fx_graph_cache_put_time_ms": null, "start_time_us": 1755631031215075, "duration_us": null, "dynamo_cumulative_compile_time_us": null, "aot_autograd_cumulative_compile_time_us": null, "inductor_cumulative_compile_time_us": null, "inductor_code_gen_cumulative_compile_time_us": null, "triton_compile_time_us": null, "runtime_cudagraphify_time_us": null, "runtime_triton_autotune_time_us": null, "dynamo_compile_time_before_restart_us": null, "distributed_ephemeral_timeout_us": null, "structured_logging_overhead_us": null, "remote_fx_graph_cache_get_time_us": null, "remote_fx_graph_cache_put_time_us": null, "backward_cumulative_compile_time_us": null, "end_time_us": 1755631040507360, "pre_grad_pass_time_us": null, "post_grad_pass_time_us": null, "joint_graph_pass_time_us": null, "log_format_version": 3, "inductor_config": "{\"TYPE_CHECKING\": false, \"_cache_config_ignore_prefix\": [\"trace\", \"cuda.cutlass_dir\", \"worker_start_method\", \"compile_threads\", 
\"post_grad_custom_post_pass\", \"post_grad_custom_pre_pass\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\", \"always_complex_memory_overlap_TESTING_ONLY\", \"fx_graph_cache\", \"fx_graph_remote_cache\", \"autotune_local_cache\", \"autotune_remote_cache\"], \"_collective.auto_select\": false, \"_collective.one_shot_all_reduce_threshold_bytes\": 131072, \"_fuse_ddp_bucket_size\": 25, \"_fuse_ddp_communication\": false, \"_fuse_ddp_communication_passes\": [\"fuse_ddp_with_concat_op\", \"schedule_comm_wait\"], \"_micro_pipeline_tp\": false, \"_post_fusion_custom_pass\": null, \"_pre_fusion_custom_pass\": null, \"_profile_var\": \"\", \"_raise_error_for_testing\": false, \"_save_config_ignore\": [\"trace.upload_tar\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"pre_grad_custom_pass\", \"aot_inductor.repro_level\", \"aot_inductor.dump_aoti_minifier\", \"post_grad_custom_pre_pass\", \"post_grad_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\"], \"add_pre_grad_passes\": null, \"aggressive_fusion\": false, \"alignment_asserts\": false, \"allow_buffer_reuse\": true, \"always_complex_memory_overlap_TESTING_ONLY\": false, \"always_keep_tensor_constants\": false, \"annotate_training\": false, \"aot_inductor.allow_stack_allocation\": false, \"aot_inductor.compile_standalone\": false, \"aot_inductor.compile_wrapper_opt_level\": \"O1\", \"aot_inductor.custom_op_libs\": null, \"aot_inductor.custom_ops_to_c_shims\": {}, \"aot_inductor.debug_compile\": false, \"aot_inductor.debug_intermediate_value_printer\": \"0\", \"aot_inductor.dump_aoti_minifier\": false, \"aot_inductor.embed_kernel_binary\": null, \"aot_inductor.emit_multi_arch_kernel\": null, \"aot_inductor.enable_lto\": false, \"aot_inductor.filtered_kernel_names\": null, \"aot_inductor.force_mmap_weights\": false, \"aot_inductor.metadata\": {\"AOTI_DEVICE_KEY\": \"cuda\"}, 
\"aot_inductor.model_name_for_generated_files\": null, \"aot_inductor.output_path\": \"\", \"aot_inductor.package\": false, \"aot_inductor.package_constants_in_so\": true, \"aot_inductor.package_constants_on_disk\": false, \"aot_inductor.package_cpp_only\": null, \"aot_inductor.precompile_headers\": false, \"aot_inductor.presets\": {}, \"aot_inductor.raise_error_on_ignored_optimization\": true, \"aot_inductor.repro_level\": 2, \"aot_inductor.serialized_in_spec\": \"\", \"aot_inductor.serialized_out_spec\": \"\", \"aot_inductor.use_consts_asm_build\": true, \"aot_inductor.use_minimal_arrayref_interface\": false, \"aot_inductor.use_runtime_constant_folding\": false, \"aot_inductor.weight_use_caching_allocator\": false, \"assert_indirect_indexing\": true, \"assume_aligned_inputs\": false, \"assume_unaligned_fallback_output\": false, \"autoheuristic_collect\": \"\", \"autoheuristic_log_path\": \"DEFAULT\", \"autoheuristic_use\": \"mixed_mm\", \"autotune_fallback_to_aten\": false, \"autotune_in_subproc\": false, \"autotune_local_cache\": true, \"autotune_lookup_table\": {}, \"autotune_multi_device\": false, \"autotune_num_choices_displayed\": 10, \"autotune_remote_cache\": null, \"b2b_gemm_pass\": false, \"batch_fusion\": true, \"benchmark_combo_kernel\": false, \"benchmark_epilogue_fusion\": true, \"benchmark_fusion\": false, \"benchmark_harness\": true, \"benchmark_kernel\": false, \"bfloat16_atomic_adds_enabled\": true, \"bucket_all_gathers_fx\": \"none\", \"bucket_all_gathers_fx_bucket_size_determinator\": null, \"bucket_reduce_scatters_fx\": \"none\", \"bucket_reduce_scatters_fx_bucket_size_determinator\": null, \"bundle_triton_into_fx_graph_cache\": null, \"bundled_autotune_remote_cache\": null, \"bw_outputs_user_visible\": true, \"can_inplace_pad_graph_input\": false, \"check_stack_no_cycles_TESTING_ONLY\": false, \"combo_kernel_allow_mixed_sizes\": 1, \"combo_kernel_foreach_dynamic_shapes\": true, \"combo_kernels\": false, \"combo_kernels_autotune\": 1, 
\"comment_origin\": false, \"compile_threads\": 32, \"comprehensive_padding\": true, \"compute_all_bounds\": false, \"constant_and_index_propagation\": true, \"conv_1x1_as_mm\": false, \"coordinate_descent_check_all_directions\": false, \"coordinate_descent_search_radius\": 1, \"coordinate_descent_tuning\": false, \"cpp.cxx\": [null, \"g++\"], \"cpp.descriptive_names\": \"original_aten\", \"cpp.dynamic_threads\": false, \"cpp.enable_concat_linear\": false, \"cpp.enable_floating_point_contract_flag\": \"off\", \"cpp.enable_grouped_gemm_template\": false, \"cpp.enable_kernel_profile\": false, \"cpp.enable_loop_tail_vec\": true, \"cpp.enable_tiling_heuristics\": true, \"cpp.enable_unsafe_math_opt_flag\": false, \"cpp.fallback_scatter_reduce_sum\": true, \"cpp.force_inline_kernel\": false, \"cpp.gemm_cache_blocking\": null, \"cpp.gemm_max_k_slices\": 1, \"cpp.gemm_thread_factors\": null, \"cpp.inject_log1p_bug_TESTING_ONLY\": null, \"cpp.inject_relu_bug_TESTING_ONLY\": null, \"cpp.max_horizontal_fusion_size\": 16, \"cpp.min_chunk_size\": 512, \"cpp.no_redundant_loops\": true, \"cpp.simdlen\": null, \"cpp.threads\": -1, \"cpp.use_decompose_tanh\": false, \"cpp.use_small_dequant_buffer\": false, \"cpp.vec_isa_ok\": null, \"cpp.weight_prepack\": true, \"cpp_cache_precompile_headers\": false, \"cpp_wrapper\": false, \"cpp_wrapper_build_separate\": false, \"cpu_backend\": \"cpp\", \"cuda.arch\": null, \"cuda.binary_remote_cache_force_write\": false, \"cuda.compile_opt_level\": \"-O1\", \"cuda.cuda_cxx\": null, \"cuda.cutlass_backend_min_gemm_size\": 1, \"cuda.cutlass_dir\": \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/third_party/cutlass\", \"cuda.cutlass_enabled_ops\": \"all\", \"cuda.cutlass_epilogue_fusion_enabled\": false, \"cuda.cutlass_hash_with_compile_cmd\": false, \"cuda.cutlass_instantiation_level\": \"0\", \"cuda.cutlass_max_profiling_configs\": null, 
\"cuda.cutlass_max_profiling_swizzle_options\": [1, 2, 4, 8], \"cuda.cutlass_op_allowlist_regex\": null, \"cuda.cutlass_op_denylist_regex\": null, \"cuda.cutlass_prescreening\": true, \"cuda.cutlass_presets\": null, \"cuda.cutlass_tma_only\": false, \"cuda.enable_caching_codegen\": true, \"cuda.enable_cuda_lto\": false, \"cuda.enable_debug_info\": false, \"cuda.enable_ptxas_info\": false, \"cuda.generate_test_runner\": false, \"cuda.upload_to_binary_remote_cache\": false, \"cuda.use_binary_remote_cache\": true, \"cuda.use_fast_math\": false, \"cuda.version\": null, \"cuda_backend\": \"triton\", \"dce\": false, \"debug\": false, \"debug_fusion\": false, \"debug_index_asserts\": false, \"debug_ir_traceback\": false, \"decompose_mem_bound_mm\": false, \"developer_warnings\": true, \"disable_cpp_codegen\": false, \"disable_padding_cpu\": true, \"disable_progress\": true, \"dynamic_scale_rblock\": true, \"efficient_conv_bn_eval_fx_passes\": false, \"emulate_precision_casts\": false, \"enable_auto_functionalized_v2\": true, \"enable_caching_generated_triton_templates\": true, \"enable_linear_binary_folding\": false, \"enabled_metric_tables\": \"\", \"epilogue_fusion\": true, \"epilogue_fusion_first\": false, \"estimate_op_runtime\": \"default\", \"external_matmul\": [], \"fallback_random\": false, \"force_fuse_int_mm_with_mul\": false, \"force_layout_optimization\": false, \"force_pointwise_cat\": false, \"force_same_precision\": false, \"force_shape_pad\": false, \"freezing\": false, \"freezing_discard_parameters\": false, \"fx_graph_cache\": true, \"fx_graph_remote_cache\": null, \"fx_passes_numeric_check\": {\"num_iterations\": 1, \"pre_grad\": false, \"precision\": 0.0001, \"requires_optimizer\": true}, \"generate_intermediate_hooks\": false, \"global_cache_dir\": null, \"graph_partition\": false, \"group_fusion\": false, \"halide.asserts\": false, \"halide.cpu_target\": \"host\", \"halide.debug\": false, \"halide.gpu_target\": \"host-cuda\", \"halide.scan_kernels\": 
false, \"halide.scheduler_cpu\": \"Adams2019\", \"halide.scheduler_cuda\": \"Anderson2021\", \"implicit_fallbacks\": true, \"inplace_buffers\": true, \"inplace_padding\": true, \"inter_node_bw\": 25, \"intra_node_bw\": 300, \"is_nightly_or_source\": false, \"is_predispatch\": false, \"joint_custom_post_pass\": null, \"joint_custom_pre_pass\": null, \"joint_graph_constant_folding\": true, \"keep_output_stride\": true, \"kernel_name_max_ops\": 10, \"layout_opt_default\": \"1\", \"layout_optimization\": true, \"log_tlparse\": false, \"loop_ordering_after_fusion\": false, \"max_autotune\": false, \"max_autotune_conv_backends\": \"ATEN,TRITON\", \"max_autotune_flex_search_space\": \"DEFAULT\", \"max_autotune_gemm\": false, \"max_autotune_gemm_backends\": \"ATEN,TRITON,CPP\", \"max_autotune_gemm_search_space\": \"DEFAULT\", \"max_autotune_pointwise\": false, \"max_autotune_report_choices_stats\": true, \"max_autotune_subproc_graceful_timeout_seconds\": 0.0, \"max_autotune_subproc_result_timeout_seconds\": 60.0, \"max_autotune_subproc_terminate_timeout_seconds\": 0.0, \"max_epilogue_benchmarked_choices\": 1, \"max_fusion_buffer_group_pairwise_attempts\": 64, \"max_fusion_size\": 64, \"max_pointwise_cat_inputs\": 8, \"memory_planning\": false, \"memory_pool\": \"intermediates\", \"min_num_split\": 0, \"mixed_mm_choice\": \"heuristic\", \"multi_kernel_hints\": [], \"nan_asserts\": false, \"non_blocking_remote_cache_write\": true, \"online_softmax\": true, \"optimize_scatter_upon_const_tensor\": true, \"pad_channels_last\": false, \"pad_outputs\": false, \"padding_alignment_bytes\": 128, \"padding_stride_threshold\": 1024, \"pattern_matcher\": true, \"permute_fusion\": false, \"pick_loop_orders\": true, \"post_grad_custom_post_pass\": null, \"post_grad_custom_pre_pass\": null, \"post_grad_fusion_options\": {}, \"pre_grad_custom_pass\": null, \"pre_grad_fusion_options\": {}, \"precompilation_timeout_seconds\": 3600, \"profile_bandwidth\": false, \"profile_bandwidth_output\": 
null, \"profile_bandwidth_regex\": \"\", \"profile_bandwidth_with_do_bench_using_profiling\": false, \"profiler_mark_wrapper_call\": false, \"prologue_fusion\": true, \"quiesce_async_compile_pool\": false, \"realize_acc_reads_size_threshold\": null, \"realize_acc_reads_threshold\": 8, \"realize_opcount_threshold\": 30, \"realize_reads_threshold\": 4, \"remote_gemm_autotune_cache\": false, \"remove_pre_grad_passes\": null, \"reorder_for_compute_comm_overlap\": false, \"reorder_for_compute_comm_overlap_passes\": [\"reorder_compute_for_overlap\", \"sink_waits\", \"raise_comms\"], \"reorder_for_locality\": true, \"reorder_for_peak_memory\": true, \"reorder_prefetch_limit\": null, \"rocm.arch\": [], \"rocm.ck_dir\": null, \"rocm.ck_max_profiling_configs\": null, \"rocm.ck_supported_arch\": [\"gfx90a\", \"gfx942\", \"gfx950\"], \"rocm.ck_tile_max_profiling_configs\": null, \"rocm.compile_opt_level\": \"-O2\", \"rocm.flush_denormals\": true, \"rocm.generate_test_runner\": false, \"rocm.is_debug\": false, \"rocm.kBatch_sweep\": null, \"rocm.n_max_profiling_configs\": null, \"rocm.print_kernel_resource_usage\": false, \"rocm.rocm_home\": null, \"rocm.save_temps\": false, \"rocm.split_k_threshold\": 16, \"rocm.use_fast_math\": true, \"rocm.use_preselected_instances\": false, \"save_args\": false, \"scalar_asserts\": true, \"score_fusion_memory_threshold\": 10, \"search_autotune_cache\": false, \"shape_padding\": true, \"size_asserts\": true, \"sleep_sec_TESTING_ONLY\": null, \"split_cat_fx_passes\": true, \"split_reductions\": true, \"static_launch_user_defined_triton_kernels\": false, \"static_weight_shapes\": true, \"strict_static_cuda_launcher\": false, \"test_configs.autotune_choice_desc_regex\": null, \"test_configs.autotune_choice_name_regex\": null, \"test_configs.force_extern_kernel_in_multi_template\": false, \"test_configs.graphsafe_rng_func_ignores_fallback_random\": false, \"test_configs.max_mm_configs\": null, \"test_configs.runtime_triton_dtype_assert\": false, 
\"test_configs.static_cpp_dtype_assert\": false, \"test_configs.track_memory_lifecycle\": null, \"test_configs.use_libtorch\": false, \"torchinductor_worker_logpath\": \"\", \"trace.compile_profile\": false, \"trace.debug_dir\": null, \"trace.debug_log\": false, \"trace.dot_graph_shape\": null, \"trace.draw_orig_fx_graph\": false, \"trace.enabled\": false, \"trace.fx_graph\": true, \"trace.fx_graph_transformed\": true, \"trace.graph_diagram\": false, \"trace.info_log\": false, \"trace.ir_post_fusion\": true, \"trace.ir_pre_fusion\": true, \"trace.log_autotuning_results\": false, \"trace.log_url_for_graph_xform\": null, \"trace.output_code\": true, \"trace.provenance_tracking_level\": 1, \"trace.save_real_tensors\": false, \"trace.upload_tar\": null, \"triton.autotune_at_compile_time\": null, \"triton.autotune_cublasLt\": true, \"triton.autotune_pointwise\": true, \"triton.autotune_with_sample_inputs\": false, \"triton.coalesce_tiling_analysis\": false, \"triton.codegen_upcast_to_fp32\": true, \"triton.cooperative_reductions\": false, \"triton.cudagraph_capture_sizes\": null, \"triton.cudagraph_dynamic_shape_warn_limit\": 50, \"triton.cudagraph_skip_dynamic_graphs\": false, \"triton.cudagraph_support_input_mutation\": false, \"triton.cudagraph_trees\": true, \"triton.cudagraph_trees_history_recording\": false, \"triton.cudagraph_unexpected_rerecord_limit\": 128, \"triton.cudagraphs\": false, \"triton.debug_sync_graph\": false, \"triton.debug_sync_kernel\": false, \"triton.decompose_k_threshold\": 32, \"triton.dense_indexing\": false, \"triton.descriptive_names\": \"original_aten\", \"triton.disallow_failing_autotune_kernels_TESTING_ONLY\": false, \"triton.divisible_by_16\": true, \"triton.enable_persistent_tma_matmul\": false, \"triton.fast_path_cudagraph_asserts\": false, \"triton.force_cooperative_reductions\": false, \"triton.force_cudagraph_sync\": false, \"triton.force_cudagraphs_warmup\": false, \"triton.inject_relu_bug_TESTING_ONLY\": null, 
\"triton.max_tiles\": null, \"triton.min_split_scan_rblock\": 256, \"triton.multi_kernel\": 0, \"triton.num_decompose_k_splits\": 10, \"triton.persistent_reductions\": true, \"triton.prefer_nd_tiling\": false, \"triton.skip_cudagraph_warmup\": false, \"triton.skip_l1_cache\": false, \"triton.slow_path_cudagraph_asserts\": true, \"triton.spill_threshold\": 16, \"triton.store_cubin\": false, \"triton.tile_reductions\": false, \"triton.tiling_prevents_pointwise_fusion\": true, \"triton.tiling_prevents_reduction_fusion\": true, \"triton.unique_kernel_names\": true, \"triton.unique_user_kernel_names\": false, \"triton.use_block_ptr\": false, \"triton.use_tensor_descriptor\": false, \"triton_kernel_default_layout_constraint\": \"needs_fixed_stride_order\", \"unbacked_symint_fallback\": 8192, \"unroll_reductions_threshold\": 8, \"unsafe_ignore_unsupported_triton_autotune_args\": false, \"unsafe_marked_cacheable_functions\": {}, \"unsafe_skip_cache_dynamic_shape_guards\": false, \"use_experimental_benchmarker\": false, \"use_fast_math\": false, \"use_mixed_mm\": true, \"use_static_cuda_launcher\": true, \"verbose_progress\": false, \"warn_mix_layout\": false, \"worker_log_path\": \"/logs/dedicated_log_torch_compile_worker_rank\", \"worker_start_method\": \"subprocess\", \"worker_suppress_logging\": true}", "remote_cache_version": null, "inductor_fx_remote_cache_hit_count": null, "inductor_fx_remote_cache_miss_count": null, "inductor_fx_remote_cache_backend_type": null, "inductor_fx_remote_cache_hit_keys": null, "inductor_fx_remote_cache_miss_keys": null, "cuda_version": "12.4.0", "triton_version": "3.3.1+fb", "feature_usage": {"fx_cache": false, "parallel_compile_post_warmup": false}, "compile_time_autotune_time_us": null, "is_runtime": false, "gc_time_us": null, "tensorify_float_attempt": null, "tensorify_float_success": null, "tensorify_float_failure": null, "guard_latency_us": null, "recompile_reason": null, "num_graph_breaks": 0, "triton_kernel_compile_times_us": 
"[[\"triton_poi_fused_mul_1\", 183348], [\"triton_poi_fused_addmm_gelu_2\", 182513], [\"triton_poi_fused_addmm_relu_sigmoid_0\", 166812]]", "ir_count": null, "cudagraph_skip_reason": null, "python_version": "3.10.9+fb (3.10:1dd9be6, May  4 2022, 01:23:45) [Clang 17.0.4 (mononoke://mononoke.internal.tfbnw.net/fbsource 447fcd878ef9ed82d", "pgo_put_remote_code_state_time_us": null, "pgo_get_remote_code_state_time_us": null, "param_numel": null, "param_bytes": null, "param_count": null, "recompile_user_contexts": null, "inline_inbuilt_nn_modules_candidate": false}, "stack": [{"line": 39, "name": "<module>", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "<module>", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, 
"loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1890, "name": "compile_fx_aot", "filename": 21, "loc": "with ("}, {"line": 96, "name": "__exit__", "filename": 36, "loc": "self._on_exit("}, {"line": 1641, "name": "record_compilation_metrics", "filename": 35, "loc": "torch._logging.trace_structured("}]}
V0819 12:17:20.512000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e7be130af6e49eafb61d62ac7a68a2ba"}
	{
	"name": "compile_fx_aot",
	"ts": 1755631040512053.8,
	"args": {
	"compile_id": "None",
	"num_graph_breaks": 0,
	"frame_key": null,
	"co_name": null,
	"co_filename": null,
	"co_firstlineno": null,
	"cache_size": null,
	"accumulated_cache_size": null,
	"guard_count": null,
	"shape_env_guard_count": null,
	"graph_op_count": null,
	"graph_node_count": null,
	"graph_input_count": null,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": null,
	"compliant_custom_ops": null,
	"restart_reasons": null,
	"dynamo_time_before_restart_s": null,
	"has_guarded_code": null,
	"dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, 
\"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", \"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, \"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
