Cherry pick jetson enablement from 2.8 release branch to main by lanluo-nvidia · Pull Request #3765 · pytorch/TensorRT

--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/custom_ops_converters.py	2025-08-08 06:50:04.271977+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/custom_ops_converters.py	2025-08-08 06:50:29.351625+00:00
@@ -19,10 +19,11 @@
if load_tensorrt_llm():
    from torch_tensorrt.dynamo.lowering.passes.fuse_distributed_ops import (
        tensorrt_fused_nccl_all_gather_op,
        tensorrt_fused_nccl_reduce_scatter_op,
    )
+
    @dynamo_tensorrt_converter(tensorrt_fused_nccl_all_gather_op)
    def fused_nccl_gather(
        ctx: ConversionContext,
        target: Target,
        args: Tuple[Argument, ...],
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/_aten_lowering_pass.py	2025-08-08 06:50:04.274977+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/_aten_lowering_pass.py	2025-08-08 06:50:30.337911+00:00
@@ -33,10 +33,11 @@
    remove_detach,
]

if not is_tegra_platform():
    from .fuse_distributed_ops import fuse_distributed_ops
+
    post_lowering_pass_list.append(fuse_distributed_ops)

ATEN_POST_LOWERING_PASSES = DynamoPassManager.build_from_passlist(
    post_lowering_pass_list
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py	2025-08-08 06:50:04.271977+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py	2025-08-08 06:50:32.115974+00:00
@@ -617,10 +617,11 @@
            args[0],
            args[1],
            args[2],
            args[3],
        )
+

if is_tensorrt_version_supported("10.8.0"):
    try:
        import modelopt.torch.quantization as mtq  # noqa: F401