This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch._dynamo.config | |
import torch._inductor.config | |
import triton | |
import triton.language as tl | |
torch._dynamo.config.cache_size_limit = 10000 | |
torch._inductor.config.triton.cudagraphs = False | |
torch._inductor.config.triton.cudagraph_trees = False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -xe | |
export TORCH_LOGS="recompiles,inductor" | |
export CUDA_VISIBLE_DEVICES="3,2,1,0" | |
set_fa_op() { | |
COMPUTE_CAPABILITY=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import contextlib | |
import functools | |
import pathlib | |
import math | |
from dataclasses import dataclass | |
from typing import Callable, List, Literal, Optional, Tuple | |
import numpy as np | |
import torch |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import contextlib | |
import math | |
from dataclasses import dataclass | |
from typing import Callable, Literal, Optional, Tuple | |
import torch | |
import torch.distributed as dist | |
import torch.distributed._functional_collectives as funcol | |
import torch.profiler._utils |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import time | |
import torch | |
import triton | |
import triton.language as tl | |
import triton.runtime as runtime | |
import triton.tools.experimental_descriptor | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
torch.backends.cuda.matmul.allow_tf32 = False | |
torch.backends.cudnn.allow_tf32 = False | |
class Model(nn.Module): | |
""" | |
Simple model that performs a single square matrix multiplication (C = A * B) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Implementation of "An Attention-Free Transformer": https://arxiv.org/abs/2105.14103 | |
""" | |
import contextlib | |
import functools | |
import torch | |
import triton | |
import triton.language as tl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import triton | |
import triton.language as tl | |
torch._dynamo.config.cache_size_limit = 10000 | |
ENABLE_TRITON = True | |
ENABLE_DEEP_AUTOTUNE = True | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
torch.manual_seed(42) | |
def torch_sdpa(query, key, value): | |
out, lse, cum_seq_q, cum_seq_k, max_q, max_k, philox_seed, philox_offset, debug_attn_mask = ( | |
torch.ops.aten._scaled_dot_product_cudnn_attention( | |
query=query, | |
key=key, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import contextlib | |
import math | |
import pathlib | |
from typing import List, Optional, Tuple, Union | |
import numpy as np | |
import torch | |
import torch.distributed as dist | |
import torch.nn as nn |
NewerOlder