Upload custom kernels
Browse files
- build/torch-universal/liger_kernels/__init__.py +2 -29
- build/torch-universal/liger_kernels/_ops.py +2 -2
- build/torch-universal/liger_kernels/layers.py +42 -0
- build/torch-universal/liger_kernels/rms_norm.py +1 -40
- torch-ext/liger_kernels/__init__.py +2 -29
- torch-ext/liger_kernels/layers.py +42 -0
- torch-ext/liger_kernels/rms_norm.py +1 -40
build/torch-universal/liger_kernels/__init__.py
CHANGED
|
@@ -1,30 +1,3 @@
|
|
| 1 |
-
from .cross_entropy import LigerCrossEntropyFunction
|
| 2 |
-
from .fused_linear_cross_entropy import LigerFusedLinearCrossEntropyFunction
|
| 3 |
-
from .dyt import LigerDyTFunction
|
| 4 |
-
from .geglu import LigerGELUMulFunction
|
| 5 |
-
from .group_norm import LigerGroupNormFunction
|
| 6 |
-
from .kl_div import LigerKLDivLossFunction
|
| 7 |
-
from .layer_norm import LigerLayerNormFunction
|
| 8 |
-
from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
|
| 9 |
-
from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
|
| 10 |
-
from .jsd import LigerJSDFunction
|
| 11 |
-
from .rope import LigerRopeFunction
|
| 12 |
-
from .swiglu import LigerSiLUMulFunction
|
| 13 |
-
from .tvd import LigerTVDLossFunction
|
| 14 |
|
| 15 |
-
__all__ = [
|
| 16 |
-
"LigerCrossEntropyFunction",
|
| 17 |
-
"LigerFusedLinearCrossEntropyFunction",
|
| 18 |
-
"LigerDyTFunction",
|
| 19 |
-
"LigerGELUMulFunction",
|
| 20 |
-
"LigerGroupNormFunction",
|
| 21 |
-
"LigerKLDivLossFunction",
|
| 22 |
-
"LigerLayerNormFunction",
|
| 23 |
-
"LigerQwen2VLMRopeFunction",
|
| 24 |
-
"LigerRMSNormFunction",
|
| 25 |
-
"LigerRMSNorm",
|
| 26 |
-
"LigerJSDFunction",
|
| 27 |
-
"LigerRopeFunction",
|
| 28 |
-
"LigerSiLUMulFunction",
|
| 29 |
-
"LigerTVDLossFunction",
|
| 30 |
-
]
|
|
|
|
| 1 |
+
from . import layers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
__all__ = ["layers"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
build/torch-universal/liger_kernels/_ops.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import torch
|
| 2 |
-
ops = torch.ops.
|
| 3 |
|
| 4 |
def add_op_namespace_prefix(op_name: str):
|
| 5 |
"""
|
| 6 |
Prefix op by namespace.
|
| 7 |
"""
|
| 8 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
ops = torch.ops._liger_kernels_20250507091553
|
| 3 |
|
| 4 |
def add_op_namespace_prefix(op_name: str):
|
| 5 |
"""
|
| 6 |
Prefix op by namespace.
|
| 7 |
"""
|
| 8 |
+
return f"_liger_kernels_20250507091553::{op_name}"
|
build/torch-universal/liger_kernels/layers.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from .rms_norm import LigerRMSNormFunction
|
| 3 |
+
|
| 4 |
+
class LigerRMSNorm(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
RMSNorm module that uses the optimized LigerRMSNormFunction.
|
| 7 |
+
|
| 8 |
+
Args:
|
| 9 |
+
hidden_size (int): The size of the hidden dimension.
|
| 10 |
+
eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
|
| 11 |
+
offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
|
| 12 |
+
casting_mode (str, optional): The casting mode to use. Defaults to "llama".
|
| 13 |
+
in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
weight: torch.Tensor
|
| 18 |
+
variance_epsilon: float
|
| 19 |
+
offset: float = 0
|
| 20 |
+
casting_mode: str = "llama"
|
| 21 |
+
in_place: bool = True
|
| 22 |
+
|
| 23 |
+
def forward(self, hidden_states):
|
| 24 |
+
"""
|
| 25 |
+
Apply RMS normalization to the input tensor.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
torch.Tensor: Normalized tensor of the same shape as input
|
| 32 |
+
"""
|
| 33 |
+
return LigerRMSNormFunction.apply(
|
| 34 |
+
hidden_states,
|
| 35 |
+
self.weight,
|
| 36 |
+
self.variance_epsilon,
|
| 37 |
+
self.offset,
|
| 38 |
+
self.casting_mode,
|
| 39 |
+
self.in_place
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
__all__ = ["LigerRMSNorm"]
|
build/torch-universal/liger_kernels/rms_norm.py
CHANGED
|
@@ -362,43 +362,4 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
| 362 |
ctx.num_warps,
|
| 363 |
ctx.in_place,
|
| 364 |
)
|
| 365 |
-
return dX, dW, None, None, None, None
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
class LigerRMSNorm(torch.nn.Module):
|
| 369 |
-
"""
|
| 370 |
-
RMSNorm module that uses the optimized LigerRMSNormFunction.
|
| 371 |
-
|
| 372 |
-
Args:
|
| 373 |
-
hidden_size (int): The size of the hidden dimension.
|
| 374 |
-
eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
|
| 375 |
-
offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
|
| 376 |
-
casting_mode (str, optional): The casting mode to use. Defaults to "llama".
|
| 377 |
-
in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
|
| 378 |
-
"""
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
weight: torch.Tensor
|
| 382 |
-
variance_epsilon: float
|
| 383 |
-
offset: float = 0
|
| 384 |
-
casting_mode: str = "llama"
|
| 385 |
-
in_place: bool = True
|
| 386 |
-
|
| 387 |
-
def forward(self, hidden_states):
|
| 388 |
-
"""
|
| 389 |
-
Apply RMS normalization to the input tensor.
|
| 390 |
-
|
| 391 |
-
Args:
|
| 392 |
-
hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
|
| 393 |
-
|
| 394 |
-
Returns:
|
| 395 |
-
torch.Tensor: Normalized tensor of the same shape as input
|
| 396 |
-
"""
|
| 397 |
-
return LigerRMSNormFunction.apply(
|
| 398 |
-
hidden_states,
|
| 399 |
-
self.weight,
|
| 400 |
-
self.variance_epsilon,
|
| 401 |
-
self.offset,
|
| 402 |
-
self.casting_mode,
|
| 403 |
-
self.in_place
|
| 404 |
-
)
|
|
|
|
| 362 |
ctx.num_warps,
|
| 363 |
ctx.in_place,
|
| 364 |
)
|
| 365 |
+
return dX, dW, None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
torch-ext/liger_kernels/__init__.py
CHANGED
|
@@ -1,30 +1,3 @@
|
|
| 1 |
-
from .cross_entropy import LigerCrossEntropyFunction
|
| 2 |
-
from .fused_linear_cross_entropy import LigerFusedLinearCrossEntropyFunction
|
| 3 |
-
from .dyt import LigerDyTFunction
|
| 4 |
-
from .geglu import LigerGELUMulFunction
|
| 5 |
-
from .group_norm import LigerGroupNormFunction
|
| 6 |
-
from .kl_div import LigerKLDivLossFunction
|
| 7 |
-
from .layer_norm import LigerLayerNormFunction
|
| 8 |
-
from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
|
| 9 |
-
from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
|
| 10 |
-
from .jsd import LigerJSDFunction
|
| 11 |
-
from .rope import LigerRopeFunction
|
| 12 |
-
from .swiglu import LigerSiLUMulFunction
|
| 13 |
-
from .tvd import LigerTVDLossFunction
|
| 14 |
|
| 15 |
-
__all__ = [
|
| 16 |
-
"LigerCrossEntropyFunction",
|
| 17 |
-
"LigerFusedLinearCrossEntropyFunction",
|
| 18 |
-
"LigerDyTFunction",
|
| 19 |
-
"LigerGELUMulFunction",
|
| 20 |
-
"LigerGroupNormFunction",
|
| 21 |
-
"LigerKLDivLossFunction",
|
| 22 |
-
"LigerLayerNormFunction",
|
| 23 |
-
"LigerQwen2VLMRopeFunction",
|
| 24 |
-
"LigerRMSNormFunction",
|
| 25 |
-
"LigerRMSNorm",
|
| 26 |
-
"LigerJSDFunction",
|
| 27 |
-
"LigerRopeFunction",
|
| 28 |
-
"LigerSiLUMulFunction",
|
| 29 |
-
"LigerTVDLossFunction",
|
| 30 |
-
]
|
|
|
|
| 1 |
+
from . import layers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
__all__ = ["layers"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
torch-ext/liger_kernels/layers.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from .rms_norm import LigerRMSNormFunction
|
| 3 |
+
|
| 4 |
+
class LigerRMSNorm(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
RMSNorm module that uses the optimized LigerRMSNormFunction.
|
| 7 |
+
|
| 8 |
+
Args:
|
| 9 |
+
hidden_size (int): The size of the hidden dimension.
|
| 10 |
+
eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
|
| 11 |
+
offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
|
| 12 |
+
casting_mode (str, optional): The casting mode to use. Defaults to "llama".
|
| 13 |
+
in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
weight: torch.Tensor
|
| 18 |
+
variance_epsilon: float
|
| 19 |
+
offset: float = 0
|
| 20 |
+
casting_mode: str = "llama"
|
| 21 |
+
in_place: bool = True
|
| 22 |
+
|
| 23 |
+
def forward(self, hidden_states):
|
| 24 |
+
"""
|
| 25 |
+
Apply RMS normalization to the input tensor.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
torch.Tensor: Normalized tensor of the same shape as input
|
| 32 |
+
"""
|
| 33 |
+
return LigerRMSNormFunction.apply(
|
| 34 |
+
hidden_states,
|
| 35 |
+
self.weight,
|
| 36 |
+
self.variance_epsilon,
|
| 37 |
+
self.offset,
|
| 38 |
+
self.casting_mode,
|
| 39 |
+
self.in_place
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
__all__ = ["LigerRMSNorm"]
|
torch-ext/liger_kernels/rms_norm.py
CHANGED
|
@@ -362,43 +362,4 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
| 362 |
ctx.num_warps,
|
| 363 |
ctx.in_place,
|
| 364 |
)
|
| 365 |
-
return dX, dW, None, None, None, None
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
class LigerRMSNorm(torch.nn.Module):
|
| 369 |
-
"""
|
| 370 |
-
RMSNorm module that uses the optimized LigerRMSNormFunction.
|
| 371 |
-
|
| 372 |
-
Args:
|
| 373 |
-
hidden_size (int): The size of the hidden dimension.
|
| 374 |
-
eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
|
| 375 |
-
offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
|
| 376 |
-
casting_mode (str, optional): The casting mode to use. Defaults to "llama".
|
| 377 |
-
in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
|
| 378 |
-
"""
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
weight: torch.Tensor
|
| 382 |
-
variance_epsilon: float
|
| 383 |
-
offset: float = 0
|
| 384 |
-
casting_mode: str = "llama"
|
| 385 |
-
in_place: bool = True
|
| 386 |
-
|
| 387 |
-
def forward(self, hidden_states):
|
| 388 |
-
"""
|
| 389 |
-
Apply RMS normalization to the input tensor.
|
| 390 |
-
|
| 391 |
-
Args:
|
| 392 |
-
hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
|
| 393 |
-
|
| 394 |
-
Returns:
|
| 395 |
-
torch.Tensor: Normalized tensor of the same shape as input
|
| 396 |
-
"""
|
| 397 |
-
return LigerRMSNormFunction.apply(
|
| 398 |
-
hidden_states,
|
| 399 |
-
self.weight,
|
| 400 |
-
self.variance_epsilon,
|
| 401 |
-
self.offset,
|
| 402 |
-
self.casting_mode,
|
| 403 |
-
self.in_place
|
| 404 |
-
)
|
|
|
|
| 362 |
ctx.num_warps,
|
| 363 |
ctx.in_place,
|
| 364 |
)
|
| 365 |
+
return dX, dW, None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|