feat: check flash-attn version if installed
#15
by reedcli - opened
- modeling_yi.py +10 -3
modeling_yi.py  CHANGED

@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple, Union
 
 import torch.utils.checkpoint
 from einops import repeat
+from packaging import version
 from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from transformers.activations import ACT2FN
@@ -25,8 +26,12 @@ from .configuration_yi import YiConfig
 
 is_flash_attn_available = True
 try:
-    from flash_attn import flash_attn_func
-except ModuleNotFoundError:
+    from flash_attn import flash_attn_func, __version__
+
+    assert version.parse(__version__) >= version.parse(
+        "2.3.0"
+    ), "please update your flash_attn version (>= 2.3.0)"
+except ModuleNotFoundError:
     is_flash_attn_available = False
 
 logger = logging.get_logger(__name__)
@@ -539,7 +544,9 @@ class YiModel(YiPreTrainedModel):
     def _prepare_decoder_attention_mask(
         self, attention_mask, input_ids, inputs_embeds, past_key_values_length
    ):
-        input_shape = input_ids.shape
+        input_shape = (
+            input_ids.shape if input_ids is not None else inputs_embeds.shape[:-1]
+        )
         # create causal mask
         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
         combined_attention_mask = None
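
The version check itself: flash_attn is imported together with its __version__, and packaging.version gates the flash-attention path on >= 2.3.0. Note that an outdated install trips the assert (an AssertionError), which the except ModuleNotFoundError handler does not catch, so importing the model fails loudly with the "please update" message; only a missing flash_attn package falls back to is_flash_attn_available = False. A minimal sketch of why the comparison goes through packaging.version rather than a plain string compare (the version strings below are illustrative, not taken from the PR):

    from packaging import version

    floor = version.parse("2.3.0")           # minimum required by this patch
    print(version.parse("2.2.1") >= floor)   # False -> the assert would fire
    print(version.parse("2.10.0") >= floor)  # True: segments compare numerically, while a raw
                                             # string compare would call "2.10.0" older than "2.3.0"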
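
The attention-mask hunk is independent of the version check: _prepare_decoder_attention_mask now derives input_shape from inputs_embeds when input_ids is None, so callers that pass embeddings directly still get a correctly shaped causal mask. A small shape check of the two branches (tensor sizes here are made up for illustration, assuming the usual [bsz, seq_len] ids and [bsz, seq_len, hidden] embeddings):

    import torch

    input_ids = torch.zeros(2, 16, dtype=torch.long)   # [bsz, seq_len]
    inputs_embeds = torch.zeros(2, 16, 4096)            # [bsz, seq_len, hidden]
    print(input_ids.shape)                               # torch.Size([2, 16])
    print(inputs_embeds.shape[:-1])                      # torch.Size([2, 16]) -- same target shape either way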