Commit 626dbda
Merge branch 'main' into prefill+decode_gpt_oss
Signed-off-by: Mamta Singh <[email protected]>
2 parents e8d1128 + e05f05a commit 626dbda

38 files changed: +3899 −123 lines changed

QEfficient/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,7 @@
     QEFFCommonLoader,
 )
 from QEfficient.compile.compile_helper import compile
+from QEfficient.diffusers.pipelines.flux.pipeline_flux import QEffFluxPipeline
 from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
 from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
 from QEfficient.peft import QEffAutoPeftModelForCausalLM
@@ -53,6 +54,7 @@
     "QEFFAutoModelForImageTextToText",
     "QEFFAutoModelForSpeechSeq2Seq",
     "QEFFCommonLoader",
+    "QEffFluxPipeline",
 ]

QEfficient/base/modeling_qeff.py

Lines changed: 29 additions & 19 deletions
@@ -8,7 +8,6 @@
 import gc
 import inspect
 import logging
-import re
 import shutil
 import subprocess
 import warnings
@@ -21,26 +20,21 @@
 
 from QEfficient.base.onnx_transforms import (
     BaseOnnxTransform,
-    CustomOpTransform,
     OnnxTransformPipeline,
-    RenameFunctionOutputsTransform,
 )
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.transformers.cache_utils import InvalidIndexProvider
-from QEfficient.transformers.models.pytorch_transforms import get_decoder_layer_classes_for_export
 from QEfficient.utils import (
     constants,
     create_json,
     create_model_params,
     dump_qconfig,
-    export_wrapper,
     generate_mdp_partition_config,
     hash_dict_params,
     load_json,
 )
-from QEfficient.utils.torch_patches import apply_torch_patches, undo_torch_patches
+from QEfficient.utils.export_utils import export_wrapper
 
 logger = logging.getLogger(__name__)
 
@@ -127,9 +121,35 @@ def _model_offloaded_check(self) -> None:
         logger.error(error_msg)
         raise RuntimeError(error_msg)
 
+    @property
+    def model_name(self) -> str:
+        """
+        Get the model class name without QEff/QEFF prefix.
+
+        This property extracts the underlying model's class name and removes
+        any QEff or QEFF prefix that may have been added during wrapping.
+
+        Returns:
+            str: Model class name (e.g., "CLIPTextModel" instead of "QEffCLIPTextModel")
+        """
+        mname = self.model.__class__.__name__
+        if mname.startswith("QEff") or mname.startswith("QEFF"):
+            mname = mname[4:]
+        return mname
+
     @property
     @abstractmethod
-    def model_name(self) -> str: ...
+    def get_model_config(self) -> Dict:
+        """
+        Get the model configuration as a dictionary.
+
+        This is an abstract property that must be implemented by all subclasses.
+        Typically returns: self.model.config.__dict__
+
+        Returns:
+            Dict: The configuration dictionary of the underlying model
+        """
+        pass
 
     @abstractmethod
     def export(self, export_dir: Optional[str] = None) -> Path:
@@ -259,18 +279,8 @@ def _export(
                 input_names.append(param)
 
         try:
-            # Initialize the registry with your custom ops
+            # Export to ONNX
            export_kwargs = {} if export_kwargs is None else export_kwargs
-            if use_onnx_subfunctions:
-                warnings.warn(
-                    "The subfunction feature is experimental. Please note that using compile consecutively with and without subfunction may produce inconsistent results."
-                )
-                apply_torch_patches()
-                InvalidIndexProvider.SUBFUNC_ENABLED = True
-                output_names = [re.sub("_RetainedState", "_InternalRetainedState", s) for s in output_names]
-                export_kwargs["export_modules_as_functions"] = get_decoder_layer_classes_for_export(self.model)
-                self._onnx_transforms.append(RenameFunctionOutputsTransform)
-                self._onnx_transforms.append(CustomOpTransform)
 
             torch.onnx.export(
                 self.model,
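
The new concrete `model_name` property replaces the old abstract one and simply strips a leading `QEff`/`QEFF` (four characters) from the wrapped model's class name. A minimal standalone sketch of that behavior; `strip_qeff_prefix` is a hypothetical helper used here only for illustration:

```python
def strip_qeff_prefix(cls_name: str) -> str:
    # Same logic as the new model_name property: drop a leading "QEff"/"QEFF".
    if cls_name.startswith("QEff") or cls_name.startswith("QEFF"):
        cls_name = cls_name[4:]
    return cls_name


assert strip_qeff_prefix("QEffCLIPTextModel") == "CLIPTextModel"
assert strip_qeff_prefix("QEFFAutoModelForSpeechSeq2Seq") == "AutoModelForSpeechSeq2Seq"
```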

QEfficient/diffusers/README.md

Lines changed: 95 additions & 0 deletions
<div align="center">

# **Diffusion Models on Qualcomm Cloud AI 100**

<div align="center">

### 🎨 **Experience the Future of AI Image Generation**

*Optimized for Qualcomm Cloud AI 100*

<img src="../../docs/image/girl_laughing.png" alt="Sample Output" width="400">

**Generated with**: `black-forest-labs/FLUX.1-schnell` • `"A girl laughing"` • 4 steps • 0.0 guidance scale • ⚡

</div>

[![Diffusers](https://img.shields.io/badge/Diffusers-0.35.1-orange.svg)](https://github.com/huggingface/diffusers)

</div>

---

## ✨ Overview

QEfficient Diffusers brings the power of state-of-the-art diffusion models to Qualcomm Cloud AI 100 hardware for text-to-image generation. Built on top of the popular HuggingFace Diffusers library, our optimized pipeline provides seamless inference on this hardware.

## 🛠️ Installation

### Prerequisites

Ensure you have Python 3.8+ and the required dependencies:

```bash
# Create a Python virtual environment (Python 3.10 recommended)
sudo apt install python3.10-venv
python3.10 -m venv qeff_env
source qeff_env/bin/activate
pip install -U pip
```

### Install QEfficient

```bash
# Install from GitHub (includes diffusers support)
pip install git+https://github.com/quic/efficient-transformers

# Or build from source
git clone https://github.com/quic/efficient-transformers.git
cd efficient-transformers
pip install build wheel
python -m build --wheel --outdir dist
pip install dist/qefficient-0.0.1.dev0-py3-none-any.whl
```

---

## 🎯 Supported Models

- [`black-forest-labs/FLUX.1-schnell`](https://huggingface.co/black-forest-labs/FLUX.1-schnell)

---

## 📚 Examples

Check out our comprehensive examples in the [`examples/diffusers/`](../../examples/diffusers/) directory:
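
Below is a minimal, illustrative sketch of text-to-image generation with `QEffFluxPipeline` (exported at the package root in this commit). It assumes the pipeline mirrors the HuggingFace Diffusers `FluxPipeline` calling convention (`from_pretrained`, prompt, `num_inference_steps`, `guidance_scale`); the Cloud AI 100 compile and execution options are not shown here, so treat the scripts in `examples/diffusers/` as the authoritative reference.

```python
# Illustrative sketch only -- assumes a Diffusers-style interface for
# QEffFluxPipeline; see examples/diffusers/ for the supported usage.
from QEfficient import QEffFluxPipeline

pipeline = QEffFluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell")
image = pipeline(
    "A girl laughing",      # prompt from the sample image above
    num_inference_steps=4,  # 4 steps, as in the sample
    guidance_scale=0.0,     # schnell works without classifier-free guidance
).images[0]
image.save("girl_laughing.png")
```
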
---

## 🤝 Contributing

We welcome contributions! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details.

---

## 🙏 Acknowledgments

- **HuggingFace Diffusers**: For the excellent foundation library
- **Stability AI**: For the amazing Stable Diffusion models

---

## 📞 Support

- 📖 **Documentation**: [https://quic.github.io/efficient-transformers/](https://quic.github.io/efficient-transformers/)
- 🐛 **Issues**: [GitHub Issues](https://github.com/quic/efficient-transformers/issues)

---
QEfficient/diffusers/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
from typing import Optional, Tuple

import torch
from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle


class QEffAdaLayerNormZero(AdaLayerNormZero):
    def forward(
        self,
        x: torch.Tensor,
        shift_msa: Optional[torch.Tensor] = None,
        scale_msa: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
        return x


class QEffAdaLayerNormZeroSingle(AdaLayerNormZeroSingle):
    def forward(
        self,
        x: torch.Tensor,
        scale_msa: Optional[torch.Tensor] = None,
        shift_msa: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
        return x


class QEffAdaLayerNormContinuous(AdaLayerNormContinuous):
    def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor:
        emb = conditioning_embedding
        scale, shift = torch.chunk(emb, 2, dim=1)
        x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :]
        return x
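
Compared with the upstream diffusers layers, these QEff variants take the modulation tensors (shift/scale, or a precomputed conditioning embedding) as inputs and apply only the normalize-and-modulate step. A small shape sketch of that math, assuming `self.norm` is a `LayerNorm` without affine parameters, as in the upstream implementations:

```python
# Illustrative shapes only: per-example shift/scale vectors modulate a
# normalized sequence of hidden states (same math as QEffAdaLayerNormZero).
import torch

batch, seq, dim = 2, 16, 64
x = torch.randn(batch, seq, dim)
shift_msa = torch.randn(batch, dim)
scale_msa = torch.randn(batch, dim)
norm = torch.nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)

out = norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
assert out.shape == (batch, seq, dim)
```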
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle, RMSNorm
from diffusers.models.transformers.transformer_flux import (
    FluxAttention,
    FluxAttnProcessor,
    FluxSingleTransformerBlock,
    FluxTransformer2DModel,
    FluxTransformerBlock,
)
from torch import nn

from QEfficient.base.pytorch_transforms import ModuleMappingTransform
from QEfficient.customop.rms_norm import CustomRMSNormAIC
from QEfficient.diffusers.models.normalization import (
    QEffAdaLayerNormContinuous,
    QEffAdaLayerNormZero,
    QEffAdaLayerNormZeroSingle,
)
from QEfficient.diffusers.models.transformers.transformer_flux import (
    QEffFluxAttention,
    QEffFluxAttnProcessor,
    QEffFluxSingleTransformerBlock,
    QEffFluxTransformer2DModel,
    QEffFluxTransformerBlock,
)


class CustomOpsTransform(ModuleMappingTransform):
    _module_mapping = {
        RMSNorm: CustomRMSNormAIC,
        nn.RMSNorm: CustomRMSNormAIC,  # for torch.nn.RMSNorm
    }


class AttentionTransform(ModuleMappingTransform):
    _module_mapping = {
        FluxSingleTransformerBlock: QEffFluxSingleTransformerBlock,
        FluxTransformerBlock: QEffFluxTransformerBlock,
        FluxTransformer2DModel: QEffFluxTransformer2DModel,
        FluxAttention: QEffFluxAttention,
        FluxAttnProcessor: QEffFluxAttnProcessor,
    }


class NormalizationTransform(ModuleMappingTransform):
    _module_mapping = {
        AdaLayerNormZero: QEffAdaLayerNormZero,
        AdaLayerNormZeroSingle: QEffAdaLayerNormZeroSingle,
        AdaLayerNormContinuous: QEffAdaLayerNormContinuous,
    }
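
Each transform declares a class-to-class mapping from upstream diffusers modules to their QEff counterparts. As a rough, generic illustration (not the QEfficient `ModuleMappingTransform` API itself) of what applying such a mapping amounts to, the sketch below swaps the class of matching submodules in place, so their parameters are kept while the QEff `forward` implementations take over:

```python
# Generic illustration of a module-mapping transform; the actual
# ModuleMappingTransform implementation in QEfficient may differ.
import torch.nn as nn


def apply_module_mapping(model: nn.Module, mapping: dict) -> nn.Module:
    for module in model.modules():
        if type(module) in mapping:
            # Re-point the instance to the replacement class: weights are
            # reused, only the (patched) forward behavior changes.
            module.__class__ = mapping[type(module)]
    return model
```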
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
