Skip to content

Commit e05f05a

Browse files
quic-amitrajtv-karthikeyaAmit Raj
authored
Diffusers support (#604)
# Support for Diffusers Architecture in Efficient Transformers ## Overview This pull request introduces **Diffusers architecture support** to the **Efficient Transformers** framework, enabling seamless integration of diffusion models. ## Key Highlights 1. **Support for the model [black-forest-labs/FLUX.1-schnell](https://huggingface.co/black-forest-labs/FLUX.1-schnell)** 2. **Flexible Configuration** - Supports JSON-based configuration files for easy compilation and execution. 3. **Performance Benchmarking** - Implements a performance matrix for Diffusers models to enable benchmarking of each module. 4. **Testing Framework** - Includes initial test scripts for Diffusers (in progress). 5. **Support for ONNX subfunction graphs using the flag `use_onnx_function`** 6. **Support for parallel compilation of modules using the flag `parallel_compile`** --------- Signed-off-by: Amit Raj <[email protected]> Signed-off-by: Amit Raj <[email protected]> Signed-off-by: tv-karthikeya <[email protected]> Signed-off-by: vtirumal <[email protected]> Co-authored-by: tv-karthikeya <[email protected]> Co-authored-by: Amit Raj <[email protected]> Co-authored-by: Karthikeya <[email protected]>
1 parent 06f9f08 commit e05f05a

38 files changed

+3899
-179
lines changed

QEfficient/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
QEFFCommonLoader,
1919
)
2020
from QEfficient.compile.compile_helper import compile
21+
from QEfficient.diffusers.pipelines.flux.pipeline_flux import QEffFluxPipeline
2122
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
2223
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
2324
from QEfficient.peft import QEffAutoPeftModelForCausalLM
@@ -39,6 +40,7 @@
3940
"QEFFAutoModelForImageTextToText",
4041
"QEFFAutoModelForSpeechSeq2Seq",
4142
"QEFFCommonLoader",
43+
"QEffFluxPipeline",
4244
]
4345
# For faster downloads via hf_transfer
4446
# This code is put above import statements as this needs to be executed before

QEfficient/base/modeling_qeff.py

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import gc
99
import inspect
1010
import logging
11-
import re
1211
import shutil
1312
import subprocess
1413
import warnings
@@ -21,26 +20,21 @@
2120

2221
from QEfficient.base.onnx_transforms import (
2322
BaseOnnxTransform,
24-
CustomOpTransform,
2523
OnnxTransformPipeline,
26-
RenameFunctionOutputsTransform,
2724
)
2825
from QEfficient.base.pytorch_transforms import PytorchTransform
2926
from QEfficient.compile.qnn_compiler import compile as qnn_compile
3027
from QEfficient.generation.cloud_infer import QAICInferenceSession
31-
from QEfficient.transformers.cache_utils import InvalidIndexProvider
32-
from QEfficient.transformers.models.pytorch_transforms import get_decoder_layer_classes_for_export
3328
from QEfficient.utils import (
3429
constants,
3530
create_json,
3631
create_model_params,
3732
dump_qconfig,
38-
export_wrapper,
3933
generate_mdp_partition_config,
4034
hash_dict_params,
4135
load_json,
4236
)
43-
from QEfficient.utils.torch_patches import apply_torch_patches, undo_torch_patches
37+
from QEfficient.utils.export_utils import export_wrapper
4438

4539
logger = logging.getLogger(__name__)
4640

@@ -125,9 +119,35 @@ def _model_offloaded_check(self) -> None:
125119
logger.error(error_msg)
126120
raise RuntimeError(error_msg)
127121

122+
@property
def model_name(self) -> str:
    """
    Name of the wrapped model's class with any QEff/QEFF prefix stripped.

    Reads the class name of ``self.model`` and drops a leading ``QEff`` or
    ``QEFF`` (both are four characters long). Any other name is returned
    unchanged.

    Returns:
        str: Class name without the prefix (e.g., "CLIPTextModel" instead of "QEffCLIPTextModel")
    """
    wrapped_name = self.model.__class__.__name__
    # str.startswith accepts a tuple, so both prefix spellings are tested in one call.
    if wrapped_name.startswith(("QEff", "QEFF")):
        return wrapped_name[4:]
    return wrapped_name
137+
128138
@property
129139
@abstractmethod
130-
def model_name(self) -> str: ...
140+
def get_model_config(self) -> Dict:
    """
    Return the underlying model's configuration as a dictionary.

    Declared as an abstract property, so every subclass has to provide its
    own implementation. Typically returns: self.model.config.__dict__

    Returns:
        Dict: The configuration dictionary of the underlying model
    """
    ...
131151

132152
@abstractmethod
133153
def export(self, export_dir: Optional[str] = None) -> Path:
@@ -188,7 +208,6 @@ def _export(
188208
onnx_transform_kwargs: Optional[Dict[str, any]] = None,
189209
export_dir: Optional[str] = None,
190210
offload_pt_weights: bool = True,
191-
use_onnx_subfunctions: bool = False,
192211
) -> str:
193212
"""
194213
Export the PyTorch model to ONNX and apply ONNX transforms
@@ -253,18 +272,8 @@ def _export(
253272
input_names.append(param)
254273

255274
try:
256-
# Initialize the registry with your custom ops
275+
# Export to ONNX
257276
export_kwargs = {} if export_kwargs is None else export_kwargs
258-
if use_onnx_subfunctions:
259-
warnings.warn(
260-
"The subfunction feature is experimental. Please note that using compile consecutively with and without subfunction may produce inconsistent results."
261-
)
262-
apply_torch_patches()
263-
InvalidIndexProvider.SUBFUNC_ENABLED = True
264-
output_names = [re.sub("_RetainedState", "_InternalRetainedState", s) for s in output_names]
265-
export_kwargs["export_modules_as_functions"] = get_decoder_layer_classes_for_export(self.model)
266-
self._onnx_transforms.append(RenameFunctionOutputsTransform)
267-
self._onnx_transforms.append(CustomOpTransform)
268277

269278
torch.onnx.export(
270279
self.model,
@@ -309,12 +318,6 @@ def _export(
309318
finally:
310319
shutil.rmtree(tmp_onnx_dir, ignore_errors=True)
311320

312-
if use_onnx_subfunctions:
313-
undo_torch_patches()
314-
InvalidIndexProvider.SUBFUNC_ENABLED = False
315-
self._onnx_transforms.remove(CustomOpTransform)
316-
self._onnx_transforms.remove(RenameFunctionOutputsTransform)
317-
318321
self.onnx_path = onnx_path
319322
return onnx_path
320323

QEfficient/diffusers/README.md

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
2+
<div align="center">
3+
4+
5+
# **Diffusion Models on Qualcomm Cloud AI 100**
6+
7+
8+
<div align="center">
9+
10+
### 🎨 **Experience the Future of AI Image Generation**
11+
12+
*Optimized for Qualcomm Cloud AI 100*
13+
14+
<img src="../../docs/image/girl_laughing.png" alt="Sample Output" width="400">
15+
16+
**Generated with**: `black-forest-labs/FLUX.1-schnell` • `"A girl laughing"` • 4 steps • 0.0 guidance scale • ⚡
17+
18+
19+
20+
</div>
21+
22+
23+
24+
[![Diffusers](https://img.shields.io/badge/Diffusers-0.35.1-orange.svg)](https://github.com/huggingface/diffusers)
25+
</div>
26+
27+
---
28+
29+
## ✨ Overview
30+
31+
QEfficient Diffusers brings state-of-the-art diffusion models to Qualcomm Cloud AI 100 hardware for text-to-image generation. Built on top of the popular HuggingFace Diffusers library, its optimized pipeline provides seamless inference on that hardware.
32+
33+
## 🛠️ Installation
34+
35+
### Prerequisites
36+
37+
Ensure you have Python 3.8+ and the required dependencies:
38+
39+
```bash
40+
# Create Python virtual environment (Recommended Python 3.10)
41+
sudo apt install python3.10-venv
42+
python3.10 -m venv qeff_env
43+
source qeff_env/bin/activate
44+
pip install -U pip
45+
```
46+
47+
### Install QEfficient
48+
49+
```bash
50+
# Install from GitHub (includes diffusers support)
51+
pip install git+https://github.com/quic/efficient-transformers
52+
53+
# Or build from source
54+
git clone https://github.com/quic/efficient-transformers.git
55+
cd efficient-transformers
56+
pip install build wheel
57+
python -m build --wheel --outdir dist
58+
pip install dist/qefficient-0.0.1.dev0-py3-none-any.whl
59+
```
60+
61+
---
62+
63+
## 🎯 Supported Models
64+
- [`black-forest-labs/FLUX.1-schnell`](https://huggingface.co/black-forest-labs/FLUX.1-schnell)
65+
66+
---
67+
68+
69+
## 📚 Examples
70+
71+
Check out our comprehensive examples in the [`examples/diffusers/`](../../examples/diffusers/) directory.
72+
73+
---
74+
75+
## 🤝 Contributing
76+
77+
We welcome contributions! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details.
78+
79+
80+
81+
---
82+
83+
## 🙏 Acknowledgments
84+
85+
- **HuggingFace Diffusers**: For the excellent foundation library
86+
- **Stability AI**: For the amazing Stable Diffusion models
87+
---
88+
89+
## 📞 Support
90+
91+
- 📖 **Documentation**: [https://quic.github.io/efficient-transformers/](https://quic.github.io/efficient-transformers/)
92+
- 🐛 **Issues**: [GitHub Issues](https://github.com/quic/efficient-transformers/issues)
93+
94+
---
95+

QEfficient/diffusers/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# ----------------------------------------------------------------------------
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# ----------------------------------------------------------------------------
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# ----------------------------------------------------------------------------
7+
from typing import Optional, Tuple
8+
9+
import torch
10+
from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle
11+
12+
13+
class QEffAdaLayerNormZero(AdaLayerNormZero):
    """
    AdaLayerNormZero variant whose forward applies a caller-supplied modulation.

    The forward pass normalizes ``x`` with ``self.norm`` and then applies the
    shift and scale tensors it is given; it computes no modulation itself.
    """

    def forward(
        self,
        x: torch.Tensor,
        shift_msa: Optional[torch.Tensor] = None,
        scale_msa: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Normalize ``x`` and modulate it with the given shift and scale.

        Args:
            x: Input tensor to normalize.
            shift_msa: Additive modulation term. The ``None`` default exists
                only in the signature: the tensor is indexed unconditionally,
                so a real tensor must be passed.
            scale_msa: Multiplicative modulation term, applied as
                ``1 + scale_msa``. Must likewise be a real tensor.

        Returns:
            torch.Tensor: The modulated tensor (a single tensor, not a tuple).
        """
        # `[:, None]` inserts a new axis at position 1 so the modulation terms
        # broadcast against the normalized input.
        # assumes x is (batch, seq, dim) and the terms are (batch, dim) — TODO confirm
        x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
        return x
22+
23+
24+
class QEffAdaLayerNormZeroSingle(AdaLayerNormZeroSingle):
    """
    AdaLayerNormZeroSingle variant whose forward applies a caller-supplied modulation.

    The forward pass normalizes ``x`` with ``self.norm`` and then applies the
    scale and shift tensors it is given; it computes no modulation itself.
    """

    def forward(
        self,
        x: torch.Tensor,
        scale_msa: Optional[torch.Tensor] = None,
        shift_msa: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Normalize ``x`` and modulate it with the given scale and shift.

        NOTE: the parameter order here is (scale_msa, shift_msa), the reverse
        of ``QEffAdaLayerNormZero.forward`` (shift_msa, scale_msa). Pass these
        by keyword to avoid swapping them.

        Args:
            x: Input tensor to normalize.
            scale_msa: Multiplicative modulation term, applied as
                ``1 + scale_msa``. The ``None`` default exists only in the
                signature: the tensor is indexed unconditionally, so a real
                tensor must be passed.
            shift_msa: Additive modulation term. Must likewise be a real tensor.

        Returns:
            torch.Tensor: The modulated tensor (a single tensor, not a tuple).
        """
        # `[:, None]` inserts a new axis at position 1 so the modulation terms
        # broadcast against the normalized input.
        # assumes x is (batch, seq, dim) and the terms are (batch, dim) — TODO confirm
        x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
        return x
33+
34+
35+
class QEffAdaLayerNormContinuous(AdaLayerNormContinuous):
    """
    AdaLayerNormContinuous variant that uses the conditioning embedding as given.

    The embedding is split directly into scale and shift; no projection is
    applied to it inside this forward.
    NOTE(review): presumably the caller has already projected the embedding — confirm.
    """

    def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor:
        """
        Normalize ``x`` and modulate it with scale/shift halves of the embedding.

        Args:
            x: Input tensor to normalize.
            conditioning_embedding: Tensor split into two chunks along dim 1;
                the first chunk is the scale, the second is the shift.

        Returns:
            torch.Tensor: ``norm(x) * (1 + scale) + shift`` with scale and
            shift broadcast over a new axis inserted at position 1.
        """
        scale, shift = conditioning_embedding.chunk(2, dim=1)
        normalized = self.norm(x)
        # `[:, None, :]` adds an axis at position 1 so both terms broadcast against x.
        return normalized * (1 + scale)[:, None, :] + shift[:, None, :]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# -----------------------------------------------------------------------------
7+
8+
from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle, RMSNorm
9+
from diffusers.models.transformers.transformer_flux import (
10+
FluxAttention,
11+
FluxAttnProcessor,
12+
FluxSingleTransformerBlock,
13+
FluxTransformer2DModel,
14+
FluxTransformerBlock,
15+
)
16+
from torch import nn
17+
18+
from QEfficient.base.pytorch_transforms import ModuleMappingTransform
19+
from QEfficient.customop.rms_norm import CustomRMSNormAIC
20+
from QEfficient.diffusers.models.normalization import (
21+
QEffAdaLayerNormContinuous,
22+
QEffAdaLayerNormZero,
23+
QEffAdaLayerNormZeroSingle,
24+
)
25+
from QEfficient.diffusers.models.transformers.transformer_flux import (
26+
QEffFluxAttention,
27+
QEffFluxAttnProcessor,
28+
QEffFluxSingleTransformerBlock,
29+
QEffFluxTransformer2DModel,
30+
QEffFluxTransformerBlock,
31+
)
32+
33+
34+
class CustomOpsTransform(ModuleMappingTransform):
    """Module mapping that sends both RMSNorm implementations to ``CustomRMSNormAIC``."""

    _module_mapping = {
        # RMSNorm imported from diffusers.models.normalization
        RMSNorm: CustomRMSNormAIC,
        # for torch.nn.RMSNorm
        nn.RMSNorm: CustomRMSNormAIC,
    }
39+
40+
41+
class AttentionTransform(ModuleMappingTransform):
    """Module mapping from the diffusers Flux transformer classes to their QEff counterparts."""

    _module_mapping = {
        # Transformer blocks and the top-level model
        FluxSingleTransformerBlock: QEffFluxSingleTransformerBlock,
        FluxTransformerBlock: QEffFluxTransformerBlock,
        FluxTransformer2DModel: QEffFluxTransformer2DModel,
        # Attention module and its processor
        FluxAttention: QEffFluxAttention,
        FluxAttnProcessor: QEffFluxAttnProcessor,
    }
49+
50+
51+
class NormalizationTransform(ModuleMappingTransform):
    """Module mapping from the diffusers adaptive layer norms to their QEff counterparts."""

    _module_mapping = {
        # Each key is the diffusers class; each value is the QEff subclass that replaces it.
        AdaLayerNormZero: QEffAdaLayerNormZero,
        AdaLayerNormZeroSingle: QEffAdaLayerNormZeroSingle,
        AdaLayerNormContinuous: QEffAdaLayerNormContinuous,
    }
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# ----------------------------------------------------------------------------

0 commit comments

Comments
 (0)