Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,26 @@ struct DISABLE_VERSION_CHECK final : OptionBase<DISABLE_VERSION_CHECK, bool> {
}
};

/**
 * @brief Option descriptor for the boolean "export raw blob" setting.
 *
 * The option is keyed by the name of the ov::intel_npu::export_raw_blob property,
 * is disabled by default and is reported as a run-time option.
 */
struct EXPORT_RAW_BLOB final : OptionBase<EXPORT_RAW_BLOB, bool> {
    /// Name under which the option is looked up: the public property's name.
    static std::string_view key() {
        return ov::intel_npu::export_raw_blob.name();
    }

    /// Value used when the option has not been set explicitly.
    static bool defaultValue() {
        return false;
    }

    /// Category reported for this option.
    static OptionMode mode() {
        return OptionMode::RunTime;
    }

#ifdef NPU_PLUGIN_DEVELOPER_BUILD
    /// Environment variable name associated with the option; only compiled into developer builds.
    static std::string_view envVar() {
        return "OV_NPU_EXPORT_RAW_BLOB";
    }
#endif
};

struct BATCH_COMPILER_MODE_SETTINGS final : OptionBase<BATCH_COMPILER_MODE_SETTINGS, std::string> {
static std::string_view key() {
return ov::intel_npu::batch_compiler_mode_settings.name();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,5 +493,12 @@ static constexpr ov::Property<std::string> backend_compilation_params{"NPU_BACKE
*/
static constexpr ov::Property<bool> disable_version_check{"NPU_DISABLE_VERSION_CHECK"};

/**
 * @brief [Only for NPU Plugin]
 * Type: boolean, default is false.
 * When set to true, the plugin metadata is not written to the compiled model when it is exported.
 */
static constexpr ov::Property<bool> export_raw_blob{"NPU_EXPORT_RAW_BLOB"};

} // namespace intel_npu
} // namespace ov
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/common/src/filtered_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ bool FilteredConfig::isAvailable(std::string key) const {
if (it != _enabled.end() && hasOpt(key)) {
return it->second;
}
// if doesnt exist = not available
// if doesn't exist = not available
return false;
}

Expand Down
36 changes: 19 additions & 17 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,25 +91,27 @@ void CompiledModel::export_model(std::ostream& stream) const {

auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);

std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();
if (!_config.get<EXPORT_RAW_BLOB>()) {
std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();

for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
initBlobSizes,
_batchSize,
inputLayouts,
outputLayouts)
.write(stream);
Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
std::move(initBlobSizes),
_batchSize,
std::move(inputLayouts),
std::move(outputLayouts))
.write(stream);
}
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
11 changes: 7 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ void Plugin::init_options() {
REGISTER_OPTION(STEPPING);
REGISTER_OPTION(MAX_TILES);
REGISTER_OPTION(DISABLE_VERSION_CHECK);
REGISTER_OPTION(EXPORT_RAW_BLOB);
REGISTER_OPTION(BATCH_COMPILER_MODE_SETTINGS);
REGISTER_OPTION(TURBO);
REGISTER_OPTION(WEIGHTLESS_BLOB);
Expand Down Expand Up @@ -893,8 +894,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c

try {
const bool skipCompatibility =
npu_plugin_properties.find(DISABLE_VERSION_CHECK::key().data()) != npu_plugin_properties.end() &&
npu_plugin_properties[DISABLE_VERSION_CHECK::key().data()].as<bool>() == true;
(npu_plugin_properties.find(DISABLE_VERSION_CHECK::key().data()) != npu_plugin_properties.end())
? npu_plugin_properties[DISABLE_VERSION_CHECK::key().data()].as<bool>()
: _globalConfig.get<DISABLE_VERSION_CHECK>();
std::unique_ptr<MetadataBase> metadata = nullptr;
size_t blobSize = MetadataBase::getFileSize(stream);
if (!skipCompatibility) {
Expand Down Expand Up @@ -948,8 +950,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(const ov::Tensor& compi

try {
const bool skipCompatibility =
npu_plugin_properties.find(DISABLE_VERSION_CHECK::key().data()) != npu_plugin_properties.end() &&
npu_plugin_properties[DISABLE_VERSION_CHECK::key().data()].as<bool>() == true;
(npu_plugin_properties.find(DISABLE_VERSION_CHECK::key().data()) != npu_plugin_properties.end())
? npu_plugin_properties[DISABLE_VERSION_CHECK::key().data()].as<bool>()
: _globalConfig.get<DISABLE_VERSION_CHECK>();
std::unique_ptr<MetadataBase> metadata = nullptr;
size_t blobSize = compiled_blob.get_byte_size();
if (!skipCompatibility) {
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/src/plugin/src/properties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ void Properties::registerPluginProperties() {
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::qdq_optimization, QDQ_OPTIMIZATION);
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::qdq_optimization_aggressive, QDQ_OPTIMIZATION_AGGRESSIVE);
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::disable_version_check, DISABLE_VERSION_CHECK);
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::export_raw_blob, EXPORT_RAW_BLOB);
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::batch_compiler_mode_settings, BATCH_COMPILER_MODE_SETTINGS);
TRY_REGISTER_SIMPLE_PROPERTY(ov::hint::enable_cpu_pinning, ENABLE_CPU_PINNING);
TRY_REGISTER_SIMPLE_PROPERTY(ov::workload_type, WORKLOAD_TYPE);
Expand Down Expand Up @@ -618,6 +619,7 @@ void Properties::registerCompiledModelProperties() {
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::qdq_optimization, QDQ_OPTIMIZATION);
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::qdq_optimization_aggressive, QDQ_OPTIMIZATION_AGGRESSIVE);
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::disable_version_check, DISABLE_VERSION_CHECK);
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::export_raw_blob, EXPORT_RAW_BLOB);
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::batch_compiler_mode_settings,
BATCH_COMPILER_MODE_SETTINGS);
TRY_REGISTER_COMPILEDMODEL_PROPERTY_IFSET(ov::intel_npu::run_inferences_sequentially, RUN_INFERENCES_SEQUENTIALLY);
Expand Down
17 changes: 17 additions & 0 deletions src/plugins/intel_npu/tools/compile_tool/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ static constexpr char targetDeviceMessage[] =

static constexpr char output_message[] = "Optional. Path to the output file. Default value: \"<model_xml_file>.blob\".";

// Help text for the -raw_blob flag.
// The NPU plugin writes its metadata to the output stream after the compiled blob itself,
// so the metadata is located at the end of the exported file rather than at its beginning.
static constexpr char raw_blob_message[] =
    "Optional. Specifies if the output blob should NOT contain NPU plugin metadata appended at the end of it.";

static constexpr char log_level_message[] = "Optional. Log level for OpenVINO library.";

static constexpr char config_message[] = "Optional. Path to the configuration file.";
Expand Down Expand Up @@ -85,6 +88,7 @@ DEFINE_bool(h, false, help_message);
DEFINE_string(m, "", model_message);
DEFINE_string(d, "", targetDeviceMessage);
DEFINE_string(o, "", output_message);
DEFINE_bool(raw_blob, false, raw_blob_message);
DEFINE_string(log_level, "", log_level_message);
DEFINE_string(c, "", config_message);
DEFINE_bool(pc, false, perf_count_message);
Expand Down Expand Up @@ -320,6 +324,7 @@ static void showUsage() {
std::cout << " -m <value> " << model_message << std::endl;
std::cout << " -d <value> " << targetDeviceMessage << std::endl;
std::cout << " -o <value> " << output_message << std::endl;
std::cout << " -raw_blob " << raw_blob_message << std::endl;
std::cout << " -c <value> " << config_message << std::endl;
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
std::cout << " -op <value> " << outputs_precision_message << std::endl;
Expand Down Expand Up @@ -484,6 +489,18 @@ int main(int argc, char* argv[]) {
if (FLAGS_pc) {
configs["PERF_COUNT"] = "YES";
}
if (FLAGS_raw_blob) {
if (FLAGS_d == "NPU") {
// set only if was not previously parsed from config
if (configs.find("NPU_EXPORT_RAW_BLOB") == configs.end()) {
configs["NPU_EXPORT_RAW_BLOB"] = "YES";
} else {
std::cout << "Ignoring -raw_blob flag already set via -load_config." << std::endl;
}
} else {
std::cout << "Ignoring -raw_blob flag used with other device than NPU." << std::endl;
}
}

std::cout << "Compiling model" << std::endl;
auto compiledModel = core.compile_model(model, FLAGS_d, {configs.begin(), configs.end()});
Expand Down
Loading