Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions src/nncf/onnx/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
from copy import deepcopy
from pathlib import Path
from typing import Any, Callable, Iterable, TypeVar
from typing import Any, Callable, Iterable, TypeVar, Tuple, Union

import onnx
from onnx.external_data_helper import ExternalDataInfo
Expand All @@ -22,6 +22,8 @@

import nncf
from nncf.common.factory import build_graph
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.quantization.algorithms.pipeline import collect_statistics
from nncf.common.logging.logger import nncf_logger
from nncf.common.quantization.structs import QuantizationPreset
from nncf.data import Dataset
Expand Down Expand Up @@ -132,7 +134,8 @@ def quantize_impl(
model_type: ModelType | None = None,
ignored_scope: IgnoredScope | None = None,
advanced_parameters: AdvancedQuantizationParameters | None = None,
) -> onnx.ModelProto:
return_statistics: bool = False,
) -> Union[onnx.ModelProto, Tuple[onnx.ModelProto, StatisticPointsContainer]]:
"""
Implementation of the `quantize()` method for the ONNX backend.
"""
Expand Down Expand Up @@ -174,7 +177,9 @@ def quantize_impl(

graph = GraphConverter.create_nncf_graph(model)
warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS)
quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)
statistic_points = quantization_algorithm.get_statistic_points(model, graph)
statistic_points = collect_statistics(statistic_points, model, graph, calibration_dataset)
quantized_model = quantization_algorithm.apply(model, graph, statistic_points, dataset=calibration_dataset)

if external_data_dir:
remove_metadata(model, MetadataKey.EXTERNAL_DATA_DIR)
Expand All @@ -184,6 +189,8 @@ def quantize_impl(
if is_weight_compression_needed(advanced_parameters):
compress_quantize_weights_transformation(quantized_model)

if return_statistics:
return quantized_model, statistic_points
return quantized_model


Expand Down Expand Up @@ -217,7 +224,7 @@ def quantize_with_accuracy_control_impl(
copied_parameters = deepcopy(advanced_quantization_parameters)
copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False

quantized_model = quantize_impl(
quantized_model, statistic_points = quantize_impl(
model=model,
calibration_dataset=calibration_dataset,
preset=preset,
Expand All @@ -227,6 +234,7 @@ def quantize_with_accuracy_control_impl(
model_type=model_type,
ignored_scope=ignored_scope,
advanced_parameters=copied_parameters,
return_statistics=True,
)

if advanced_accuracy_restorer_parameters.intermediate_model_dir:
Expand Down Expand Up @@ -267,6 +275,7 @@ def quantize_with_accuracy_control_impl(
model_type,
ignored_scope,
copied_parameters,
initial_statistic_points=statistic_points,
)
tuned_quantized_metric_results = evaluator.collect_metric_results(
tuned_quantized_model, validation_dataset, model_name="tuned"
Expand Down
4 changes: 3 additions & 1 deletion src/nncf/openvino/quantization/quantize_ifmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def apply_algorithm_if_bodies(
"""
nncf_logger.info(f"Iteration [{current_model_num}/{len(graphs)}] ...")
parent_graph = graphs[graph_id]
quantized_model = algorithm.apply(parent_model, parent_graph, parent_statistic_points, parent_dataset)
quantized_model = algorithm.apply(parent_model, parent_graph, parent_statistic_points, dataset=parent_dataset)
if get_number_if_op(parent_model) == 0:
return quantized_model, current_model_num
model_transformer_fp32 = factory.ModelTransformerFactory.create(parent_model)
Expand Down Expand Up @@ -186,6 +186,7 @@ def apply_algorithm_if_bodies(
then_dataset,
subset_size,
current_model_num + 1,
parent_statistic_points,
)
else_quantized_model, current_model_num = apply_algorithm_if_bodies(
algorithm,
Expand All @@ -195,6 +196,7 @@ def apply_algorithm_if_bodies(
else_dataset,
subset_size,
current_model_num + 1,
parent_statistic_points,
)
model_transformer_int8 = factory.ModelTransformerFactory.create(quantized_model)
quantized_model = _update_if_body(model_transformer_int8, if_node, True, then_quantized_model)
Expand Down
35 changes: 27 additions & 8 deletions src/nncf/openvino/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@

from copy import deepcopy
from pathlib import Path
from typing import Any, Callable, Iterable, TypeVar
from typing import Any, Callable, Iterable, TypeVar, Tuple, Union

import openvino as ov
from openvino._offline_transformations import compress_quantize_weights_transformation

from nncf.common.factory import StatisticsAggregatorFactory
from nncf.common.factory import build_graph
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.quantization.algorithms.pipeline import collect_statistics
from nncf.common.logging import nncf_logger
from nncf.common.quantization.structs import QuantizationPreset
from nncf.data import Dataset
Expand Down Expand Up @@ -71,7 +73,8 @@ def native_quantize_if_op_impl(
model_type: ModelType | None = None,
ignored_scope: IgnoredScope | None = None,
advanced_parameters: AdvancedQuantizationParameters | None = None,
) -> ov.Model:
return_statistics: bool = False,
) -> Union[ov.Model, Tuple[ov.Model, StatisticPointsContainer]]:
"""
Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API.
"""
Expand Down Expand Up @@ -109,6 +112,7 @@ def _extract_all_subgraphs(model: ov.Model, current_id: str) -> None:
model_type=model_type,
ignored_scope=ignored_scope,
advanced_parameters=advanced_parameters,
return_statistics=return_statistics,
)
for graph in graphs.values():
if is_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS):
Expand All @@ -119,13 +123,16 @@ def _extract_all_subgraphs(model: ov.Model, current_id: str) -> None:
f"The model consists of {if_ops_number} If node(-s) with then and else bodies. \
Main model and all If bodies will be quantized recursively."
)
statistic_points = quantization_algorithm.get_statistic_points(model, graphs[main_model_graph_id])
statistic_points = collect_statistics(statistic_points, model, graphs[main_model_graph_id], calibration_dataset)
quantized_model, _ = apply_algorithm_if_bodies(
quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1
quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1, statistic_points
)

if is_weight_compression_needed(advanced_parameters):
compress_quantize_weights_transformation(quantized_model)


dump_parameters(
quantized_model,
{
Expand All @@ -138,6 +145,8 @@ def _extract_all_subgraphs(model: ov.Model, current_id: str) -> None:
"advanced_parameters": convert_to_dict_recursively(advanced_parameters),
},
)
if return_statistics:
return quantized_model, statistic_points
return quantized_model


Expand All @@ -152,7 +161,8 @@ def native_quantize_impl(
model_type: ModelType | None = None,
ignored_scope: IgnoredScope | None = None,
advanced_parameters: AdvancedQuantizationParameters | None = None,
) -> ov.Model:
return_statistics: bool = False,
) -> Union[ov.Model, Tuple[ov.Model, StatisticPointsContainer]]:
"""
Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API.
"""
Expand All @@ -165,14 +175,18 @@ def native_quantize_impl(
model_type=model_type,
ignored_scope=ignored_scope,
advanced_parameters=advanced_parameters,
return_statistics=return_statistics,
)
graph = GraphConverter.create_nncf_graph(model)
warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS)
quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)
statistic_points = quantization_algorithm.get_statistic_points(model, graph)
statistic_points = collect_statistics(statistic_points, model, graph, calibration_dataset)
quantized_model = quantization_algorithm.apply(model, graph, statistic_points, dataset=calibration_dataset)

if is_weight_compression_needed(advanced_parameters):
compress_quantize_weights_transformation(quantized_model)


dump_parameters(
quantized_model,
{
Expand All @@ -185,6 +199,8 @@ def native_quantize_impl(
"advanced_parameters": convert_to_dict_recursively(advanced_parameters),
},
)
if return_statistics:
return quantized_model, statistic_points
return quantized_model


Expand Down Expand Up @@ -219,7 +235,7 @@ def quantize_with_accuracy_control_impl(
copied_parameters = deepcopy(advanced_quantization_parameters)
copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False

quantized_model = quantize_impl(
quantized_model, statistic_points = quantize_impl(
model=model,
calibration_dataset=calibration_dataset,
preset=preset,
Expand All @@ -229,6 +245,7 @@ def quantize_with_accuracy_control_impl(
model_type=model_type,
ignored_scope=ignored_scope,
advanced_parameters=copied_parameters,
return_statistics=True,
)

if advanced_accuracy_restorer_parameters.intermediate_model_dir:
Expand All @@ -251,7 +268,6 @@ def quantize_with_accuracy_control_impl(

nncf_logger.info(f"Accuracy drop: {accuracy_drop} ({drop_type})")

# TODO(andrey-churkin): Collect statistics only once
if advanced_accuracy_restorer_parameters.tune_hyperparams and not should_terminate:
model = remove_friendly_name_duplicates(model)
tuned_quantized_model = quantize_with_tune_hyperparams(
Expand All @@ -269,6 +285,7 @@ def quantize_with_accuracy_control_impl(
model_type,
ignored_scope,
copied_parameters,
initial_statistic_points=statistic_points,
)
tuned_quantized_metric_results = evaluator.collect_metric_results(
tuned_quantized_model, validation_dataset, model_name="tuned"
Expand Down Expand Up @@ -338,7 +355,8 @@ def quantize_impl(
model_type: ModelType | None = None,
ignored_scope: IgnoredScope | None = None,
advanced_parameters: AdvancedQuantizationParameters | None = None,
) -> ov.Model:
return_statistics: bool = False,
) -> Union[ov.Model, Tuple[ov.Model, StatisticPointsContainer]]:
"""
Implementation of the `quantize()` method for the OpenVINO backend.
"""
Expand All @@ -359,6 +377,7 @@ def quantize_impl(
model_type=model_type,
ignored_scope=ignored_scope,
advanced_parameters=advanced_parameters,
return_statistics=return_statistics,
)


Expand Down
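26 changes: 17 additions & 9 deletions src/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py
Original file line number Diff line number Diff line change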
Expand Up @@ -225,6 +225,7 @@ def __init__(
subset_size: int,
initial_metric_results: MetricResults,
quantized_metric_results: MetricResults,
initial_statistic_points: StatisticPointsContainer | None = None,
):
"""
:param pipeline_fn: Function to create pipeline.
Expand All @@ -246,6 +247,7 @@ def __init__(
self._subset_size = subset_size
self._initial_metric_results = initial_metric_results
self._quantized_metric_results = quantized_metric_results
self._initial_statistic_points = initial_statistic_points

self._is_metric_mode = isinstance(self._initial_metric_results.values_for_each_item[0], float)

Expand Down Expand Up @@ -290,8 +292,11 @@ def apply(self, model: TModel, validation_dataset: Dataset) -> TModel:
# TODO(andrey-churkin): Think about how it can be avoided.
params = apply_combination(self._init_params, best_settings)
pipeline = self._pipeline_fn(**params)
container = pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset)
if step_index == 0 and self._initial_statistic_points is not None:
step_statistics = self._initial_statistic_points
else:
container = pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset)
step_model = pipeline.run_step(step_index, step_statistics, step_model, step_graph)
continue

Expand Down Expand Up @@ -359,13 +364,16 @@ def _prepare_pipeline_step(
self._pipelines[combination_key] = self._pipeline_fn(**kwargs)

# Collect statistics required to execute `step_index`-th pipeline step
containers = [
pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
for pipeline in self._pipelines.values()
]
self._step_index_to_statistics[step_index] = collect_statistics(
containers, step_model, step_graph, self._calibration_dataset
)
if step_index == 0 and self._initial_statistic_points is not None:
self._step_index_to_statistics[step_index] = self._initial_statistic_points
else:
containers = [
pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
for pipeline in self._pipelines.values()
]
self._step_index_to_statistics[step_index] = collect_statistics(
containers, step_model, step_graph, self._calibration_dataset
)

def _calculate_combination_score(
self,
Expand Down
3 changes: 3 additions & 0 deletions src/nncf/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Iterable, TypedDict, TypeVar
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer

import nncf
from nncf.common.graph import NNCFGraph
Expand Down Expand Up @@ -723,6 +724,7 @@ def quantize_with_tune_hyperparams(
model_type: ModelType | None = None,
ignored_scope: IgnoredScope | None = None,
advanced_quantization_parameters: AdvancedQuantizationParameters | None = None,
initial_statistic_points: StatisticPointsContainer | None = None,
) -> TModel:
"""
Applies post-training quantization algorithm with tune hyperparameters to provided model.
Expand Down Expand Up @@ -778,6 +780,7 @@ def quantize_with_tune_hyperparams(
tuner_subset_size,
initial_metric_results,
quantized_metric_results,
initial_statistic_points,
)

quantized_model = hyperparameter_tuner.apply(model, validation_dataset)
Expand Down