diff --git a/README.md b/README.md index bae39d094..063ae08aa 100755 --- a/README.md +++ b/README.md @@ -4,40 +4,51 @@


-PaddleSlim是一个专注于深度学习模型压缩的工具库,提供**低比特量化、知识蒸馏、稀疏化和模型结构搜索**等模型压缩策略,帮助用户快速实现模型的小型化。 +PaddleSlim是一个专注于深度学习模型压缩的工具库,提供**低比特量化、知识蒸馏、稀疏化和模型结构搜索**等模型压缩策略,帮助开发者快速实现模型的小型化。 ## 产品动态 +- 🔥 **2022.08.16:自动化压缩功能升级** + - 支持直接加载ONNX模型和Paddle模型导出至ONNX + - 发布量化分析工具试用版,发布[YOLO系列离线量化工具](example/post_training_quantization/pytorch_yolo_series/) + - 更新[YOLO-Series自动化压缩模型库](example/auto_compression/pytorch_yolo_series) + + | 模型 | Base mAPval
0.5:0.95 | ACT量化mAPval
0.5:0.95 | 模型体积压缩比 | 预测时延FP32
| 预测时延INT8
| 预测加速比 | + | :-------- |:-------- |:--------: | :--------: | :---------------------: | :----------------: | :----------------: | + | PPYOLOE-s | 43.1 | 42.6 | 3.9倍 | 6.51ms | 2.12ms | 3.1倍 | + | YOLOv5s | 37.4 | 36.9 | 3.8倍 | 5.95ms | 1.87ms | 3.2倍 | + | YOLOv6s | 42.4 | 41.3 | 3.9倍 | 9.06ms | 1.83ms | 5.0倍 | + | YOLOv7 | 51.1 | 50.9 | 3.9倍 | 26.84ms | 4.55ms | 5.9倍 | + | YOLOv7-Tiny | 37.3 | 37.0 | 3.9倍 | 5.06ms | 1.68ms | 3.0倍 | + + - 🔥 **2022.07.01: 发布[v2.3.0版本](https://github.com/PaddlePaddle/PaddleSlim/releases/tag/v2.3.0)** - 发布[自动化压缩功能](example/auto_compression) - - - 支持代码无感知压缩:用户只需提供推理模型文件和数据,既可进行离线量化(PTQ)、量化训练(QAT)、稀疏训练等压缩任务。 + - 支持代码无感知压缩:开发者只需提供推理模型文件和数据,既可进行离线量化(PTQ)、量化训练(QAT)、稀疏训练等压缩任务。 - 支持自动策略选择,根据任务特点和部署环境特性:自动搜索合适的离线量化方法,自动搜索最佳的压缩策略组合方式。 - 发布[自然语言处理](example/auto_compression/nlp)、[图像语义分割](example/auto_compression/semantic_segmentation)、[图像目标检测](example/auto_compression/detection)三个方向的自动化压缩示例。 - - 发布`X2Paddle`模型自动化压缩方案:[YOLOv5](example/auto_compression/pytorch_yolov5)、[HuggingFace](example/auto_compression/pytorch_huggingface) [MobileNet](example/auto_compression/tensorflow_mobilenet)。 - + - 发布`X2Paddle`模型自动化压缩方案:[YOLOv5](example/auto_compression/pytorch_yolo_series)、[YOLOv6](example/auto_compression/pytorch_yolo_series)、[YOLOv7](example/auto_compression/pytorch_yolo_series)、[HuggingFace](example/auto_compression/pytorch_huggingface)、[MobileNet](example/auto_compression/tensorflow_mobilenet)。 - 升级量化功能 - - - 统一量化模型格式 - - 离线量化支持while op - - 新增7种[离线量化方法](docs/zh_cn/tutorials/quant/post_training_quantization.md), 包括HIST, AVG, EMD, Bias Correction, AdaRound等 - - 修复BERT大模型量化训练过慢的问题 - + - 统一量化模型格式;离线量化支持while op;修复BERT大模型量化训练过慢的问题。 + - 新增7种[离线量化方法](docs/zh_cn/tutorials/quant/post_training_quantization.md), 包括HIST, AVG, EMD, Bias Correction, AdaRound等。 - 支持半结构化稀疏训练 - - 新增延时预估工具 + - 支持对稀疏化模型、低比特量化模型的性能预估;支持预估指定模型在特定部署环境下 (ARM CPU + Paddle Lite) 的推理性能;提供 SD625、SD710、RK3288 芯片 + Paddle Lite 的预估接口。 + - 提供部署环境自动扩展工具,可以自动增加在更多 ARM CPU 设备上的预估工具。 - - 支持预估指定模型在特定部署环境下 (ARM CPU + Paddle Lite) 的推理性能 - - 提供部署环境自动扩展工具,可以自动增加在更多 ARM CPU 设备上的预估工具 - - 支持对稀疏化模型、低比特量化模型的性能预估 - - 提供 SD625、SD710、RK3288 芯片 + Paddle Lite 的预估接口 - +
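以下为延时预估工具的一个最小调用示意(基于 PaddleSlim 2.3 文档中的 `TableLatencyPredictor` 接口写出,`table_file` 可选的芯片名称与参数名请以实际安装版本的文档为准):

```python
from paddleslim.analysis import TableLatencyPredictor

# 以 SD710 延时表为例,预估模型在 ARM CPU + Paddle Lite 部署环境下的推理时延
predictor = TableLatencyPredictor(table_file='SD710')
latency = predictor.predict(
    model_file='inference.pdmodel',
    param_file='inference.pdiparams',
    data_type='int8')  # 压缩前的 FP32 模型可传 'fp32'
print('预估推理时延(ms):', latency)
```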
+历史更新 - **2021.11.15: 发布v2.2.0版本** @@ -52,6 +63,7 @@ PaddleSlim是一个专注于深度学习模型压缩的工具库,提供**低 更多信息请参考:[release note](https://github.com/PaddlePaddle/PaddleSlim/releases) +
## 基础压缩功能概览 @@ -279,6 +291,11 @@ python setup.py install #### 1. 量化训练或者离线量化后的模型体积为什么没有变小? 答:这是因为量化后保存的参数是虽然是int8范围,但是类型是float。这是因为Paddle训练前向默认的Kernel不支持INT8 Kernel实现,只有Paddle Inference TensorRT的推理才支持量化推理加速。为了方便量化后验证量化精度,使用Paddle训练前向能加载此模型,默认保存的Float32类型权重,体积没有发生变换。 + +#### 2. macOS + Python3.9环境或者Windows环境下, 安装出错, "command 'swig' failed" + +答: 请参考https://github.com/PaddlePaddle/PaddleSlim/issues/1258 + ## 许可证书 本项目的发布受[Apache 2.0 license](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/LICENSE)许可认证。 diff --git a/demo/imagenet_reader.py b/demo/imagenet_reader.py index dd102eb91..85abbb228 100644 --- a/demo/imagenet_reader.py +++ b/demo/imagenet_reader.py @@ -11,6 +11,7 @@ np.random.seed(0) DATA_DIM = 224 +RESIZE_DIM = 256 THREAD = 16 BUF_SIZE = 10240 @@ -34,8 +35,8 @@ def crop_image(img, target_size, center): width, height = img.size size = target_size if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 + w_start = (width - size) // 2 + h_start = (height - size) // 2 else: w_start = np.random.randint(0, width - size + 1) h_start = np.random.randint(0, height - size + 1) @@ -98,7 +99,12 @@ def random_color(img, lower=0.5, upper=1.5): return img -def process_image(sample, mode, color_jitter, rotate): +def process_image(sample, + mode, + color_jitter, + rotate, + crop_size=DATA_DIM, + resize_size=RESIZE_DIM): img_path = sample[0] try: @@ -108,10 +114,10 @@ def process_image(sample, mode, color_jitter, rotate): return None if mode == 'train': if rotate: img = rotate_image(img) - img = random_crop(img, DATA_DIM) + img = random_crop(img, crop_size) else: - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) + img = resize_short(img, target_size=resize_size) + img = crop_image(img, target_size=crop_size, center=True) if mode == 'train': if color_jitter: img = distort_color(img) @@ -185,9 +191,15 @@ def test(data_dir=DATA_DIR): class ImageNetDataset(Dataset): - def __init__(self, data_dir=DATA_DIR, mode='train'): + def __init__(self, + data_dir=DATA_DIR, + mode='train', + crop_size=DATA_DIM, + resize_size=RESIZE_DIM): super(ImageNetDataset, self).__init__() self.data_dir = data_dir + self.crop_size = crop_size + self.resize_size = resize_size train_file_list = os.path.join(data_dir, 'train_list.txt') val_file_list = os.path.join(data_dir, 'val_list.txt') test_file_list = os.path.join(data_dir, 'test_list.txt') @@ -211,21 +223,27 @@ def __getitem__(self, index): [data_path, sample[1]], mode='train', color_jitter=False, - rotate=False) + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) return data, np.array([label]).astype('int64') elif self.mode == 'val': data, label = process_image( [data_path, sample[1]], mode='val', color_jitter=False, - rotate=False) + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) return data, np.array([label]).astype('int64') elif self.mode == 'test': data = process_image( [data_path, sample[1]], mode='test', color_jitter=False, - rotate=False) + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) return data def __len__(self): diff --git a/demo/quant/pact_quant_aware/train.py b/demo/quant/pact_quant_aware/train.py index fb70c0fc2..67945a455 100644 --- a/demo/quant/pact_quant_aware/train.py +++ b/demo/quant/pact_quant_aware/train.py @@ -65,6 +65,8 @@ "Whether to use PACT or not.") add_arg('analysis', bool, False, "Whether analysis variables distribution.") +add_arg('onnx_format', bool, False, + "Whether use onnx format 
or not.") add_arg('ce_test', bool, False, "Whether to CE test.") # yapf: enable @@ -257,6 +259,8 @@ def compress(args): 'window_size': 10000, # The decay coefficient of moving average, default is 0.9 'moving_rate': 0.9, + # Whether use onnx format or not + 'onnx_format': args.onnx_format, } # 2. quantization transform programs (training aware) @@ -298,9 +302,9 @@ def get_optimizer(): places, quant_config, scope=None, - act_preprocess_func=act_preprocess_func, - optimizer_func=optimizer_func, - executor=executor, + act_preprocess_func=None, + optimizer_func=None, + executor=None, for_test=True) compiled_train_prog = quant_aware( train_prog, @@ -425,29 +429,23 @@ def train(epoch, compiled_train_prog, lr): # 3. Freeze the graph after training by adjusting the quantize # operators' order for the inference. # The dtype of float_program's weights is float32, but in int8 range. - float_program, int8_program = convert(val_program, places, quant_config, \ - scope=None, \ - save_int8=True) + model_path = os.path.join(quantization_model_save_dir, args.model) + if not os.path.isdir(model_path): + os.makedirs(model_path) + float_program = convert(val_program, places, quant_config) _logger.info("eval best_model after convert") final_acc1 = test(best_epoch, float_program) _logger.info("final acc:{}".format(final_acc1)) # 4. Save inference model - model_path = os.path.join(quantization_model_save_dir, args.model, - 'act_' + quant_config['activation_quantize_type'] - + '_w_' + quant_config['weight_quantize_type']) - float_path = os.path.join(model_path, 'float') - if not os.path.isdir(model_path): - os.makedirs(model_path) - paddle.fluid.io.save_inference_model( - dirname=float_path, + dirname=model_path, feeded_var_names=[image.name], target_vars=[out], executor=exe, main_program=float_program, - model_filename=float_path + '/model', - params_filename=float_path + '/params') + model_filename=model_path + '/model.pdmodel', + params_filename=model_path + '/model.pdiparams') def main(): diff --git a/demo/quant/quant_aware/train.py b/demo/quant/quant_aware/train.py index abf6073ec..7fc133a46 100644 --- a/demo/quant/quant_aware/train.py +++ b/demo/quant/quant_aware/train.py @@ -126,6 +126,8 @@ def compress(args): 'window_size': 10000, # The decay coefficient of moving average, default is 0.9 'moving_rate': 0.9, + # Whether use onnx format or not + 'onnx_format': args.onnx_format, } pretrain = True @@ -294,10 +296,7 @@ def train(epoch, compiled_train_prog): # operators' order for the inference. # The dtype of float_program's weights is float32, but in int8 range. 
############################################################################################################ - float_program, int8_program = convert(val_program, places, quant_config, \ - scope=None, \ - save_int8=True, - onnx_format=args.onnx_format) + float_program = convert(val_program, places, quant_config) print("eval best_model after convert") final_acc1 = test(best_epoch, float_program) ############################################################################################################ diff --git a/demo/quant/quant_post/eval.py b/demo/quant/quant_post/eval.py index 310eacd08..e8184e848 100755 --- a/demo/quant/quant_post/eval.py +++ b/demo/quant/quant_post/eval.py @@ -21,8 +21,7 @@ import paddle sys.path[0] = os.path.join( os.path.dirname("__file__"), os.path.pardir, os.path.pardir) -sys.path[1] = os.path.join( - os.path.dirname("__file__"), os.path.pardir) +sys.path[1] = os.path.join(os.path.dirname("__file__"), os.path.pardir) import imagenet_reader as reader from utility import add_arguments, print_arguments @@ -31,8 +30,8 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('use_gpu', bool, True, "Whether to use GPU or not.") add_arg('model_path', str, "./pruning/checkpoints/resnet50/2/eval_model/", "Whether to use pretrained model.") -add_arg('model_name', str, '__model__', "model filename for inference model") -add_arg('params_name', str, '__params__', "params filename for inference model") +add_arg('model_name', str, 'model.pdmodel', "model filename for inference model") +add_arg('params_name', str, 'model.pdiparams', "params filename for inference model") add_arg('batch_size', int, 64, "Minibatch size.") # yapf: enable diff --git a/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst b/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst index f5731df45..c308413db 100644 --- a/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst +++ b/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst @@ -3,19 +3,19 @@ AutoCompression自动压缩功能 AutoCompression --------------- -.. py:class:: paddleslim.auto_compression.AutoCompression(model_dir, model_filename, params_filename, save_dir, strategy_config, train_config, train_dataloader, eval_callback, devices='gpu') +.. 
py:class:: paddleslim.auto_compression.AutoCompression(model_dir, train_dataloader, model_filename, params_filename, save_dir, strategy_config, train_config, eval_callback, devices='gpu') -`源代码 `_ +`源代码 `_ 根据指定的配置对使用 ``paddle.jit.save`` 接口或者 ``paddle.static.save_inference_model`` 接口保存的推理模型进行压缩。 **参数: ** - **model_dir(str)** - 需要压缩的推理模型所在的目录。 +- **train_dataloader(paddle.io.DataLoader)** - 训练数据迭代器。注意:如果选择离线量化超参搜索策略的话, ``train_dataloader`` 和 ``eval_callback`` 设置相同的数据读取即可。 - **model_filename(str)** - 需要压缩的推理模型文件名称。 - **params_filename(str)** - 需要压缩的推理模型参数文件名称。 - **save_dir(str)** - 压缩后模型的所保存的目录。 -- **train_dataloader(paddle.io.DataLoader)** - 训练数据迭代器。注意:如果选择离线量化超参搜索策略的话, ``train_dataloader`` 和 ``eval_callback`` 设置相同的数据读取即可。 - **train_config(dict)** - 训练配置。可以配置的参数请参考: ``_ 。注意:如果选择离线量化超参搜索策略的话, ``train_config`` 直接设置为 ``None`` 即可。 - **strategy_config(dict, list(dict), 可选)** - 使用的压缩策略,可以通过设置多个单种策略来并行使用这些压缩方式。字典的关键字必须在: ``Quantization`` (量化配置, 可配置的参数参考 ``_ ), @@ -82,13 +82,13 @@ AutoCompression eval_dataloader = Cifar10(mode='eval') - ac = AutoCompression(model_path, model_filename, params_filename, save_dir, \ + ac = AutoCompression(model_path, train_dataloader, model_filename, params_filename, save_dir, \ strategy_config="Quantization": Quantization(**default_ptq_config), "Distillation": HyperParameterOptimization(**default_distill_config)}, \ - train_config=None, train_dataloader=train_dataloader, eval_callback=eval_dataloader,devices='gpu') + train_config=None, eval_callback=eval_dataloader,devices='gpu') ``` diff --git a/docs/zh_cn/api_cn/static/quant/quantization_api.rst b/docs/zh_cn/api_cn/static/quant/quantization_api.rst index a12e4e9b5..f2d7b77d5 100644 --- a/docs/zh_cn/api_cn/static/quant/quantization_api.rst +++ b/docs/zh_cn/api_cn/static/quant/quantization_api.rst @@ -118,7 +118,7 @@ quant_post_dynamic quant_post_static --------------- -.. py:function:: paddleslim.quant.quant_post_static(executor,model_dir, quantize_model_path, batch_generator=None, sample_generator=None, model_filename=None, params_filename=None, save_model_filename='__model__', save_params_filename='__params__', batch_size=16, batch_nums=None, scope=None, algo='KL', round_type='round', quantizable_op_type=["conv2d","depthwise_conv2d","mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, activation_quantize_type='range_abs_max', weight_quantize_type='channel_wise_abs_max', onnx_format=False, skip_tensor_list=None, optimize_model=False) +.. 
py:function:: paddleslim.quant.quant_post_static(executor,model_dir, quantize_model_path, batch_generator=None, sample_generator=None, model_filename=None, params_filename=None, save_model_filename='model.pdmodel', save_params_filename='model.pdiparams', batch_size=16, batch_nums=None, scope=None, algo='KL', round_type='round', quantizable_op_type=["conv2d","depthwise_conv2d","mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, activation_quantize_type='range_abs_max', weight_quantize_type='channel_wise_abs_max', onnx_format=False, skip_tensor_list=None, optimize_model=False) `源代码 `_ @@ -217,15 +217,15 @@ quant_post_static target_vars=[out], main_program=val_prog, executor=exe, - model_filename='__model__', - params_filename='__params__') + model_filename='model.pdmodel', + params_filename='model.pdiparams') quant_post_static( executor=exe, model_dir='./model_path', quantize_model_path='./save_path', sample_generator=val_reader, - model_filename='__model__', - params_filename='__params__', + model_filename='model.pdmodel', + params_filename='model.pdiparams', batch_size=16, batch_nums=10) diff --git a/example/auto_compression/README.md b/example/auto_compression/README.md index bce9764ef..cb3b9277d 100644 --- a/example/auto_compression/README.md +++ b/example/auto_compression/README.md @@ -1,103 +1,250 @@ -# 自动化压缩工具ACT(Auto Compression Toolkit) - -## 简介 -PaddleSlim推出全新自动化压缩工具(ACT),旨在通过Source-Free的方式,自动对预测模型进行压缩,压缩后模型可直接部署应用。ACT自动化压缩工具主要特性如下: -- **『更便捷』**:开发者无需了解或修改模型源码,直接使用导出的预测模型进行压缩; -- **『更智能』**:开发者简单配置即可启动压缩,ACT工具会自动优化得到最好预测模型; -- **『更丰富』**:ACT中提供了量化训练、蒸馏、结构化剪枝、非结构化剪枝、多种离线量化方法及超参搜索等等,可任意搭配使用。 - - -## 环境准备 - -- 安装PaddlePaddle >= 2.3版本 (从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- 安装PaddleSlim develop版本 - -## 快速上手 - -- 1.准备模型及数据集 - -```shell -# 下载MobileNet预测模型 -wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar -tar -xf MobileNetV1_infer.tar -# 下载ImageNet小型数据集 -wget https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz -tar xf ILSVRC2012_data_demo.tar.gz -``` - -- 2.运行 - -```python -# 导入依赖包 -import paddle -from PIL import Image -from paddle.vision.datasets import DatasetFolder -from paddle.vision.transforms import transforms -from paddleslim.auto_compression import AutoCompression, Quantization, HyperParameterOptimization -paddle.enable_static() -# 定义DataSet -class ImageNetDataset(DatasetFolder): - def __init__(self, path, image_size=224): - super(ImageNetDataset, self).__init__(path) - normalize = transforms.Normalize( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375]) - self.transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(image_size), transforms.Transpose(), - normalize - ]) - - def __getitem__(self, idx): - img_path, _ = self.samples[idx] - return self.transform(Image.open(img_path).convert('RGB')) - - def __len__(self): - return len(self.samples) - -# 定义DataLoader -train_dataset = ImageNetDataset("./ILSVRC2012_data_demo/ILSVRC2012/train/") -image = paddle.static.data( - name='inputs', shape=[None] + [3, 224, 224], dtype='float32') -train_loader = paddle.io.DataLoader(train_dataset, feed_list=[image], batch_size=32, return_list=False) -# 开始自动压缩 -ac = AutoCompression( - model_dir="./MobileNetV1_infer", - model_filename="inference.pdmodel", - params_filename="inference.pdiparams", - save_dir="output", - config={'Quantization': Quantization(), "HyperParameterOptimization": 
HyperParameterOptimization(max_quant_count=5)}, - train_dataloader=train_loader, - eval_dataloader=train_loader) # eval_function to verify accuracy -ac.compress() -``` - -**提示:** -- DataLoader传入的数据集是待压缩模型所用的数据集,DataLoader继承自`paddle.io.DataLoader`。 -- 如无需验证自动化压缩过程中模型的精度,`eval_callback`可不传入function,程序会自动根据损失来选择最优模型。 -- 自动化压缩Config中定义量化、蒸馏、剪枝等压缩算法会合并执行,压缩策略有:量化+蒸馏,剪枝+蒸馏等等。 -- 如果要压缩的模型参数是存储在各自分离的文件中,需要先通过[convert.py](./convert.py) 脚本将其保存成一个单独的二进制文件。 - -## 应用示例 - -#### [图像分类](./image_classification) - -#### [目标检测](./detection) - -#### [语义分割](./semantic_segmentation) - -#### [NLP](./nlp) - -#### X2Paddle - -- [PyTorch YOLOv5](./pytorch_yolov5) -- [HuggingFace](./pytorch_huggingface) -- [TensorFlow MobileNet](./tensorflow_mobilenet) - -#### 即将发布 -- [ ] 更多自动化压缩应用示例 - -## 其他 - -- ACT可以自动处理常见的预测模型,如果有更特殊的改造需求,可以参考[ACT超参配置教程](./hyperparameter_tutorial.md)来进行单独配置压缩策略。 - -- 如果你发现任何关于ACT自动化压缩工具的问题或者是建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleSlim/issues)给我们提issues。同时欢迎贡献更多优秀模型,共建开源生态。 +# 模型自动化压缩工具ACT(Auto Compression Toolkit) + +------------------------------------------------------------------------------------------ + +

+ 特性 | Benchmark | 安装 | 快速开始 | 进阶使用 | 社区交流
+ +## **简介** + +PaddleSlim推出全新自动化压缩工具(Auto Compression Toolkit, ACT),旨在通过Source-Free的方式,自动对预测模型进行压缩,压缩后模型可直接部署应用。 + +## **News** 📢 + +* 🎉 2022.7.6 [**PaddleSlim v2.3.0**](https://github.com/PaddlePaddle/PaddleSlim/releases/tag/v2.3.0)全新发布!目前已经在图像分类、目标检测、图像分割、NLP等20多个模型验证正向效果。 +* 🔥 2022.7.14 晚 20:30,PaddleSlim自动压缩天使用户沟通会。与开发者共同探讨模型压缩痛点问题,欢迎大家扫码报名入群获取会议链接。 + +
+ +## **特性** + +- **🚀『解耦训练代码』** :开发者无需了解或修改模型源码,直接使用导出的预测模型进行压缩; +- **🎛️『全流程自动优化』** :开发者简单配置即可启动压缩,ACT工具会自动优化得到最好预测模型; +- **📦『支持丰富压缩算法』** :ACT中提供了量化训练、蒸馏、结构化剪枝、非结构化剪枝、多种离线量化方法及超参搜索等等,可任意搭配使用 + +### **ACT核心思想** + +相比于传统手工压缩,自动化压缩的“自动”主要体现在4个方面:解耦训练代码、离线量化超参搜索、算法 + +


+ +### **模型压缩效果示例** + +ACT相比传统的模型压缩方法: + +- 代码量减少 50% 以上 +- 压缩精度与手工压缩基本持平,在 PP-YOLOE 模型上效果还优于手工压缩。 +- 自动化压缩后的推理性能收益与手工压缩持平,相比压缩前,推理速度可以提升1.4~7.1倍。
+ +### **模型压缩效果Benchmark** + + + + + +| 模型类型 | model name | 压缩前
精度(Top1 Acc %) | 压缩后
精度(Top1 Acc %) | 压缩前
推理时延(ms) | 压缩后
推理时延(ms) | 推理
加速比 | 芯片 | +| ------------------------------- | ---------------------------- | ---------------------- | ---------------------- | ---------------- | ---------------- | ---------- | ----------------- | +| [图像分类](./image_classification) | MobileNetV1 | 70.90 | 70.57 | 33.15 | 13.64 | **2.43** | SDM865(骁龙865) | +| [图像分类](./image_classification) | ShuffleNetV2_x1_0 | 68.65 | 68.32 | 10.43 | 5.51 | **1.89** | SDM865(骁龙865) | +| [图像分类](./image_classification) | SqueezeNet1_0_infer | 59.60 | 59.45 | 35.98 | 16.96 | **2.12** | SDM865(骁龙865) | +| [图像分类](./image_classification) | PPLCNetV2_base | 76.86 | 76.43 | 36.50 | 15.79 | **2.31** | SDM865(骁龙865) | +| [图像分类](./image_classification) | ResNet50_vd | 79.12 | 78.74 | 3.19 | 0.92 | **3.47** | NVIDIA Tesla T4 | +| [语义分割](./semantic_segmentation) | PPHGNet_tiny | 79.59 | 79.20 | 2.82 | 0.98 | **2.88** | NVIDIA Tesla T4 | +| [语义分割](./semantic_segmentation) | PP-HumanSeg-Lite | 92.87 | 92.35 | 56.36 | 37.71 | **1.49** | SDM710 | +| [语义分割](./semantic_segmentation) | PP-LiteSeg | 77.04 | 76.93 | 1.43 | 1.16 | **1.23** | NVIDIA Tesla T4 | +| [语义分割](./semantic_segmentation) | HRNet | 78.97 | 78.90 | 8.19 | 5.81 | **1.41** | NVIDIA Tesla T4 | +| [语义分割](./semantic_segmentation) | UNet | 65.00 | 64.93 | 15.29 | 10.23 | **1.49** | NVIDIA Tesla T4 | +| NLP | PP-MiniLM | 72.81 | 72.44 | 128.01 | 17.97 | **7.12** | NVIDIA Tesla T4 | +| NLP | ERNIE 3.0-Medium | 73.09 | 72.40 | 29.25(fp16) | 19.61 | **1.49** | NVIDIA Tesla T4 | +| [目标检测](./pytorch_yolo_series) | YOLOv5s
(PyTorch) | 37.40 | 36.9 | 5.95 | 1.87 | **3.18** | NVIDIA Tesla T4 | +| [目标检测](./pytorch_yolo_series) | YOLOv6s
(PyTorch) | 42.4 | 41.3 | 9.06 | 1.83 | **4.95** | NVIDIA Tesla T4 | +| [目标检测](./pytorch_yolo_series) | YOLOv7
(PyTorch) | 51.1 | 50.8 | 26.84 | 4.55 | **5.89** | NVIDIA Tesla T4 | +| [目标检测](./detection) | PP-YOLOE-s | 43.1 | 42.6 | 6.51 | 2.12 | **3.07** | NVIDIA Tesla T4 | +| [图像分类](./image_classification) | MobileNetV1
(TensorFlow) | 71.0 | 70.22 | 30.45 | 15.86 | **1.92** | SDMM865(骁龙865) | + +- 备注:目标检测精度指标为mAP(0.5:0.95)精度测量结果。图像分割精度指标为IoU精度测量结果。 +- 更多飞桨模型应用示例及Benchmark可以参考:[图像分类](./image_classification),[目标检测](./detection),[语义分割](./semantic_segmentation),[自然语言处理](./nlp) +- 更多其它框架应用示例及Benchmark可以参考:[YOLOv5(PyTorch)](./pytorch_yolo_series),[YOLOv6(PyTorch)](./pytorch_yolo_series),[YOLOv7(PyTorch)](./pytorch_yolo_series),[HuggingFace(PyTorch)](./pytorch_huggingface),[MobileNet(TensorFlow)](./tensorflow_mobilenet)。 + +## **环境准备** + +- 安装PaddlePaddle >= 2.3.1:(可以参考[飞桨官网安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) + + ```shell + # CPU + pip install paddlepaddle --upgrade + # GPU + pip install paddlepaddle-gpu --upgrade + ``` + +- 安装PaddleSlim >=2.3.0: + + ```shell + pip install paddleslim + ``` + +## **快速开始** + +- **1. 准备模型及数据集** + +```shell +# 下载MobileNet预测模型 +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar +tar -xf MobileNetV1_infer.tar +# 下载ImageNet小型数据集 +wget https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz +tar -xf ILSVRC2012_data_demo.tar.gz +``` + +- **2.运行自动化压缩** + +```python +# 导入依赖包 +import paddle +from PIL import Image +from paddle.vision.datasets import DatasetFolder +from paddle.vision.transforms import transforms +from paddleslim.auto_compression import AutoCompression +paddle.enable_static() +# 定义DataSet +class ImageNetDataset(DatasetFolder): + def __init__(self, path, image_size=224): + super(ImageNetDataset, self).__init__(path) + normalize = transforms.Normalize( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375]) + self.transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(image_size), transforms.Transpose(), + normalize + ]) + + def __getitem__(self, idx): + img_path, _ = self.samples[idx] + return self.transform(Image.open(img_path).convert('RGB')) + + def __len__(self): + return len(self.samples) + +# 定义DataLoader +train_dataset = ImageNetDataset("./ILSVRC2012_data_demo/ILSVRC2012/train/") +image = paddle.static.data( + name='inputs', shape=[None] + [3, 224, 224], dtype='float32') +train_loader = paddle.io.DataLoader(train_dataset, feed_list=[image], batch_size=32, return_list=False) +# 开始自动压缩 +ac = AutoCompression( + model_dir="./MobileNetV1_infer", + model_filename="inference.pdmodel", + params_filename="inference.pdiparams", + save_dir="MobileNetV1_quant", + config={'Quantization': {}, "HyperParameterOptimization": {'ptq_algo': ['avg'], 'max_quant_count': 3}}, + train_dataloader=train_loader, + eval_dataloader=train_loader) +ac.compress() +``` + +- **3.精度测试** + + - 测试压缩前模型的精度: + + ```shell + CUDA_VISIBLE_DEVICES=0 python ./image_classification/eval.py + ### Eval Top1: 0.7171724759615384 + ``` + + - 测试量化模型的精度: + + ```shell + CUDA_VISIBLE_DEVICES=0 python ./image_classification/eval.py --model_dir='MobileNetV1_quant' + ### Eval Top1: 0.7166466346153846 + ``` + + - 量化后模型的精度相比量化前的模型几乎精度无损,由于是使用的超参搜索的方法来选择的量化参数,所以每次运行得到的量化模型精度会有些许波动。 + +- **4.推理速度测试** + + - 量化模型速度的测试依赖推理库的支持,所以确保安装的是带有TensorRT的PaddlePaddle。以下示例和展示的测试结果是基于Tesla V100、CUDA 10.2、python3.7得到的。 + + - 使用以下指令查看本地cuda版本,并且在[下载链接](https://paddleinference.paddlepaddle.org.cn/master/user_guides/download_lib.html#python)中下载对应cuda版本和对应python版本的paddlepaddle安装包。 + + ```shell + cat /usr/local/cuda/version.txt ### CUDA Version 10.2.89 + ### 10.2.89 为cuda版本号,可以根据这个版本号选择需要安装的带有TensorRT的PaddlePaddle安装包。 + ``` + + - 
安装下载的whl包:(这里通过wget下载到的是python3.7、cuda10.2的PaddlePaddle安装包,若您的环境和示例环境不同,请依赖您自己机器的环境下载对应的安装包,否则运行示例代码会报错。) + + ``` + wget https://paddle-inference-lib.bj.bcebos.com/2.3.0/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4/paddlepaddle_gpu-2.3.0-cp37-cp37m-linux_x86_64.whl + pip install paddlepaddle_gpu-2.3.0-cp37-cp37m-linux_x86_64.whl --force-reinstall + ``` + + - 测试FP32模型的速度 + + ``` + python ./image_classification/infer.py + ### using tensorrt FP32 batch size: 1 time(ms): 0.6140608787536621 + ``` + + - 测试FP16模型的速度 + + ``` + python ./image_classification/infer.py --use_fp16=True + ### using tensorrt FP16 batch size: 1 time(ms): 0.5795984268188477 + ``` + + - 测试INT8模型的速度 + + ``` + python ./image_classification/infer.py --model_dir=./MobileNetV1_quant/ --use_int8=True + ### using tensorrt INT8 batch size: 1 time(ms): 0.5213963985443115 + ``` + + - **提示:** + + - DataLoader传入的数据集是待压缩模型所用的数据集,DataLoader继承自`paddle.io.DataLoader`。可以直接使用模型套件中的DataLoader,或者根据[paddle.io.DataLoader](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/io/DataLoader_cn.html#dataloader)自定义所需要的DataLoader。 + - 自动化压缩Config中定义量化、蒸馏、剪枝等压缩算法会合并执行,压缩策略有:量化+蒸馏,剪枝+蒸馏等等。示例中选择的配置为离线量化超参搜索。 + - 如果要压缩的模型参数是存储在各自分离的文件中,需要先通过[convert.py](./convert.py) 脚本将其保存成一个单独的二进制文件。 + +## 进阶使用 + +- ACT可以自动处理常见的预测模型,如果有更特殊的改造需求,可以参考[ACT超参配置教程](./hyperparameter_tutorial.md)来进行单独配置压缩策略。 + +## 社区交流 + +- 微信扫描二维码并填写问卷之后,加入技术交流群 + +
+ +- 如果你发现任何关于ACT自动化压缩工具的问题或者是建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleSlim/issues)给我们提issues。同时欢迎贡献更多优秀模型,共建开源生态。 + +## License + +本项目遵循[Apache-2.0开源协议](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/LICENSE) diff --git a/example/auto_compression/detection/README.md b/example/auto_compression/detection/README.md index c625eb1c0..6c35915a8 100644 --- a/example/auto_compression/detection/README.md +++ b/example/auto_compression/detection/README.md @@ -23,16 +23,16 @@ | 模型 | 策略 | 输入尺寸 | mAPval
0.5:0.95 | 预测时延FP32
(ms) |预测时延FP16
(ms) | 预测时延INT8
(ms) | 配置文件 | Inference模型 | | :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: | | PP-YOLOE-l | Base模型 | 640*640 | 50.9 | 11.2 | 7.7ms | - | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/detection/ppyoloe_crn_l_300e_coco.tar) | -| PP-YOLOE-l | 量化+蒸馏 | 640*640 | 50.6 | - | - | 6.7ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/demo/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_crn_l_300e_coco_quant.tar) | +| PP-YOLOE-l | 量化蒸馏训练 | 640*640 | 50.6 | - | - | 6.7ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/demo/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_crn_l_300e_coco_quant.tar) | - mAP的指标均在COCO val2017数据集中评测得到。 -- PP-YOLOE模型在Tesla V100的GPU环境下测试,并且开启TensorRT,测试脚本是[benchmark demo](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/deploy/python)。 +- PP-YOLOE模型在Tesla V100的GPU环境下测试,并且开启TensorRT,batch_size=1,包含NMS,测试脚本是[benchmark demo](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/deploy/python)。 ## 3. 自动压缩流程 #### 3.1 准备环境 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 +- PaddleSlim >= 2.3 - PaddleDet >= 2.4 - opencv-python @@ -46,8 +46,7 @@ pip install paddlepaddle-gpu 安装paddleslim: ```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim ``` 安装paddledet: @@ -64,6 +63,8 @@ pip install paddledet 如果数据集为非COCO格式数据,请修改[configs](./configs)中reader配置文件中的Dataset字段。 +以PP-YOLOE模型为例,如果已经准备好数据集,请直接修改[./configs/yolo_reader.yml]中`EvalDataset`的`dataset_dir`字段为自己数据集路径即可。 + #### 3.3 准备预测模型 预测模型的格式为:`model.pdmodel` 和 `model.pdiparams`两个,带`pdmodel`的是模型文件,带`pdiparams`后缀的是权重文件。 @@ -85,13 +86,7 @@ python tools/export_model.py \ trt=True \ ``` -**注意**:PP-YOLOE导出时设置`trt=True`旨在优化在TensorRT上的性能,其他模型不需要设置`trt=True`。 - -或直接下载: -```shell -wget https://bj.bcebos.com/v1/paddle-slim-models/detection/ppyoloe_crn_l_300e_coco.tar -tar -xf ppyoloe_crn_l_300e_coco.tar -``` +**注意**:PP-YOLOE导出时设置`trt=True`旨在优化在TensorRT上的性能,如果没有使用TensorRT,或者其他模型都不需要设置`trt=True`。如果想快速体验,可以直接下载[PP-YOLOE-l导出模型](https://bj.bcebos.com/v1/paddle-slim-models/detection/ppyoloe_crn_l_300e_coco.tar)。 #### 3.4 自动压缩并产出模型 @@ -123,8 +118,6 @@ python eval.py --config_path=./configs/ppyoloe_l_qat_dis.yaml ## 4.预测部署 可以参考[PaddleDetection部署教程](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/deploy): -- GPU上量化模型开启TensorRT并设置trt_int8模式进行部署; -- CPU上可参考[X86 CPU部署量化模型教程](https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/docs/optimize/paddle_x86_cpu_int8.md); -- 移动端请直接使用[Paddle Lite Demo](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/deploy/lite)部署。 +- GPU上量化模型开启TensorRT并设置trt_int8模式进行部署。 ## 5.FAQ diff --git a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml index d38055213..1727e5337 100644 --- a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml +++ b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml @@ -9,12 +9,10 @@ Global: Distillation: alpha: 1.0 
- loss: l2 - node: - - concat_15.tmp_0 - - concat_14.tmp_0 + loss: soft_label Quantization: + onnx_format: true use_pact: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: @@ -22,12 +20,12 @@ Quantization: - depthwise_conv2d TrainConfig: - train_iter: 3000 + train_iter: 5000 eval_iter: 1000 learning_rate: type: CosineAnnealingDecay learning_rate: 0.00003 - T_max: 5000 + T_max: 6000 optimizer_builder: optimizer: type: SGD diff --git a/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml new file mode 100644 index 000000000..466c9c2b5 --- /dev/null +++ b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml @@ -0,0 +1,34 @@ + +Global: + reader_config: configs/yolo_reader.yml + input_list: ['image'] + arch: PPYOLOE # When export exclude_nms=True, need set arch: PPYOLOE + Evaluation: True + model_dir: ./ppyoloe_crn_s_300e_coco + model_filename: model.pdmodel + params_filename: model.pdiparams + +Distillation: + alpha: 1.0 + loss: soft_label + +Quantization: + onnx_format: true + use_pact: true + activation_quantize_type: 'moving_average_abs_max' + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + train_iter: 5000 + eval_iter: 1000 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.00003 + T_max: 6000 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 4.0e-05 + diff --git a/example/auto_compression/detection/eval.py b/example/auto_compression/detection/eval.py index 56c3af15d..a4ea554c8 100644 --- a/example/auto_compression/detection/eval.py +++ b/example/auto_compression/detection/eval.py @@ -20,7 +20,7 @@ from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from keypoint_utils import keypoint_post_process @@ -76,8 +76,8 @@ def eval(): place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() exe = paddle.static.Executor(place) - val_program, feed_target_names, fetch_targets = paddle.fluid.io.load_inference_model( - global_config["model_dir"], + val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model( + global_config["model_dir"].rstrip('/'), exe, model_filename=global_config["model_filename"], params_filename=global_config["params_filename"]) diff --git a/example/auto_compression/detection/run.py b/example/auto_compression/detection/run.py index 632a38f2e..523f2439e 100644 --- a/example/auto_compression/detection/run.py +++ b/example/auto_compression/detection/run.py @@ -20,7 +20,7 @@ from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from keypoint_utils import keypoint_post_process @@ -43,8 +43,6 @@ def argsparser(): type=str, default='gpu', help="which device used to compress.") - parser.add_argument( - '--eval', type=bool, default=False, help="whether to run evaluation.") return parser @@ -123,7 +121,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): 
global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] reader_cfg = load_config(global_config['reader_config']) diff --git a/example/auto_compression/hyperparameter_tutorial.md b/example/auto_compression/hyperparameter_tutorial.md index 6d423a9f9..29ce9fefc 100644 --- a/example/auto_compression/hyperparameter_tutorial.md +++ b/example/auto_compression/hyperparameter_tutorial.md @@ -1,9 +1,9 @@ -# ACT超参详细教程 +# 1. ACT超参详细教程 -## 各压缩方法超参解析 +## 1.1 各压缩方法超参解析 -#### 配置定制量化方案 +### 1.1.1 量化(quantization) 量化参数主要设置量化比特数和量化op类型,其中量化op包含卷积层(conv2d, depthwise_conv2d)和全连接层(mul, matmul_v2)。以下为只量化卷积层的示例: ```yaml @@ -20,69 +20,148 @@ Quantization: moving_rate: 0.9 # 'moving_average_abs_max' 量化方式的衰减系数,默认 0.9。 for_tensorrt: false # 量化后的模型是否使用 TensorRT 进行预测。如果是的话,量化op类型为: TENSORRT_OP_TYPES 。默认值为False. is_full_quantize: false # 是否全量化 + onnx_format: false # 是否采用ONNX量化标准格式 ``` -#### 配置定制蒸馏策略 +以上配置项说明如下: + + +- use_pact: 是否开启PACT。一般情况下,开启PACT后,量化产出的模型精度会更高。算法原理请参考:[PACT: Parameterized Clipping Activation for Quantized Neural Networks](https://arxiv.org/abs/1805.06085) +- activation_bits: 激活量化bit数,可选1~8。默认为8。 +- weight_bits: 参数量化bit数,可选1~8。默认为8。 +- activation_quantize_type: 激活量化方式,可选 'abs_max' , 'range_abs_max' , 'moving_average_abs_max' 。如果使用 TensorRT 加载量化后的模型来预测,请使用 'range_abs_max' 或 'moving_average_abs_max' 。默认为 'moving_average_abs_max'。 +- weight_quantize_type: 参数量化方式。可选 'abs_max' , 'channel_wise_abs_max' , 'range_abs_max' , 'moving_average_abs_max' 。如果使用 TensorRT 加载量化后的模型来预测,请使用 'channel_wise_abs_max' 。 默认 'channel_wise_abs_max' 。 +- not_quant_pattern: 所有 `name_scope` 包含 'not_quant_pattern' 字符串的 op ,都不量化。 `name_scope` 设置方式请参考 [paddle.static.name_scope](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/name_scope_cn.html#name-scope)。 +- quantize_op_types:需要进行量化的OP类型。通过以下代码输出所有支持量化的OP类型: +``` +from paddleslim.quant.quanter import TRANSFORM_PASS_OP_TYPES,QUANT_DEQUANT_PASS_OP_TYPES +print(TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES) +``` +- dtype: 量化后的参数类型,默认 int8 , 目前仅支持 int8 +- window_size: 'range_abs_max' 量化方式的 window size ,默认10000。 +- moving_rate: 'moving_average_abs_max' 量化方式的衰减系数,默认 0.9。 +- for_tensorrt: 量化后的模型是否使用 TensorRT 进行预测。默认值为False. 通过以下代码,输出for_tensorrt=True时会量化到的OP: +``` +from paddleslim.quant.quanter import TENSORRT_OP_TYPES +print(TENSORRT_OP_TYPES) +``` + +- is_full_quantize: 是否量化所有可支持op类型。默认值为False. + + +### 1.1.2 知识蒸馏(knowledge distillation) 蒸馏参数主要设置蒸馏节点(`node`)和教师预测模型路径,如下所示: ```yaml Distillation: - # alpha: 蒸馏loss所占权重;可输入多个数值,支持不同节点之间使用不同的ahpha值 alpha: 1.0 - # loss: 蒸馏loss算法;可输入多个loss,支持不同节点之间使用不同的loss算法 loss: l2 - # node: 蒸馏节点,即某层输出的变量名称,可以选择: - # 1. 使用自蒸馏的话,蒸馏结点仅包含学生网络节点即可, 支持多节点蒸馏; - # 2. 
使用其他蒸馏的话,蒸馏节点需要包含教师网络节点和对应的学生网络节点, - # 每两个节点组成一对,分别属于教师模型和学生模型,支持多节点蒸馏。 node: - relu_30.tmp_0 - # teacher_model_dir: 保存预测模型文件和预测模型参数文件的文件夹名称 + teacher_model_dir: ./inference_model # teacher_model_filename: 预测模型文件,格式为 *.pdmodel 或 __model__ teacher_model_filename: model.pdmodel # teacher_params_filename: 预测模型参数文件,格式为 *.pdiparams 或 __params__ teacher_params_filename: model.pdiparams ``` +以上配置项说明如下: + +- alpha: 蒸馏loss所占权重;可输入多个数值,支持不同节点之间使用不同的alpha值。 +- loss: 蒸馏loss算法;可输入多个loss,支持不同节点之间使用不同的loss算法。 可选"soft_label"、“l2”或“fsp”。也可自定义loss。具体定义和使用可参考[知识蒸馏API文档](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/dist/single_distiller_api.html)。 +- node: 蒸馏节点,即某层输出的变量名称。该选项设置方式分两种情况: -- 蒸馏loss目前支持的有:fsp,l2,soft_label,也可自定义loss。具体定义和使用可参考[知识蒸馏API文档](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/dist/single_distiller_api.html)。 + - 自蒸馏:教师模型为压缩前的推理模型,学生模型为压缩后的推理模型。‘node’ 可设置为变量名称的列表,ACT会自动在该列表中的变量上依次添加知识蒸馏loss。示例如下: + ``` + node: + - relu_30.tmp_0 + - relu_31.tmp_0 + ``` + 上述示例,会添加两个知识蒸馏loss。第一个loss的输入为教师模型和学生模型的 'relu_30.tmp_0',第二个loss的输入为教师模型和学生模型的'relu_31.tmp_0'。 + - 普通蒸馏:教师模型为任意模型,学生模型为压缩后的推理模型。‘node’ 可设置为变量名称的列表,列表中元素数量必须为偶数。示例如下: + ``` + node: + - teacher_relu_0.tmp_0 + - student_relu_0.tmp_0 + - teacher_relu_1.tmp_0 + - student_relu_1.tmp_0 + ``` -#### 配置定制结构化稀疏策略 + 上述示例,会添加两个知识蒸馏loss。第一个loss的输入为教师模型的变量“teacher_relu_0.tmp_0”和学生模型的变量“student_relu_0.tmp_0”,第二个loss的输入为教师模型的变量“teacher_relu_1.tmp_0”和学生模型的“student_relu_1.tmp_0”。 + + 如果不设置`node`,则分别取教师模型和学生模型的最后一个带参数的层的输出,组成知识蒸馏loss. + +- teacher_model_dir: 用于监督压缩后模型训练的教师模型所在的路径。如果不设置该选项,则使用压缩前的模型做为教师模型。 +- teacher_model_filename: 教师模型的模型文件名称,格式为 *.pdmodel 或 __model__。仅当设置`teacher_model_dir`后生效。 +- teacher_params_filename: 教师模型的参数文件名称,格式为 *.pdiparams 或 __params__。仅当设置`teacher_model_dir`后生效。 + + +### 1.1.3 结构化稀疏(sparsity) 结构化稀疏参数设置如下所示: ```yaml ChannelPrune: - # pruned_ratio: 裁剪比例 pruned_ratio: 0.25 - # prune_params_name: 需要裁剪的参数名字 prune_params_name: - conv1_weights - # criterion: 评估一个卷积层内通道重要性所参考的指标 criterion: l1_norm ``` -- criterion目前支持的有:l1_norm , bn_scale , geometry_median。具体定义和使用可参考[结构化稀疏API文档](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/prune/prune_api.html)。 -#### 配置定制ASP半结构化稀疏策略 +- pruned_ratio: 每个卷积层的通道数被剪裁的比例。 +- prune_params_name: 待剪裁的卷积层的权重名称。通过以下脚本获得推理模型中所有卷积层的权重名称: + +``` +import paddle +paddle.enable_static() +model_dir="./inference_model" +exe = paddle.static.Executor(paddle.CPUPlace()) +[inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model(model_dir, exe)) +for var_ in inference_program.list_vars(): + if var_.persistable and "conv2d" in var_.name: + print(f"{var_.name}") +``` + +或者,使用[Netron工具](https://netron.app/) 可视化`*.pdmodel`模型文件,选择合适的卷积层进行剪裁。 + +- criterion: 评估卷积通道重要性的指标。可选 “l1_norm” , “bn_scale” , “geometry_median”。具体定义和使用可参考[结构化稀疏API文档](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/prune/prune_api.html)。 + +### 1.1.4 ASP半结构化稀疏 半结构化稀疏参数设置如下所示: ```yaml ASPPrune: - # prune_params_name: 需要裁剪的参数名字 prune_params_name: - conv1_weights ``` -#### 配置定制针对Transformer结构的结构化剪枝策略 +- prune_params_name: 待剪裁的卷积层的权重名称。通过以下脚本获得推理模型中所有卷积层的权重名称: + +``` +import paddle +paddle.enable_static() +model_dir="./inference_model" +exe = paddle.static.Executor(paddle.CPUPlace()) +[inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model(model_dir, exe)) +for var_ in inference_program.list_vars(): + if var_.persistable and "conv2d" in var_.name: + print(f"{var_.name}") +``` + +或者,使用[Netron工具](https://netron.app/) 
可视化`*.pdmodel`模型文件,选择合适的卷积层进行剪裁。 + +### 1.1.5 Transformer结构化剪枝 针对Transformer结构的结构化剪枝参数设置如下所示: ```yaml TransformerPrune: - # pruned_ratio: 每个全链接层的裁剪比例 pruned_ratio: 0.25 ``` +- pruned_ratio: 每个全链接层的被剪裁的比例。 -#### 配置定制非结构化稀疏策略 +### 1.1.6 非结构化稀疏策略 非结构化稀疏参数设置如下所示: ```yaml @@ -122,7 +201,7 @@ UnstructurePrune: - local_sparsity 表示剪裁比例(ratio)应用的范围,仅在 'ratio' 模式生效。local_sparsity 开启时意味着每个参与剪裁的参数矩阵稀疏度均为 'ratio', 关闭时表示只保证模型整体稀疏度达到'ratio',但是每个参数矩阵的稀疏度可能存在差异。各个矩阵稀疏度保持一致时,稀疏加速更显著。 - 更多非结构化稀疏的参数含义详见[非结构化稀疏API文档](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/dygraph/pruners/unstructured_pruner.rst) -#### 配置训练超参 +### 1.1.7 训练超参 训练参数主要设置学习率、训练次数(epochs)和优化器等。 ```yaml @@ -143,12 +222,69 @@ TrainConfig: boundaries: [4500] # 设置策略参数 values: [0.005, 0.0005] # 设置策略参数 ``` -## 其他参数配置 +## 1.2 FAQ -#### 1.自动蒸馏效果不理想,怎么自主选择蒸馏节点? +### 1.自动蒸馏效果不理想,怎么自主选择蒸馏节点? 首先使用[Netron工具](https://netron.app/) 可视化`model.pdmodel`模型文件,选择模型中某些层输出Tensor名称,对蒸馏节点进行配置。(一般选择Backbone或网络的输出等层进行蒸馏)
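除了用 Netron 可视化外,也可以参考下面的示意脚本直接打印候选的蒸馏节点名称(写法与本教程其它 FAQ 中的脚本一致,`model_dir` 为假设的推理模型路径,筛选的 OP 类型可按需调整):

```python
import paddle
paddle.enable_static()
model_dir = "./inference_model"
exe = paddle.static.Executor(paddle.CPUPlace())
[inference_program, feed_target_names, fetch_targets] = (
    paddle.static.load_inference_model(model_dir, exe))
# 打印 conv2d / depthwise_conv2d / matmul_v2 等层的输出变量名,作为蒸馏节点(node)的候选
for block in inference_program.blocks:
    for op in block.ops:
        if op.type in ("conv2d", "depthwise_conv2d", "matmul_v2"):
            print(op.type, op.output_arg_names)
```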
+ +### 2.如何获得推理模型中的OP类型 + +执行以下代码获取推理模型中的OP类型,其中`model_dir`为推理模型存储路径。 + +``` +import paddle +paddle.enable_static() +model_dir="./inference_model" +exe = paddle.static.Executor(paddle.CPUPlace()) +inference_program, _, _ = ( + paddle.static.load_inference_model(model_dir, exe)) +op_types = {} +for block in inference_program.blocks: + for op in block.ops: + op_types[op.type] = 1 +print(f"Operators in inference model:\n{op_types.keys()}") +``` + +所用飞桨框架接口: + +- [load_inference_model](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/load_inference_model_cn.html#load-inference-model) +- [Program](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/Program_cn.html#program) +- [Executor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/Executor_cn.html#executor) + +### 3. 量化支持对哪些OP进行量化 + +执行以下代码,查看当前PaddlePaddle版本的量化功能所支持的OP类型: +``` +from paddle.fluid.contrib.slim.quantization.utils import _weight_supported_quantizable_op_type, _act_supported_quantizable_op_type +print(f"_supported_quantizable_op_type:\n{_weight_supported_quantizable_op_type}") +print(f"_supported_quantizable_op_type:\n{_act_supported_quantizable_op_type}") +``` + +### 4. 如何设置推理模型中OP的‘name_scope’属性 + +以下代码,将输出变量为`conv2d_52.tmp_0`的OP的`name_scope`设置为'skip_quant': +``` +import paddle +paddle.enable_static() +model_dir="./original_model" +exe = paddle.static.Executor(paddle.CPUPlace()) +[inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model(model_dir, exe)) +skips = ['conv2d_52.tmp_0'] +for block in inference_program.blocks: + for op in block.ops: + if op.output_arg_names[0] in skips: + op._set_attr("name_scope", "skip_quant") + +feed_vars = [] +for var_ in inference_program.list_vars(): + if var_.name in feed_target_names: + feed_vars.append(var_) +paddle.static.save_inference_model("./infer_model", feed_vars, fetch_targets, exe, program=inference_program) + +``` diff --git a/example/auto_compression/image_classification/README.md b/example/auto_compression/image_classification/README.md index a6a08a321..14c70fbd4 100644 --- a/example/auto_compression/image_classification/README.md +++ b/example/auto_compression/image_classification/README.md @@ -21,28 +21,30 @@ ### PaddleClas模型 -| 模型 | 策略 | Top-1 Acc | GPU 耗时(ms) | ARM CPU 耗时(ms) | -|:------:|:------:|:------:|:------:|:------:| -| MobileNetV1 | Baseline | 70.90 | - | 33.15 | -| MobileNetV1 | 量化+蒸馏 | 70.49 | - | 13.64 | -| ResNet50_vd | Baseline | 79.12 | 3.19 | - | -| ResNet50_vd | 量化+蒸馏 | 78.55 | 0.92 | - | -| ShuffleNetV2_x1_0 | Baseline | 68.65 | - | 10.43 | -| ShuffleNetV2_x1_0 | 量化+蒸馏 | 67.78 | - | 5.51 | -| SqueezeNet1_0_infer | Baseline | 59.60 | - | 35.98 | -| SqueezeNet1_0_infer | 量化+蒸馏 | 59.13 | - | 16.96 | -| PPLCNetV2_base | Baseline | 76.86 | - | 36.50 | -| PPLCNetV2_base | 量化+蒸馏 | 76.43 | - | 15.79 | -| PPHGNet_tiny | Baseline | 79.59 | 2.82 | - | -| PPHGNet_tiny | 量化+蒸馏 | 79.19 | 0.98 | - | -| EfficientNetB0 | Baseline | 77.02 | 1.95 | - | -| EfficientNetB0 | 量化+蒸馏 | 73.61 | 1.44 | - | -| GhostNet_x1_0 | Baseline | 74.02 | 2.93 | - | -| GhostNet_x1_0 | 量化+蒸馏 | 71.11 | 1.03 | - | -| InceptionV3 | Baseline | 79.14 | 4.79 | - | -| InceptionV3 | 量化+蒸馏 | 73.16 | 1.47 | - | -| MobileNetV3_large_x1_0 | Baseline | 75.32 | - | 16.62 | -| MobileNetV3_large_x1_0 | 量化+蒸馏 | 68.84 | - | 9.85 | +| 模型 | 策略 | Top-1 Acc | GPU 耗时(ms) | ARM CPU 耗时(ms) | 配置文件 | Inference模型 | +|:------:|:------:|:------:|:------:|:------:|:------:|:------:| +| MobileNetV1 | Baseline | 70.90 | - | 
33.15 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar) | +| MobileNetV1 | 量化+蒸馏 | 70.57 | - | 13.64 | [Config](./configs/MobileNetV1/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV1_QAT.tar) | +| ResNet50_vd | Baseline | 79.12 | 3.19 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar) | +| ResNet50_vd | 量化+蒸馏 | 78.74 | 0.92 | - | [Config](./configs/ResNet50_vd/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/ResNet50_vd_QAT.tar) | +| ShuffleNetV2_x1_0 | Baseline | 68.65 | - | 10.43 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ShuffleNetV2_x1_0_infer.tar) | +| ShuffleNetV2_x1_0 | 量化+蒸馏 | 68.32 | - | 5.51 | [Config](./configs/ShuffleNetV2_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/ShuffleNetV2_x1_0_QAT.tar) | +| SqueezeNet1_0 | Baseline | 59.60 | - | 35.98 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SqueezeNet1_0_infer.tar) | +| SqueezeNet1_0 | 量化+蒸馏 | 59.45 | - | 16.96 | [Config](./configs/SqueezeNet1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/SqueezeNet1_0_QAT.tar) | +| PPLCNetV2_base | Baseline | 76.86 | - | 36.50 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPLCNetV2_base_infer.tar) | +| PPLCNetV2_base | 量化+蒸馏 | 76.43 | - | 15.79 | [Config](./configs/PPLCNetV2_base/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/PPLCNetV2_base_QAT.tar) | +| PPHGNet_tiny | Baseline | 79.59 | 2.82 | - | - |[Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPHGNet_tiny_infer.tar) | +| PPHGNet_tiny | 量化+蒸馏 | 79.20 | 0.98 | - | [Config](./configs/PPHGNet_tiny/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/PPHGNet_tiny_QAT.tar) | +| InceptionV3 | Baseline | 79.14 | 4.79 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/InceptionV3_infer.tar) | +| InceptionV3 | 量化+蒸馏 | 78.32 | 1.47 | - | [Config](./configs/InceptionV3/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/InceptionV3_QAT.tar) | +| EfficientNetB0 | Baseline | 77.02 | 1.95 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/EfficientNetB0_infer.tar) | +| EfficientNetB0 | 量化+蒸馏 | 75.39 | 1.44 | - | [Config](./configs/EfficientNetB0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/EfficientNetB0_QAT.tar) | +| GhostNet_x1_0 | Baseline | 74.02 | 2.93 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/GhostNet_x1_0_infer.tar) | +| GhostNet_x1_0 | 量化+蒸馏 | 72.62 | 1.03 | - | [Config](./configs/GhostNet_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/GhostNet_x1_0_QAT.tar) | +| MobileNetV3_large_x1_0 | Baseline | 75.32 | - | 16.62 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_infer.tar) | +| MobileNetV3_large_x1_0 | 量化+蒸馏 | 74.41 | - | 9.85 | [Config](./configs/MobileNetV3_large_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV3_large_x1_0_QAT.tar) | +| MobileNetV3_large_x1_0_ssld | Baseline | 78.96 | - | 16.62 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_ssld_infer.tar) | +| MobileNetV3_large_x1_0_ssld | 量化+蒸馏 | 77.17 | - | 9.85 | 
[Config](./configs/MobileNetV3_large_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV3_large_x1_0_ssld_QAT.tar) | - ARM CPU 测试环境:`SDM865(4xA77+4xA55)` - Nvidia GPU 测试环境: @@ -56,7 +58,7 @@ - python >= 3.6 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 +- PaddleSlim >= 2.3 安装paddlepaddle: ```shell @@ -68,8 +70,7 @@ pip install paddlepaddle-gpu 安装paddleslim: ```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim ``` #### 3.2 准备数据集 @@ -119,22 +120,23 @@ python -m paddle.distributed.launch run.py --save_dir='./save_quant_mobilev1/' - 准备好inference模型后,使用以下命令进行预测: ```shell -python infer.py -c configs/infer.yaml +python infer.py --config_path="configs/infer.yaml" ``` 在配置文件```configs/infer.yaml```中有以下字段用于配置预测参数: -- ```Global.infer_imgs```:待预测的图片文件路径 -- ```Global.inference_model_dir```:inference 模型文件所在目录,该目录下需要有文件 .pdmodel 和 .pdiparams 两个文件 -- ```Global.use_tensorrt```:是否使用 TesorRT 预测引擎 -- ```Global.use_gpu```:是否使用 GPU 预测 -- ```Global.enable_mkldnn```:是否启用```MKL-DNN```加速库,注意```enable_mkldnn```与```use_gpu```同时为```True```时,将忽略```enable_mkldnn```,而使用```GPU```预测 -- ```Global.use_fp16```:是否启用```FP16``` -- ```PreProcess```:用于数据预处理配置 -- ```PostProcess```:由于后处理配置 -- ```PostProcess.Topk.class_id_map_file```:数据集 label 的映射文件,默认为```./images/imagenet1k_label_list.txt```,该文件为 PaddleClas 所使用的 ImageNet 数据集 label 映射文件 +- ```model_dir```:inference 模型文件所在目录,该目录下需要有文件 .pdmodel 和 .pdiparams 两个文件 +- ```model_filename```:model_dir文件夹下的模型文件名称 +- ```params_filename```:model_dir文件夹下的参数文件名称 +- ```batch_size```:预测一个batch的大小 +- ```image_size```:输入图像的大小 +- ```use_tensorrt```:是否使用 TesorRT 预测引擎 +- ```use_gpu```:是否使用 GPU 预测 +- ```enable_mkldnn```:是否启用```MKL-DNN```加速库,注意```enable_mkldnn```与```use_gpu```同时为```True```时,将忽略```enable_mkldnn```,而使用```GPU```预测 +- ```use_fp16```:是否启用```FP16``` +- ```use_int8```:是否启用```INT8``` 注意: -- 请注意模型的输入数据尺寸,部分模型需要修改参数:```PreProcess.resize_short```, ```PreProcess.resize``` +- 请注意模型的输入数据尺寸,如InceptionV3输入尺寸为299,部分模型需要修改参数:```image_size``` - 如果希望提升评测模型速度,使用 ```GPU``` 评测时,建议开启 ```TensorRT``` 加速预测,使用 ```CPU``` 评测时,建议开启 ```MKL-DNN``` 加速预测 - 若使用 TesorRT 预测引擎,需安装 ```WITH_TRT=ON``` 的Paddle,下载地址:[Python预测库](https://paddleinference.paddlepaddle.org.cn/master/user_guides/download_lib.html#python) diff --git a/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml b/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml index 608e091ce..f93f9dcf5 100644 --- a/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml index ef715027e..1a13ce1e4 100644 --- a/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - 
activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant @@ -33,3 +33,4 @@ TrainConfig: optimizer: type: Momentum weight_decay: 0.00002 + origin_metric: 0.7402 diff --git a/example/auto_compression/image_classification/configs/InceptionV3/prune_dis.yaml b/example/auto_compression/image_classification/configs/InceptionV3/prune_dis.yaml index a7edd881e..1b57873fa 100644 --- a/example/auto_compression/image_classification/configs/InceptionV3/prune_dis.yaml +++ b/example/auto_compression/image_classification/configs/InceptionV3/prune_dis.yaml @@ -4,6 +4,8 @@ Global: model_filename: inference.pdmodel params_filename: inference.pdiparams batch_size: 32 + resize_size: 320 + crop_size: 299 data_dir: /ILSVRC2012 Distillation: diff --git a/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml b/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml index 61fb42bbb..e8b1630aa 100644 --- a/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml @@ -1,19 +1,21 @@ Global: input_name: x - model_dir: InceptionV3_infer + model_dir: save_quant_inception model_filename: inference.pdmodel params_filename: inference.pdiparams batch_size: 32 - data_dir: /ILSVRC2012 + resize_size: 320 + img_size: 299 + data_dir: /workspace/dataset/ILSVRC2012 Distillation: - alpha: 10.0 + alpha: 1.0 loss: l2 node: - softmax_1.tmp_0 Quantization: is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant @@ -21,6 +23,7 @@ Quantization: - conv2d - depthwise_conv2d weight_bits: 8 + TrainConfig: epochs: 1 eval_iter: 500 diff --git a/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml b/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml index e22a450b6..b0fff1454 100644 --- a/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml @@ -4,7 +4,7 @@ Global: model_filename: inference.pdmodel params_filename: inference.pdiparams batch_size: 32 - data_dir: /ILSVRC2012 + data_dir: ./ILSVRC2012 Distillation: alpha: 1.0 @@ -16,7 +16,7 @@ Quantization: activation_bits: 8 is_full_quantize: false activation_quantize_type: moving_average_abs_max - weight_quantize_type: abs_max + weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant quantize_op_types: diff --git a/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml index a890a6395..a1fbef1bc 100644 --- a/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml @@ -12,10 +12,10 @@ Distillation: node: - softmax_0.tmp_0 Quantization: + use_pact: true activation_bits: 8 is_full_quantize: false - use_pact: true - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant @@ -25,10 +25,10 @@ Quantization: weight_bits: 8 TrainConfig: epochs: 1 - eval_iter: 2000 + eval_iter: 
500 learning_rate: type: CosineAnnealingDecay - learning_rate: 0.0001 + learning_rate: 0.015 optimizer_builder: optimizer: type: Momentum diff --git a/example/auto_compression/image_classification/configs/PPHGNet_tiny/prune_dis.yaml b/example/auto_compression/image_classification/configs/PPHGNet_tiny/prune_dis.yaml new file mode 100644 index 000000000..3d886b6d9 --- /dev/null +++ b/example/auto_compression/image_classification/configs/PPHGNet_tiny/prune_dis.yaml @@ -0,0 +1,37 @@ +Global: + input_name: x + model_dir: PPHGNet_tiny_infer + model_filename: inference.pdmodel + params_filename: inference.pdiparams + batch_size: 32 + data_dir: /ILSVRC2012 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - softmax_1.tmp_0 +UnstructurePrune: + prune_strategy: gmp + prune_mode: ratio + ratio: 0.75 + gmp_config: + stable_iterations: 0 + pruning_iterations: 4500 + tunning_iterations: 4500 + resume_iteration: -1 + pruning_steps: 100 + initial_ratio: 0.15 + prune_params_type: conv1x1_only + local_sparsity: True +TrainConfig: + epochs: 1 + eval_iter: 500 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.015 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7959 \ No newline at end of file diff --git a/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml new file mode 100644 index 000000000..6c2462f91 --- /dev/null +++ b/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml @@ -0,0 +1,36 @@ +Global: + input_name: x + model_dir: PPHGNet_tiny_infer + model_filename: inference.pdmodel + params_filename: inference.pdiparams + batch_size: 32 + data_dir: /ILSVRC2012 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - softmax_1.tmp_0 +Quantization: + use_pact: true + activation_bits: 8 + is_full_quantize: false + activation_quantize_type: moving_average_abs_max + weight_quantize_type: channel_wise_abs_max + not_quant_pattern: + - skip_quant + quantize_op_types: + - conv2d + - depthwise_conv2d + weight_bits: 8 +TrainConfig: + epochs: 1 + eval_iter: 500 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.015 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7959 diff --git a/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml index e9097d26f..19fdd97aa 100644 --- a/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml index 109dec68b..2754b5d8a 100644 --- a/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max 
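  # 说明:moving_average_abs_max 在训练中滑动统计激活值范围(衰减系数由 moving_rate 控制),
  # 是 hyperparameter_tutorial.md 中建议用于 TensorRT 部署的激活量化方式之一。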
weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml b/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml index e99dddda1..f936cc40d 100644 --- a/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml @@ -4,7 +4,7 @@ Global: model_filename: inference.pdmodel params_filename: inference.pdiparams batch_size: 32 - data_dir: /ILSVRC2012 + data_dir: ./ILSVRC2012 Distillation: alpha: 1.0 @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml index 1b1cd8b86..815dccaed 100644 --- a/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml index ed240ec0f..33d7cc3f0 100644 --- a/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml @@ -11,13 +11,10 @@ Distillation: loss: l2 node: - softmax_0.tmp_0 - teacher_model_dir: SqueezeNet1_0_infer - teacher_model_filename: inference.pdmodel - teacher_params_filename: inference.pdiparams Quantization: activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml b/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml index 4941ee92a..148085943 100644 --- a/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml @@ -15,7 +15,7 @@ Quantization: use_pact: true activation_bits: 8 is_full_quantize: false - activation_quantize_type: range_abs_max + activation_quantize_type: moving_average_abs_max weight_quantize_type: channel_wise_abs_max not_quant_pattern: - skip_quant diff --git a/example/auto_compression/image_classification/configs/eval.yaml b/example/auto_compression/image_classification/configs/eval.yaml new file mode 100644 index 000000000..ae1987620 --- /dev/null +++ b/example/auto_compression/image_classification/configs/eval.yaml @@ -0,0 +1,7 @@ +model_dir: './MobileNetV1_infer' +model_filename: 'inference.pdmodel' +params_filename: "inference.pdiparams" +batch_size: 128 +data_dir: 
'./ILSVRC2012_data_demo/ILSVRC2012/' +img_size: 224 +resize_size: 256 diff --git a/example/auto_compression/image_classification/configs/infer.yaml b/example/auto_compression/image_classification/configs/infer.yaml index 945575015..f283fc804 100644 --- a/example/auto_compression/image_classification/configs/infer.yaml +++ b/example/auto_compression/image_classification/configs/infer.yaml @@ -1,40 +1,16 @@ -Global: - infer_imgs: "./images/ILSVRC2012_val_00000010.jpeg" - inference_model_dir: "./MobileNetV1_infer" - model_filename: "inference.pdmodel" - params_filename: "inference.pdiparams" - batch_size: 1 - use_gpu: True - enable_mkldnn: True - cpu_num_threads: 10 - enable_benchmark: True - use_fp16: False - use_int8: False - ir_optim: True - use_tensorrt: True - gpu_mem: 8000 - enable_profile: False - benchmark: True - -PreProcess: - transform_ops: - - ResizeImage: - resize_short: 256 - - CropImage: - size: 224 - - NormalizeImage: - scale: 0.00392157 - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - channel_num: 3 - - ToCHWImage: - -PostProcess: - main_indicator: Topk - Topk: - topk: 5 - class_id_map_file: "./images/imagenet1k_label_list.txt" - SavePreLabel: - save_dir: ./pre_label/ +model_dir: "./MobileNetV1_infer" +model_filename: "inference.pdmodel" +params_filename: "inference.pdiparams" +batch_size: 1 +image_size: 224 +use_gpu: True +enable_mkldnn: True +cpu_num_threads: 10 +enable_benchmark: True +use_fp16: False +use_int8: False +ir_optim: True +use_tensorrt: True +gpu_mem: 8000 +enable_profile: False diff --git a/example/auto_compression/image_classification/eval.py b/example/auto_compression/image_classification/eval.py index 5d8a327aa..9cd9b4a3b 100644 --- a/example/auto_compression/image_classification/eval.py +++ b/example/auto_compression/image_classification/eval.py @@ -14,8 +14,6 @@ import os import sys -sys.path[0] = os.path.join( - os.path.dirname("__file__"), os.path.pardir, os.path.pardir) import argparse import functools from functools import partial @@ -23,9 +21,9 @@ import numpy as np import paddle import paddle.nn as nn -from paddle.io import Dataset, BatchSampler, DataLoader -import imagenet_reader as reader -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddle.io import DataLoader +from imagenet_reader import ImageNetDataset +from paddleslim.common import load_config as load_slim_config def argsparser(): @@ -33,15 +31,29 @@ def argsparser(): parser.add_argument( '--config_path', type=str, - default=None, - help="path of compression strategy config.", - required=True) + default='./image_classification/configs/eval.yaml', + help="path of compression strategy config.") + parser.add_argument( + '--model_dir', + type=str, + default='./MobileNetV1_infer', + help='model directory') + return parser -def eval_reader(data_dir, batch_size): - val_reader = paddle.batch( - reader.val(data_dir=data_dir), batch_size=batch_size) - return val_reader +def eval_reader(data_dir, batch_size, crop_size, resize_size): + val_reader = ImageNetDataset( + mode='val', + data_dir=data_dir, + crop_size=crop_size, + resize_size=resize_size) + val_loader = DataLoader( + val_reader, + batch_size=global_config['batch_size'], + shuffle=False, + drop_last=False, + num_workers=0) + return val_loader def eval(): @@ -55,19 +67,16 @@ def eval(): params_filename=global_config["params_filename"]) print('Loaded model from: {}'.format(global_config["model_dir"])) - val_reader = eval_reader(data_dir, batch_size=global_config['batch_size']) 
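The new eval.yaml above feeds the DataLoader-based evaluation pipeline that eval.py switches to in the hunks below. A rough standalone sketch of that wiring (paths are the demo defaults from eval.yaml, and `imagenet_reader.ImageNetDataset` is the module added later in this change):

```python
from paddle.io import DataLoader
from imagenet_reader import ImageNetDataset      # reader added by this change
from paddleslim.common import load_config

cfg = load_config('./image_classification/configs/eval.yaml')
val_dataset = ImageNetDataset(
    data_dir=cfg['data_dir'],
    mode='val',
    crop_size=cfg['img_size'] if 'img_size' in cfg else 224,
    resize_size=cfg['resize_size'] if 'resize_size' in cfg else 256)
val_loader = DataLoader(
    val_dataset,
    batch_size=cfg['batch_size'],
    shuffle=False,
    drop_last=False,
    num_workers=0)
```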
- image = paddle.static.data( - name=global_config['input_name'], - shape=[None, 3, 224, 224], - dtype='float32') - label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') + val_loader = eval_reader( + data_dir, + batch_size=global_config['batch_size'], + crop_size=img_size, + resize_size=resize_size) results = [] - print('Evaluating... It will take a while. Please wait...') - for batch_id, data in enumerate(val_reader()): - # top1_acc, top5_acc - image = np.array([[d[0]] for d in data]) - image = image.reshape((len(data), 3, 224, 224)) - label = [[d[1]] for d in data] + print('Evaluating...') + for batch_id, (image, label) in enumerate(val_loader): + image = np.array(image) + label = np.array(label).astype('int64') pred = exe.run(val_program, feed={feed_target_names[0]: image}, fetch_list=fetch_targets) @@ -87,13 +96,21 @@ def eval(): return result[0] -def main(): +def main(args): global global_config - all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" - global_config = all_config["Global"] + global_config = load_slim_config(args.config_path) + global data_dir data_dir = global_config['data_dir'] + if args.model_dir != global_config['model_dir']: + global_config['model_dir'] = args.model_dir + + global img_size, resize_size + img_size = int(global_config[ + 'img_size']) if 'img_size' in global_config else 224 + resize_size = int(global_config[ + 'resize_size']) if 'resize_size' in global_config else 256 + result = eval() print('Eval Top1:', result) @@ -102,4 +119,4 @@ def main(): paddle.enable_static() parser = argsparser() args = parser.parse_args() - main() + main(args) diff --git a/example/auto_compression/image_classification/imagenet_reader.py b/example/auto_compression/image_classification/imagenet_reader.py new file mode 100644 index 000000000..609bfba16 --- /dev/null +++ b/example/auto_compression/image_classification/imagenet_reader.py @@ -0,0 +1,245 @@ +import os +import math +import random +import functools +import numpy as np +import paddle +from PIL import Image, ImageEnhance +from paddle.io import Dataset + +random.seed(0) +np.random.seed(0) + +DATA_DIM = 224 +RESIZE_DIM = 256 + +THREAD = 16 +BUF_SIZE = 10240 + +DATA_DIR = 'data/ILSVRC2012/' +DATA_DIR = os.path.join(os.path.split(os.path.realpath(__file__))[0], DATA_DIR) + +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) // 2 + h_start = (height - size) // 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]): + aspect_ratio = math.sqrt(np.random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. 
/ aspect_ratio + + bound = min((float(img.size[0]) / img.size[1]) / (w**2), + (float(img.size[1]) / img.size[0]) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min, + scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = np.random.randint(0, img.size[0] - w + 1) + j = np.random.randint(0, img.size[1] - h + 1) + + img = img.crop((i, j, i + w, j + h)) + img = img.resize((size, size), Image.LANCZOS) + return img + + +def rotate_image(img): + angle = np.random.randint(-10, 11) + img = img.rotate(angle) + return img + + +def distort_color(img): + def random_brightness(img, lower=0.5, upper=1.5): + e = np.random.uniform(lower, upper) + return ImageEnhance.Brightness(img).enhance(e) + + def random_contrast(img, lower=0.5, upper=1.5): + e = np.random.uniform(lower, upper) + return ImageEnhance.Contrast(img).enhance(e) + + def random_color(img, lower=0.5, upper=1.5): + e = np.random.uniform(lower, upper) + return ImageEnhance.Color(img).enhance(e) + + ops = [random_brightness, random_contrast, random_color] + np.random.shuffle(ops) + + img = ops[0](img) + img = ops[1](img) + img = ops[2](img) + + return img + + +def process_image(sample, mode, color_jitter, rotate, crop_size, resize_size): + img_path = sample[0] + + try: + img = Image.open(img_path) + except: + print(img_path, "not exists!") + return None + if mode == 'train': + if rotate: img = rotate_image(img) + img = random_crop(img, crop_size) + else: + img = resize_short(img, target_size=resize_size) + img = crop_image(img, target_size=crop_size, center=True) + if mode == 'train': + if color_jitter: + img = distort_color(img) + if np.random.randint(0, 2) == 1: + img = img.transpose(Image.FLIP_LEFT_RIGHT) + + if img.mode != 'RGB': + img = img.convert('RGB') + + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + + if mode == 'train' or mode == 'val': + return img, sample[1] + elif mode == 'test': + return [img] + + +def _reader_creator(file_list, + mode, + shuffle=False, + color_jitter=False, + rotate=False, + data_dir=DATA_DIR, + batch_size=1): + def reader(): + try: + with open(file_list) as flist: + full_lines = [line.strip() for line in flist] + if shuffle: + np.random.shuffle(full_lines) + lines = full_lines + for line in lines: + if mode == 'train' or mode == 'val': + img_path, label = line.split() + img_path = os.path.join(data_dir, img_path) + yield img_path, int(label) + elif mode == 'test': + img_path = os.path.join(data_dir, line) + yield [img_path] + except Exception as e: + print("Reader failed!\n{}".format(str(e))) + os._exit(1) + + mapper = functools.partial( + process_image, mode=mode, color_jitter=color_jitter, rotate=rotate) + + return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) + + +def train(data_dir=DATA_DIR): + file_list = os.path.join(data_dir, 'train_list.txt') + return _reader_creator( + file_list, + 'train', + shuffle=True, + color_jitter=False, + rotate=False, + data_dir=data_dir) + + +def val(data_dir=DATA_DIR): + file_list = os.path.join(data_dir, 'val_list.txt') + return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir) + + +def test(data_dir=DATA_DIR): + file_list = os.path.join(data_dir, 'test_list.txt') + return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir) + + +class ImageNetDataset(Dataset): + def __init__(self, + data_dir=DATA_DIR, + 
mode='train', + crop_size=DATA_DIM, + resize_size=RESIZE_DIM): + super(ImageNetDataset, self).__init__() + self.data_dir = data_dir + self.crop_size = crop_size + self.resize_size = resize_size + train_file_list = os.path.join(data_dir, 'train_list.txt') + val_file_list = os.path.join(data_dir, 'val_list.txt') + test_file_list = os.path.join(data_dir, 'test_list.txt') + self.mode = mode + if mode == 'train': + with open(train_file_list) as flist: + full_lines = [line.strip() for line in flist] + np.random.shuffle(full_lines) + lines = full_lines + self.data = [line.split() for line in lines] + else: + with open(val_file_list) as flist: + lines = [line.strip() for line in flist] + self.data = [line.split() for line in lines] + + def __getitem__(self, index): + sample = self.data[index] + data_path = os.path.join(self.data_dir, sample[0]) + if self.mode == 'train': + data, label = process_image( + [data_path, sample[1]], + mode='train', + color_jitter=False, + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) + return data, np.array([label]).astype('int64') + elif self.mode == 'val': + data, label = process_image( + [data_path, sample[1]], + mode='val', + color_jitter=False, + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) + return data, np.array([label]).astype('int64') + elif self.mode == 'test': + data = process_image( + [data_path, sample[1]], + mode='test', + color_jitter=False, + rotate=False, + crop_size=self.crop_size, + resize_size=self.resize_size) + return data + + def __len__(self): + return len(self.data) diff --git a/example/auto_compression/image_classification/images/ILSVRC2012_val_00000010.jpeg b/example/auto_compression/image_classification/images/ILSVRC2012_val_00000010.jpeg deleted file mode 100644 index 6fcafb4d9..000000000 Binary files a/example/auto_compression/image_classification/images/ILSVRC2012_val_00000010.jpeg and /dev/null differ diff --git a/example/auto_compression/image_classification/images/imagenet1k_label_list.txt b/example/auto_compression/image_classification/images/imagenet1k_label_list.txt deleted file mode 100644 index 376e18021..000000000 --- a/example/auto_compression/image_classification/images/imagenet1k_label_list.txt +++ /dev/null @@ -1,1000 +0,0 @@ -0 tench, Tinca tinca -1 goldfish, Carassius auratus -2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -3 tiger shark, Galeocerdo cuvieri -4 hammerhead, hammerhead shark -5 electric ray, crampfish, numbfish, torpedo -6 stingray -7 cock -8 hen -9 ostrich, Struthio camelus -10 brambling, Fringilla montifringilla -11 goldfinch, Carduelis carduelis -12 house finch, linnet, Carpodacus mexicanus -13 junco, snowbird -14 indigo bunting, indigo finch, indigo bird, Passerina cyanea -15 robin, American robin, Turdus migratorius -16 bulbul -17 jay -18 magpie -19 chickadee -20 water ouzel, dipper -21 kite -22 bald eagle, American eagle, Haliaeetus leucocephalus -23 vulture -24 great grey owl, great gray owl, Strix nebulosa -25 European fire salamander, Salamandra salamandra -26 common newt, Triturus vulgaris -27 eft -28 spotted salamander, Ambystoma maculatum -29 axolotl, mud puppy, Ambystoma mexicanum -30 bullfrog, Rana catesbeiana -31 tree frog, tree-frog -32 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -33 loggerhead, loggerhead turtle, Caretta caretta -34 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -35 mud turtle -36 terrapin -37 box turtle, box tortoise -38 banded gecko -39 
common iguana, iguana, Iguana iguana -40 American chameleon, anole, Anolis carolinensis -41 whiptail, whiptail lizard -42 agama -43 frilled lizard, Chlamydosaurus kingi -44 alligator lizard -45 Gila monster, Heloderma suspectum -46 green lizard, Lacerta viridis -47 African chameleon, Chamaeleo chamaeleon -48 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -49 African crocodile, Nile crocodile, Crocodylus niloticus -50 American alligator, Alligator mississipiensis -51 triceratops -52 thunder snake, worm snake, Carphophis amoenus -53 ringneck snake, ring-necked snake, ring snake -54 hognose snake, puff adder, sand viper -55 green snake, grass snake -56 king snake, kingsnake -57 garter snake, grass snake -58 water snake -59 vine snake -60 night snake, Hypsiglena torquata -61 boa constrictor, Constrictor constrictor -62 rock python, rock snake, Python sebae -63 Indian cobra, Naja naja -64 green mamba -65 sea snake -66 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -67 diamondback, diamondback rattlesnake, Crotalus adamanteus -68 sidewinder, horned rattlesnake, Crotalus cerastes -69 trilobite -70 harvestman, daddy longlegs, Phalangium opilio -71 scorpion -72 black and gold garden spider, Argiope aurantia -73 barn spider, Araneus cavaticus -74 garden spider, Aranea diademata -75 black widow, Latrodectus mactans -76 tarantula -77 wolf spider, hunting spider -78 tick -79 centipede -80 black grouse -81 ptarmigan -82 ruffed grouse, partridge, Bonasa umbellus -83 prairie chicken, prairie grouse, prairie fowl -84 peacock -85 quail -86 partridge -87 African grey, African gray, Psittacus erithacus -88 macaw -89 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -90 lorikeet -91 coucal -92 bee eater -93 hornbill -94 hummingbird -95 jacamar -96 toucan -97 drake -98 red-breasted merganser, Mergus serrator -99 goose -100 black swan, Cygnus atratus -101 tusker -102 echidna, spiny anteater, anteater -103 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -104 wallaby, brush kangaroo -105 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -106 wombat -107 jellyfish -108 sea anemone, anemone -109 brain coral -110 flatworm, platyhelminth -111 nematode, nematode worm, roundworm -112 conch -113 snail -114 slug -115 sea slug, nudibranch -116 chiton, coat-of-mail shell, sea cradle, polyplacophore -117 chambered nautilus, pearly nautilus, nautilus -118 Dungeness crab, Cancer magister -119 rock crab, Cancer irroratus -120 fiddler crab -121 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -122 American lobster, Northern lobster, Maine lobster, Homarus americanus -123 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -124 crayfish, crawfish, crawdad, crawdaddy -125 hermit crab -126 isopod -127 white stork, Ciconia ciconia -128 black stork, Ciconia nigra -129 spoonbill -130 flamingo -131 little blue heron, Egretta caerulea -132 American egret, great white heron, Egretta albus -133 bittern -134 crane -135 limpkin, Aramus pictus -136 European gallinule, Porphyrio porphyrio -137 American coot, marsh hen, mud hen, water hen, Fulica americana -138 bustard -139 ruddy turnstone, Arenaria interpres -140 red-backed sandpiper, dunlin, Erolia alpina -141 redshank, Tringa totanus -142 dowitcher -143 oystercatcher, oyster catcher -144 pelican -145 king penguin, Aptenodytes patagonica -146 albatross, mollymawk -147 grey whale, gray whale, devilfish, 
Eschrichtius gibbosus, Eschrichtius robustus -148 killer whale, killer, orca, grampus, sea wolf, Orcinus orca -149 dugong, Dugong dugon -150 sea lion -151 Chihuahua -152 Japanese spaniel -153 Maltese dog, Maltese terrier, Maltese -154 Pekinese, Pekingese, Peke -155 Shih-Tzu -156 Blenheim spaniel -157 papillon -158 toy terrier -159 Rhodesian ridgeback -160 Afghan hound, Afghan -161 basset, basset hound -162 beagle -163 bloodhound, sleuthhound -164 bluetick -165 black-and-tan coonhound -166 Walker hound, Walker foxhound -167 English foxhound -168 redbone -169 borzoi, Russian wolfhound -170 Irish wolfhound -171 Italian greyhound -172 whippet -173 Ibizan hound, Ibizan Podenco -174 Norwegian elkhound, elkhound -175 otterhound, otter hound -176 Saluki, gazelle hound -177 Scottish deerhound, deerhound -178 Weimaraner -179 Staffordshire bullterrier, Staffordshire bull terrier -180 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -181 Bedlington terrier -182 Border terrier -183 Kerry blue terrier -184 Irish terrier -185 Norfolk terrier -186 Norwich terrier -187 Yorkshire terrier -188 wire-haired fox terrier -189 Lakeland terrier -190 Sealyham terrier, Sealyham -191 Airedale, Airedale terrier -192 cairn, cairn terrier -193 Australian terrier -194 Dandie Dinmont, Dandie Dinmont terrier -195 Boston bull, Boston terrier -196 miniature schnauzer -197 giant schnauzer -198 standard schnauzer -199 Scotch terrier, Scottish terrier, Scottie -200 Tibetan terrier, chrysanthemum dog -201 silky terrier, Sydney silky -202 soft-coated wheaten terrier -203 West Highland white terrier -204 Lhasa, Lhasa apso -205 flat-coated retriever -206 curly-coated retriever -207 golden retriever -208 Labrador retriever -209 Chesapeake Bay retriever -210 German short-haired pointer -211 vizsla, Hungarian pointer -212 English setter -213 Irish setter, red setter -214 Gordon setter -215 Brittany spaniel -216 clumber, clumber spaniel -217 English springer, English springer spaniel -218 Welsh springer spaniel -219 cocker spaniel, English cocker spaniel, cocker -220 Sussex spaniel -221 Irish water spaniel -222 kuvasz -223 schipperke -224 groenendael -225 malinois -226 briard -227 kelpie -228 komondor -229 Old English sheepdog, bobtail -230 Shetland sheepdog, Shetland sheep dog, Shetland -231 collie -232 Border collie -233 Bouvier des Flandres, Bouviers des Flandres -234 Rottweiler -235 German shepherd, German shepherd dog, German police dog, alsatian -236 Doberman, Doberman pinscher -237 miniature pinscher -238 Greater Swiss Mountain dog -239 Bernese mountain dog -240 Appenzeller -241 EntleBucher -242 boxer -243 bull mastiff -244 Tibetan mastiff -245 French bulldog -246 Great Dane -247 Saint Bernard, St Bernard -248 Eskimo dog, husky -249 malamute, malemute, Alaskan malamute -250 Siberian husky -251 dalmatian, coach dog, carriage dog -252 affenpinscher, monkey pinscher, monkey dog -253 basenji -254 pug, pug-dog -255 Leonberg -256 Newfoundland, Newfoundland dog -257 Great Pyrenees -258 Samoyed, Samoyede -259 Pomeranian -260 chow, chow chow -261 keeshond -262 Brabancon griffon -263 Pembroke, Pembroke Welsh corgi -264 Cardigan, Cardigan Welsh corgi -265 toy poodle -266 miniature poodle -267 standard poodle -268 Mexican hairless -269 timber wolf, grey wolf, gray wolf, Canis lupus -270 white wolf, Arctic wolf, Canis lupus tundrarum -271 red wolf, maned wolf, Canis rufus, Canis niger -272 coyote, prairie wolf, brush wolf, Canis latrans -273 dingo, warrigal, warragal, Canis dingo -274 dhole, 
Cuon alpinus -275 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -276 hyena, hyaena -277 red fox, Vulpes vulpes -278 kit fox, Vulpes macrotis -279 Arctic fox, white fox, Alopex lagopus -280 grey fox, gray fox, Urocyon cinereoargenteus -281 tabby, tabby cat -282 tiger cat -283 Persian cat -284 Siamese cat, Siamese -285 Egyptian cat -286 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -287 lynx, catamount -288 leopard, Panthera pardus -289 snow leopard, ounce, Panthera uncia -290 jaguar, panther, Panthera onca, Felis onca -291 lion, king of beasts, Panthera leo -292 tiger, Panthera tigris -293 cheetah, chetah, Acinonyx jubatus -294 brown bear, bruin, Ursus arctos -295 American black bear, black bear, Ursus americanus, Euarctos americanus -296 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -297 sloth bear, Melursus ursinus, Ursus ursinus -298 mongoose -299 meerkat, mierkat -300 tiger beetle -301 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -302 ground beetle, carabid beetle -303 long-horned beetle, longicorn, longicorn beetle -304 leaf beetle, chrysomelid -305 dung beetle -306 rhinoceros beetle -307 weevil -308 fly -309 bee -310 ant, emmet, pismire -311 grasshopper, hopper -312 cricket -313 walking stick, walkingstick, stick insect -314 cockroach, roach -315 mantis, mantid -316 cicada, cicala -317 leafhopper -318 lacewing, lacewing fly -319 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -320 damselfly -321 admiral -322 ringlet, ringlet butterfly -323 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -324 cabbage butterfly -325 sulphur butterfly, sulfur butterfly -326 lycaenid, lycaenid butterfly -327 starfish, sea star -328 sea urchin -329 sea cucumber, holothurian -330 wood rabbit, cottontail, cottontail rabbit -331 hare -332 Angora, Angora rabbit -333 hamster -334 porcupine, hedgehog -335 fox squirrel, eastern fox squirrel, Sciurus niger -336 marmot -337 beaver -338 guinea pig, Cavia cobaya -339 sorrel -340 zebra -341 hog, pig, grunter, squealer, Sus scrofa -342 wild boar, boar, Sus scrofa -343 warthog -344 hippopotamus, hippo, river horse, Hippopotamus amphibius -345 ox -346 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -347 bison -348 ram, tup -349 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -350 ibex, Capra ibex -351 hartebeest -352 impala, Aepyceros melampus -353 gazelle -354 Arabian camel, dromedary, Camelus dromedarius -355 llama -356 weasel -357 mink -358 polecat, fitch, foulmart, foumart, Mustela putorius -359 black-footed ferret, ferret, Mustela nigripes -360 otter -361 skunk, polecat, wood pussy -362 badger -363 armadillo -364 three-toed sloth, ai, Bradypus tridactylus -365 orangutan, orang, orangutang, Pongo pygmaeus -366 gorilla, Gorilla gorilla -367 chimpanzee, chimp, Pan troglodytes -368 gibbon, Hylobates lar -369 siamang, Hylobates syndactylus, Symphalangus syndactylus -370 guenon, guenon monkey -371 patas, hussar monkey, Erythrocebus patas -372 baboon -373 macaque -374 langur -375 colobus, colobus monkey -376 proboscis monkey, Nasalis larvatus -377 marmoset -378 capuchin, ringtail, Cebus capucinus -379 howler monkey, howler -380 titi, titi monkey -381 spider monkey, Ateles geoffroyi -382 squirrel monkey, Saimiri sciureus -383 Madagascar cat, ring-tailed lemur, Lemur catta -384 indri, indris, Indri indri, Indri brevicaudatus -385 Indian elephant, Elephas 
maximus -386 African elephant, Loxodonta africana -387 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -388 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -389 barracouta, snoek -390 eel -391 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -392 rock beauty, Holocanthus tricolor -393 anemone fish -394 sturgeon -395 gar, garfish, garpike, billfish, Lepisosteus osseus -396 lionfish -397 puffer, pufferfish, blowfish, globefish -398 abacus -399 abaya -400 academic gown, academic robe, judge's robe -401 accordion, piano accordion, squeeze box -402 acoustic guitar -403 aircraft carrier, carrier, flattop, attack aircraft carrier -404 airliner -405 airship, dirigible -406 altar -407 ambulance -408 amphibian, amphibious vehicle -409 analog clock -410 apiary, bee house -411 apron -412 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -413 assault rifle, assault gun -414 backpack, back pack, knapsack, packsack, rucksack, haversack -415 bakery, bakeshop, bakehouse -416 balance beam, beam -417 balloon -418 ballpoint, ballpoint pen, ballpen, Biro -419 Band Aid -420 banjo -421 bannister, banister, balustrade, balusters, handrail -422 barbell -423 barber chair -424 barbershop -425 barn -426 barometer -427 barrel, cask -428 barrow, garden cart, lawn cart, wheelbarrow -429 baseball -430 basketball -431 bassinet -432 bassoon -433 bathing cap, swimming cap -434 bath towel -435 bathtub, bathing tub, bath, tub -436 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -437 beacon, lighthouse, beacon light, pharos -438 beaker -439 bearskin, busby, shako -440 beer bottle -441 beer glass -442 bell cote, bell cot -443 bib -444 bicycle-built-for-two, tandem bicycle, tandem -445 bikini, two-piece -446 binder, ring-binder -447 binoculars, field glasses, opera glasses -448 birdhouse -449 boathouse -450 bobsled, bobsleigh, bob -451 bolo tie, bolo, bola tie, bola -452 bonnet, poke bonnet -453 bookcase -454 bookshop, bookstore, bookstall -455 bottlecap -456 bow -457 bow tie, bow-tie, bowtie -458 brass, memorial tablet, plaque -459 brassiere, bra, bandeau -460 breakwater, groin, groyne, mole, bulwark, seawall, jetty -461 breastplate, aegis, egis -462 broom -463 bucket, pail -464 buckle -465 bulletproof vest -466 bullet train, bullet -467 butcher shop, meat market -468 cab, hack, taxi, taxicab -469 caldron, cauldron -470 candle, taper, wax light -471 cannon -472 canoe -473 can opener, tin opener -474 cardigan -475 car mirror -476 carousel, carrousel, merry-go-round, roundabout, whirligig -477 carpenter's kit, tool kit -478 carton -479 car wheel -480 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -481 cassette -482 cassette player -483 castle -484 catamaran -485 CD player -486 cello, violoncello -487 cellular telephone, cellular phone, cellphone, cell, mobile phone -488 chain -489 chainlink fence -490 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour -491 chain saw, chainsaw -492 chest -493 chiffonier, commode -494 chime, bell, gong -495 china cabinet, china closet -496 Christmas stocking -497 church, church building -498 cinema, movie theater, movie theatre, movie house, picture palace -499 cleaver, meat cleaver, chopper -500 cliff dwelling -501 cloak -502 clog, geta, patten, sabot -503 cocktail shaker -504 coffee mug -505 coffeepot -506 coil, spiral, volute, whorl, helix -507 
combination lock -508 computer keyboard, keypad -509 confectionery, confectionary, candy store -510 container ship, containership, container vessel -511 convertible -512 corkscrew, bottle screw -513 cornet, horn, trumpet, trump -514 cowboy boot -515 cowboy hat, ten-gallon hat -516 cradle -517 crane -518 crash helmet -519 crate -520 crib, cot -521 Crock Pot -522 croquet ball -523 crutch -524 cuirass -525 dam, dike, dyke -526 desk -527 desktop computer -528 dial telephone, dial phone -529 diaper, nappy, napkin -530 digital clock -531 digital watch -532 dining table, board -533 dishrag, dishcloth -534 dishwasher, dish washer, dishwashing machine -535 disk brake, disc brake -536 dock, dockage, docking facility -537 dogsled, dog sled, dog sleigh -538 dome -539 doormat, welcome mat -540 drilling platform, offshore rig -541 drum, membranophone, tympan -542 drumstick -543 dumbbell -544 Dutch oven -545 electric fan, blower -546 electric guitar -547 electric locomotive -548 entertainment center -549 envelope -550 espresso maker -551 face powder -552 feather boa, boa -553 file, file cabinet, filing cabinet -554 fireboat -555 fire engine, fire truck -556 fire screen, fireguard -557 flagpole, flagstaff -558 flute, transverse flute -559 folding chair -560 football helmet -561 forklift -562 fountain -563 fountain pen -564 four-poster -565 freight car -566 French horn, horn -567 frying pan, frypan, skillet -568 fur coat -569 garbage truck, dustcart -570 gasmask, respirator, gas helmet -571 gas pump, gasoline pump, petrol pump, island dispenser -572 goblet -573 go-kart -574 golf ball -575 golfcart, golf cart -576 gondola -577 gong, tam-tam -578 gown -579 grand piano, grand -580 greenhouse, nursery, glasshouse -581 grille, radiator grille -582 grocery store, grocery, food market, market -583 guillotine -584 hair slide -585 hair spray -586 half track -587 hammer -588 hamper -589 hand blower, blow dryer, blow drier, hair dryer, hair drier -590 hand-held computer, hand-held microcomputer -591 handkerchief, hankie, hanky, hankey -592 hard disc, hard disk, fixed disk -593 harmonica, mouth organ, harp, mouth harp -594 harp -595 harvester, reaper -596 hatchet -597 holster -598 home theater, home theatre -599 honeycomb -600 hook, claw -601 hoopskirt, crinoline -602 horizontal bar, high bar -603 horse cart, horse-cart -604 hourglass -605 iPod -606 iron, smoothing iron -607 jack-o'-lantern -608 jean, blue jean, denim -609 jeep, landrover -610 jersey, T-shirt, tee shirt -611 jigsaw puzzle -612 jinrikisha, ricksha, rickshaw -613 joystick -614 kimono -615 knee pad -616 knot -617 lab coat, laboratory coat -618 ladle -619 lampshade, lamp shade -620 laptop, laptop computer -621 lawn mower, mower -622 lens cap, lens cover -623 letter opener, paper knife, paperknife -624 library -625 lifeboat -626 lighter, light, igniter, ignitor -627 limousine, limo -628 liner, ocean liner -629 lipstick, lip rouge -630 Loafer -631 lotion -632 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -633 loupe, jeweler's loupe -634 lumbermill, sawmill -635 magnetic compass -636 mailbag, postbag -637 mailbox, letter box -638 maillot -639 maillot, tank suit -640 manhole cover -641 maraca -642 marimba, xylophone -643 mask -644 matchstick -645 maypole -646 maze, labyrinth -647 measuring cup -648 medicine chest, medicine cabinet -649 megalith, megalithic structure -650 microphone, mike -651 microwave, microwave oven -652 military uniform -653 milk can -654 minibus -655 miniskirt, mini -656 minivan -657 missile -658 mitten -659 
mixing bowl -660 mobile home, manufactured home -661 Model T -662 modem -663 monastery -664 monitor -665 moped -666 mortar -667 mortarboard -668 mosque -669 mosquito net -670 motor scooter, scooter -671 mountain bike, all-terrain bike, off-roader -672 mountain tent -673 mouse, computer mouse -674 mousetrap -675 moving van -676 muzzle -677 nail -678 neck brace -679 necklace -680 nipple -681 notebook, notebook computer -682 obelisk -683 oboe, hautboy, hautbois -684 ocarina, sweet potato -685 odometer, hodometer, mileometer, milometer -686 oil filter -687 organ, pipe organ -688 oscilloscope, scope, cathode-ray oscilloscope, CRO -689 overskirt -690 oxcart -691 oxygen mask -692 packet -693 paddle, boat paddle -694 paddlewheel, paddle wheel -695 padlock -696 paintbrush -697 pajama, pyjama, pj's, jammies -698 palace -699 panpipe, pandean pipe, syrinx -700 paper towel -701 parachute, chute -702 parallel bars, bars -703 park bench -704 parking meter -705 passenger car, coach, carriage -706 patio, terrace -707 pay-phone, pay-station -708 pedestal, plinth, footstall -709 pencil box, pencil case -710 pencil sharpener -711 perfume, essence -712 Petri dish -713 photocopier -714 pick, plectrum, plectron -715 pickelhaube -716 picket fence, paling -717 pickup, pickup truck -718 pier -719 piggy bank, penny bank -720 pill bottle -721 pillow -722 ping-pong ball -723 pinwheel -724 pirate, pirate ship -725 pitcher, ewer -726 plane, carpenter's plane, woodworking plane -727 planetarium -728 plastic bag -729 plate rack -730 plow, plough -731 plunger, plumber's helper -732 Polaroid camera, Polaroid Land camera -733 pole -734 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -735 poncho -736 pool table, billiard table, snooker table -737 pop bottle, soda bottle -738 pot, flowerpot -739 potter's wheel -740 power drill -741 prayer rug, prayer mat -742 printer -743 prison, prison house -744 projectile, missile -745 projector -746 puck, hockey puck -747 punching bag, punch bag, punching ball, punchball -748 purse -749 quill, quill pen -750 quilt, comforter, comfort, puff -751 racer, race car, racing car -752 racket, racquet -753 radiator -754 radio, wireless -755 radio telescope, radio reflector -756 rain barrel -757 recreational vehicle, RV, R.V. 
-758 reel -759 reflex camera -760 refrigerator, icebox -761 remote control, remote -762 restaurant, eating house, eating place, eatery -763 revolver, six-gun, six-shooter -764 rifle -765 rocking chair, rocker -766 rotisserie -767 rubber eraser, rubber, pencil eraser -768 rugby ball -769 rule, ruler -770 running shoe -771 safe -772 safety pin -773 saltshaker, salt shaker -774 sandal -775 sarong -776 sax, saxophone -777 scabbard -778 scale, weighing machine -779 school bus -780 schooner -781 scoreboard -782 screen, CRT screen -783 screw -784 screwdriver -785 seat belt, seatbelt -786 sewing machine -787 shield, buckler -788 shoe shop, shoe-shop, shoe store -789 shoji -790 shopping basket -791 shopping cart -792 shovel -793 shower cap -794 shower curtain -795 ski -796 ski mask -797 sleeping bag -798 slide rule, slipstick -799 sliding door -800 slot, one-armed bandit -801 snorkel -802 snowmobile -803 snowplow, snowplough -804 soap dispenser -805 soccer ball -806 sock -807 solar dish, solar collector, solar furnace -808 sombrero -809 soup bowl -810 space bar -811 space heater -812 space shuttle -813 spatula -814 speedboat -815 spider web, spider's web -816 spindle -817 sports car, sport car -818 spotlight, spot -819 stage -820 steam locomotive -821 steel arch bridge -822 steel drum -823 stethoscope -824 stole -825 stone wall -826 stopwatch, stop watch -827 stove -828 strainer -829 streetcar, tram, tramcar, trolley, trolley car -830 stretcher -831 studio couch, day bed -832 stupa, tope -833 submarine, pigboat, sub, U-boat -834 suit, suit of clothes -835 sundial -836 sunglass -837 sunglasses, dark glasses, shades -838 sunscreen, sunblock, sun blocker -839 suspension bridge -840 swab, swob, mop -841 sweatshirt -842 swimming trunks, bathing trunks -843 swing -844 switch, electric switch, electrical switch -845 syringe -846 table lamp -847 tank, army tank, armored combat vehicle, armoured combat vehicle -848 tape player -849 teapot -850 teddy, teddy bear -851 television, television system -852 tennis ball -853 thatch, thatched roof -854 theater curtain, theatre curtain -855 thimble -856 thresher, thrasher, threshing machine -857 throne -858 tile roof -859 toaster -860 tobacco shop, tobacconist shop, tobacconist -861 toilet seat -862 torch -863 totem pole -864 tow truck, tow car, wrecker -865 toyshop -866 tractor -867 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -868 tray -869 trench coat -870 tricycle, trike, velocipede -871 trimaran -872 tripod -873 triumphal arch -874 trolleybus, trolley coach, trackless trolley -875 trombone -876 tub, vat -877 turnstile -878 typewriter keyboard -879 umbrella -880 unicycle, monocycle -881 upright, upright piano -882 vacuum, vacuum cleaner -883 vase -884 vault -885 velvet -886 vending machine -887 vestment -888 viaduct -889 violin, fiddle -890 volleyball -891 waffle iron -892 wall clock -893 wallet, billfold, notecase, pocketbook -894 wardrobe, closet, press -895 warplane, military plane -896 washbasin, handbasin, washbowl, lavabo, wash-hand basin -897 washer, automatic washer, washing machine -898 water bottle -899 water jug -900 water tower -901 whiskey jug -902 whistle -903 wig -904 window screen -905 window shade -906 Windsor tie -907 wine bottle -908 wing -909 wok -910 wooden spoon -911 wool, woolen, woollen -912 worm fence, snake fence, snake-rail fence, Virginia fence -913 wreck -914 yawl -915 yurt -916 web site, website, internet site, site -917 comic book -918 crossword puzzle, crossword -919 street sign -920 traffic light, 
traffic signal, stoplight -921 book jacket, dust cover, dust jacket, dust wrapper -922 menu -923 plate -924 guacamole -925 consomme -926 hot pot, hotpot -927 trifle -928 ice cream, icecream -929 ice lolly, lolly, lollipop, popsicle -930 French loaf -931 bagel, beigel -932 pretzel -933 cheeseburger -934 hotdog, hot dog, red hot -935 mashed potato -936 head cabbage -937 broccoli -938 cauliflower -939 zucchini, courgette -940 spaghetti squash -941 acorn squash -942 butternut squash -943 cucumber, cuke -944 artichoke, globe artichoke -945 bell pepper -946 cardoon -947 mushroom -948 Granny Smith -949 strawberry -950 orange -951 lemon -952 fig -953 pineapple, ananas -954 banana -955 jackfruit, jak, jack -956 custard apple -957 pomegranate -958 hay -959 carbonara -960 chocolate sauce, chocolate syrup -961 dough -962 meat loaf, meatloaf -963 pizza, pizza pie -964 potpie -965 burrito -966 red wine -967 espresso -968 cup -969 eggnog -970 alp -971 bubble -972 cliff, drop, drop-off -973 coral reef -974 geyser -975 lakeside, lakeshore -976 promontory, headland, head, foreland -977 sandbar, sand bar -978 seashore, coast, seacoast, sea-coast -979 valley, vale -980 volcano -981 ballplayer, baseball player -982 groom, bridegroom -983 scuba diver -984 rapeseed -985 daisy -986 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -987 corn -988 acorn -989 hip, rose hip, rosehip -990 buckeye, horse chestnut, conker -991 coral fungus -992 agaric -993 gyromitra -994 stinkhorn, carrion fungus -995 earthstar -996 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -997 bolete -998 ear, spike, capitulum -999 toilet tissue, toilet paper, bathroom tissue diff --git a/example/auto_compression/image_classification/infer.py b/example/auto_compression/image_classification/infer.py index 88e4b82de..46eb7115a 100644 --- a/example/auto_compression/image_classification/infer.py +++ b/example/auto_compression/image_classification/infer.py @@ -13,141 +13,85 @@ # limitations under the License. 
import os -import sys -import cv2 import numpy as np -import platform +import cv2 +import time +import sys import argparse -import base64 -import shutil +import yaml +from utils import preprocess, postprocess import paddle -from postprocess import build_postprocess -from preprocess import create_operators -from paddleslim.auto_compression.config_helpers import load_config +from paddle.inference import create_predictor +from paddleslim.common import load_config def argsparser(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '-c', - '--config', + '--config_path', type=str, - default='configs/config.yaml', + default='./image_classification/configs/infer.yaml', help='config file path') + parser.add_argument( + '--model_dir', + type=str, + default='./MobileNetV1_infer', + help='model directory') + parser.add_argument( + '--use_fp16', type=bool, default=False, help='Whether to use fp16') + parser.add_argument( + '--use_int8', type=bool, default=False, help='Whether to use int8') return parser -def print_arguments(args): - print('----------- Running Arguments -----------') - for arg, value in args.items(): - print('%s: %s' % (arg, value)) - print('------------------------------------------') - - -def get_image_list(img_file): - imgs_lists = [] - if img_file is None or not os.path.exists(img_file): - raise Exception("not found any img file in {}".format(img_file)) - - img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp'] - if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end: - imgs_lists.append(img_file) - elif os.path.isdir(img_file): - for single_file in os.listdir(img_file): - if single_file.split('.')[-1] in img_end: - imgs_lists.append(os.path.join(img_file, single_file)) - if len(imgs_lists) == 0: - raise Exception("not found any img file in {}".format(img_file)) - imgs_lists = sorted(imgs_lists) - return imgs_lists - - class Predictor(object): def __init__(self, config): - predict_args = config['Global'] - # HALF precission predict only work when using tensorrt - if predict_args['use_fp16'] is True: - assert predict_args.use_tensorrt is True - self.args = predict_args - if self.args.get("use_onnx", False): - self.predictor, self.config = self.create_onnx_predictor( - predict_args) - else: - self.predictor, self.config = self.create_paddle_predictor( - predict_args) - - self.preprocess_ops = [] - self.postprocess = None - if "PreProcess" in config: - if "transform_ops" in config["PreProcess"]: - self.preprocess_ops = create_operators(config["PreProcess"][ - "transform_ops"]) - if "PostProcess" in config: - self.postprocess = build_postprocess(config["PostProcess"]) - - # for whole_chain project to test each repo of paddle - self.benchmark = config["Global"].get("benchmark", False) - if self.benchmark: - import auto_log - import os - pid = os.getpid() - size = config["PreProcess"]["transform_ops"][1]["CropImage"]["size"] - if config["Global"].get("use_int8", False): - precision = "int8" - elif config["Global"].get("use_fp16", False): - precision = "fp16" - else: - precision = "fp32" - self.auto_logger = auto_log.AutoLogger( - model_name=config["Global"].get("model_name", "cls"), - model_precision=precision, - batch_size=config["Global"].get("batch_size", 1), - data_shape=[3, size, size], - save_path=config["Global"].get("save_log_path", - "./auto_log.log"), - inference_config=self.config, - pids=pid, - process_name=None, - gpu_ids=None, - time_keys=[ - 'preprocess_time', 'inference_time', 'postprocess_time' - ], - warmup=2) - - def 
create_paddle_predictor(self, args): - inference_model_dir = args['inference_model_dir'] - - params_file = os.path.join(inference_model_dir, args['params_filename']) - model_file = os.path.join(inference_model_dir, args['model_filename']) + # HALF precission predict only work when using tensorrt + if config['use_fp16'] is True: + assert config['use_tensorrt'] is True + self.config = config + + self.paddle_predictor = self.create_paddle_predictor() + input_names = self.paddle_predictor.get_input_names() + self.input_tensor = self.paddle_predictor.get_input_handle(input_names[ + 0]) + + output_names = self.paddle_predictor.get_output_names() + self.output_tensor = self.paddle_predictor.get_output_handle( + output_names[0]) + + def create_paddle_predictor(self): + inference_model_dir = self.config['model_dir'] + model_file = os.path.join(inference_model_dir, + self.config['model_filename']) + params_file = os.path.join(inference_model_dir, + self.config['params_filename']) config = paddle.inference.Config(model_file, params_file) - - if args['use_gpu']: - config.enable_use_gpu(args['gpu_mem'], 0) + precision = paddle.inference.Config.Precision.Float32 + if self.config['use_int8']: + precision = paddle.inference.Config.Precision.Int8 + elif self.config['use_fp16']: + precision = paddle.inference.Config.Precision.Half + + if self.config['use_gpu']: + config.enable_use_gpu(self.config['gpu_mem'], 0) else: config.disable_gpu() - if args['enable_mkldnn']: - # there is no set_mkldnn_cache_capatity() on macOS - if platform.system() != "Darwin": - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) + if self.config['enable_mkldnn']: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() - config.set_cpu_math_library_num_threads(args['cpu_num_threads']) + config.set_cpu_math_library_num_threads(self.config['cpu_num_threads']) - if args['enable_profile']: + if self.config['enable_profile']: config.enable_profile() config.disable_glog_info() - config.switch_ir_optim(args['ir_optim']) # default true - if args['use_tensorrt']: - precision = paddle.inference.Config.Precision.Float32 - if args.get("use_int8", False): - precision = paddle.inference.Config.Precision.Int8 - elif args.get("use_fp16", False): - precision = paddle.inference.Config.Precision.Half - + config.switch_ir_optim(self.config['ir_optim']) # default true + if self.config['use_tensorrt']: config.enable_tensorrt_engine( precision_mode=precision, - max_batch_size=args['batch_size'], + max_batch_size=self.config['batch_size'], workspace_size=1 << 30, min_subgraph_size=30, use_calib_mode=False) @@ -155,112 +99,43 @@ def create_paddle_predictor(self, args): config.enable_memory_optim() # use zero copy config.switch_use_feed_fetch_ops(False) - predictor = paddle.inference.create_predictor(config) - - return predictor, config - - def create_onnx_predictor(self, args): - import onnxruntime as ort - inference_model_dir = args['inference_model_dir'] - model_file = os.path.join(inference_model_dir, args['model_filename']) - config = ort.SessionOptions() - if args['use_gpu']: - raise ValueError( - "onnx inference now only supports cpu! please specify use_gpu false." 
- ) - else: - config.intra_op_num_threads = args['cpu_num_threads'] - if args['ir_optim']: - config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - predictor = ort.InferenceSession(model_file, sess_options=config) - return predictor, config - - def predict(self, images): - use_onnx = self.args.get("use_onnx", False) - if not use_onnx: - input_names = self.predictor.get_input_names() - input_tensor = self.predictor.get_input_handle(input_names[0]) - - output_names = self.predictor.get_output_names() - output_tensor = self.predictor.get_output_handle(output_names[0]) - else: - input_names = self.predictor.get_inputs()[0].name - output_names = self.predictor.get_outputs()[0].name - - if self.benchmark: - self.auto_logger.times.start() - if not isinstance(images, (list, )): - images = [images] - for idx in range(len(images)): - for ops in self.preprocess_ops: - images[idx] = ops(images[idx]) - image = np.array(images) - if self.benchmark: - self.auto_logger.times.stamp() - - if not use_onnx: - input_tensor.copy_from_cpu(image) - self.predictor.run() - batch_output = output_tensor.copy_to_cpu() - else: - batch_output = self.predictor.run( - output_names=[output_names], input_feed={input_names: image})[0] - - if self.benchmark: - self.auto_logger.times.stamp() - if self.postprocess is not None: - batch_output = self.postprocess(batch_output) - if self.benchmark: - self.auto_logger.times.end(stamp=True) - return batch_output - - -def main(config): - predictor = Predictor(config) - image_list = get_image_list(config["Global"]["infer_imgs"]) - image_list = image_list * 1000 - batch_imgs = [] - batch_names = [] - cnt = 0 - for idx, img_path in enumerate(image_list): - img = cv2.imread(img_path) - if img is None: - logger.warning( - "Image file failed to read and has been skipped. The path: {}". 
- format(img_path)) - else: - img = img[:, :, ::-1] - batch_imgs.append(img) - img_name = os.path.basename(img_path) - batch_names.append(img_name) - cnt += 1 - - if cnt % config["Global"]["batch_size"] == 0 or (idx + 1 - ) == len(image_list): - if len(batch_imgs) == 0: - continue - batch_results = predictor.predict(batch_imgs) - for number, result_dict in enumerate(batch_results): - if "PersonAttribute" in config[ - "PostProcess"] or "VehicleAttribute" in config[ - "PostProcess"]: - filename = batch_names[number] - else: - filename = batch_names[number] - clas_ids = result_dict["class_ids"] - scores_str = "[{}]".format(", ".join("{:.2f}".format( - r) for r in result_dict["scores"])) - label_names = result_dict["label_names"] - batch_imgs = [] - batch_names = [] - if predictor.benchmark: - predictor.auto_logger.report() - return + predictor = create_predictor(config) + + return predictor + + def predict(self): + test_num = 1000 + test_time = 0.0 + for i in range(0, test_num + 10): + inputs = np.random.rand(config['batch_size'], 3, + config['image_size'], + config['image_size']).astype(np.float32) + start_time = time.time() + self.input_tensor.copy_from_cpu(inputs) + self.paddle_predictor.run() + batch_output = self.output_tensor.copy_to_cpu().flatten() + if i >= 10: + test_time += time.time() - start_time + time.sleep(0.01) # sleep for T4 GPU + + fp_message = "FP16" if config['use_fp16'] else "FP32" + fp_message = "INT8" if config['use_int8'] else fp_message + trt_msg = "using tensorrt" if config[ + 'use_tensorrt'] else "not using tensorrt" + print("{0}\t{1}\tbatch size: {2}\ttime(ms): {3}".format( + trt_msg, fp_message, config[ + 'batch_size'], 1000 * test_time / test_num)) if __name__ == "__main__": parser = argsparser() args = parser.parse_args() - config = load_config(args.config) - print_arguments(config['Global']) - main(config) + config = load_config(args.config_path) + if args.model_dir != config['model_dir']: + config['model_dir'] = args.model_dir + if args.use_fp16 != config['use_fp16']: + config['use_fp16'] = args.use_fp16 + if args.use_int8 != config['use_int8']: + config['use_int8'] = args.use_int8 + predictor = Predictor(config) + predictor.predict() diff --git a/example/auto_compression/image_classification/postprocess.py b/example/auto_compression/image_classification/postprocess.py deleted file mode 100644 index 9b9b4afcb..000000000 --- a/example/auto_compression/image_classification/postprocess.py +++ /dev/null @@ -1,131 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
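The rewritten infer.py above turns inference into a latency benchmark driven entirely by infer.yaml. A rough sketch of driving it programmatically, mirroring its `__main__` block (the paths are just the defaults from the config above; `use_int8`/`use_fp16` only take effect when `use_tensorrt` is enabled on a GPU, and the snippet assumes it is run from the example directory so that infer.py is importable):

```python
from paddleslim.common import load_config
import infer                                   # infer.py as rewritten above

config = load_config('./image_classification/configs/infer.yaml')
config['model_dir'] = './MobileNetV1_infer'    # same default as the --model_dir flag
config['use_int8'] = True                      # time the INT8 TensorRT engine

# predict() reads the module-level `config`, which infer.py normally sets in
# its __main__ block, so mirror that here before calling it.
infer.config = config
predictor = infer.Predictor(config)
predictor.predict()    # 1000 timed batches of random input; prints average latency
```

The same run can be launched from the command line with the `--config_path`, `--model_dir`, `--use_fp16` and `--use_int8` flags added in the argparser above.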
- -import os -import copy -import shutil -from functools import partial -import importlib -import numpy as np -import paddle -import paddle.nn.functional as F - - -def build_postprocess(config): - if config is None: - return None - - mod = importlib.import_module(__name__) - config = copy.deepcopy(config) - - main_indicator = config.pop( - "main_indicator") if "main_indicator" in config else None - main_indicator = main_indicator if main_indicator else "" - - func_list = [] - for func in config: - func_list.append(getattr(mod, func)(**config[func])) - return PostProcesser(func_list, main_indicator) - - -class PostProcesser(object): - def __init__(self, func_list, main_indicator="Topk"): - self.func_list = func_list - self.main_indicator = main_indicator - - def __call__(self, x, image_file=None): - rtn = None - for func in self.func_list: - tmp = func(x, image_file) - if type(func).__name__ in self.main_indicator: - rtn = tmp - return rtn - - -class Topk(object): - def __init__(self, topk=1, class_id_map_file=None): - assert isinstance(topk, (int, )) - self.class_id_map = self.parse_class_id_map(class_id_map_file) - self.topk = topk - - def parse_class_id_map(self, class_id_map_file): - if class_id_map_file is None: - return None - - if not os.path.exists(class_id_map_file): - print( - "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" - ) - return None - - try: - class_id_map = {} - with open(class_id_map_file, "r") as fin: - lines = fin.readlines() - for line in lines: - partition = line.split("\n")[0].partition(" ") - class_id_map[int(partition[0])] = str(partition[-1]) - except Exception as ex: - print(ex) - class_id_map = None - return class_id_map - - def __call__(self, x, file_names=None, multilabel=False): - if file_names is not None: - assert x.shape[0] == len(file_names) - y = [] - for idx, probs in enumerate(x): - index = probs.argsort(axis=0)[-self.topk:][::-1].astype( - "int32") if not multilabel else np.where( - probs >= 0.5)[0].astype("int32") - clas_id_list = [] - score_list = [] - label_name_list = [] - for i in index: - clas_id_list.append(i.item()) - score_list.append(probs[i].item()) - if self.class_id_map is not None: - label_name_list.append(self.class_id_map[i.item()]) - result = { - "class_ids": clas_id_list, - "scores": np.around( - score_list, decimals=5).tolist(), - } - if file_names is not None: - result["file_name"] = file_names[idx] - if label_name_list is not None: - result["label_names"] = label_name_list - y.append(result) - return y - - -class SavePreLabel(object): - def __init__(self, save_dir): - if save_dir is None: - raise Exception( - "Please specify save_dir if SavePreLabel specified.") - self.save_dir = partial(os.path.join, save_dir) - - def __call__(self, x, file_names=None): - if file_names is None: - return - assert x.shape[0] == len(file_names) - for idx, probs in enumerate(x): - index = probs.argsort(axis=0)[-1].astype("int32") - self.save(index, file_names[idx]) - - def save(self, id, image_file): - output_dir = self.save_dir(str(id)) - os.makedirs(output_dir, exist_ok=True) - shutil.copy(image_file, output_dir) diff --git a/example/auto_compression/image_classification/preprocess.py b/example/auto_compression/image_classification/preprocess.py deleted file mode 100644 index 95561698b..000000000 --- a/example/auto_compression/image_classification/preprocess.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from functools import partial -import six -import math -import random -import cv2 -import numpy as np -import importlib -from PIL import Image - - -def create_operators(params): - """ - create operators based on the config - - Args: - params(list): a dict list, used to create some operators - """ - assert isinstance(params, list), ('operator config should be a list') - mod = importlib.import_module(__name__) - ops = [] - for operator in params: - assert isinstance(operator, - dict) and len(operator) == 1, "yaml format error" - op_name = list(operator)[0] - param = {} if operator[op_name] is None else operator[op_name] - op = getattr(mod, op_name)(**param) - ops.append(op) - - return ops - - -class UnifiedResize(object): - def __init__(self, interpolation=None, backend="cv2"): - _cv2_interp_from_str = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'area': cv2.INTER_AREA, - 'bicubic': cv2.INTER_CUBIC, - 'lanczos': cv2.INTER_LANCZOS4 - } - _pil_interp_from_str = { - 'nearest': Image.NEAREST, - 'bilinear': Image.BILINEAR, - 'bicubic': Image.BICUBIC, - 'box': Image.BOX, - 'lanczos': Image.LANCZOS, - 'hamming': Image.HAMMING - } - - def _pil_resize(src, size, resample): - pil_img = Image.fromarray(src) - pil_img = pil_img.resize(size, resample) - return np.asarray(pil_img) - - if backend.lower() == "cv2": - if isinstance(interpolation, str): - interpolation = _cv2_interp_from_str[interpolation.lower()] - # compatible with opencv < version 4.4.0 - elif interpolation is None: - interpolation = cv2.INTER_LINEAR - self.resize_func = partial(cv2.resize, interpolation=interpolation) - elif backend.lower() == "pil": - if isinstance(interpolation, str): - interpolation = _pil_interp_from_str[interpolation.lower()] - self.resize_func = partial(_pil_resize, resample=interpolation) - else: - logger.warning( - f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead." 
- ) - self.resize_func = cv2.resize - - def __call__(self, src, size): - return self.resize_func(src, size) - - -class OperatorParamError(ValueError): - """ OperatorParamError - """ - pass - - -class ResizeImage(object): - """ resize image """ - - def __init__(self, - size=None, - resize_short=None, - interpolation=None, - backend="cv2"): - if resize_short is not None and resize_short > 0: - self.resize_short = resize_short - self.w = None - self.h = None - elif size is not None: - self.resize_short = None - self.w = size if type(size) is int else size[0] - self.h = size if type(size) is int else size[1] - else: - raise OperatorParamError("invalid params for ReisizeImage for '\ - 'both 'size' and 'resize_short' are None") - - self._resize_func = UnifiedResize( - interpolation=interpolation, backend=backend) - - def __call__(self, img): - img_h, img_w = img.shape[:2] - if self.resize_short is not None: - percent = float(self.resize_short) / min(img_w, img_h) - w = int(round(img_w * percent)) - h = int(round(img_h * percent)) - else: - w = self.w - h = self.h - return self._resize_func(img, (w, h)) - - -class CropImage(object): - """ crop image """ - - def __init__(self, size): - if type(size) is int: - self.size = (size, size) - else: - self.size = size # (h, w) - - def __call__(self, img): - w, h = self.size - img_h, img_w = img.shape[:2] - - if img_h < h or img_w < w: - raise Exception( - f"The size({h}, {w}) of CropImage must be greater than size({img_h}, {img_w}) of image. Please check image original size and size of ResizeImage if used." - ) - - w_start = (img_w - w) // 2 - h_start = (img_h - h) // 2 - - w_end = w_start + w - h_end = h_start + h - return img[h_start:h_end, w_start:w_end, :] - - -class NormalizeImage(object): - """ normalize image such as substract mean, divide std - """ - - def __init__(self, - scale=None, - mean=None, - std=None, - order='chw', - output_fp16=False, - channel_num=3): - if isinstance(scale, str): - scale = eval(scale) - assert channel_num in [ - 3, 4 - ], "channel number of input image should be set to 3 or 4." 
- self.channel_num = channel_num - self.output_dtype = 'float16' if output_fp16 else 'float32' - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - self.order = order - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype('float32') - self.std = np.array(std).reshape(shape).astype('float32') - - def __call__(self, img): - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - - assert isinstance(img, - np.ndarray), "invalid input 'img' in NormalizeImage" - - img = (img.astype('float32') * self.scale - self.mean) / self.std - - if self.channel_num == 4: - img_h = img.shape[1] if self.order == 'chw' else img.shape[0] - img_w = img.shape[2] if self.order == 'chw' else img.shape[1] - pad_zeros = np.zeros( - (1, img_h, img_w)) if self.order == 'chw' else np.zeros( - (img_h, img_w, 1)) - img = (np.concatenate( - (img, pad_zeros), axis=0) - if self.order == 'chw' else np.concatenate( - (img, pad_zeros), axis=2)) - return img.astype(self.output_dtype) - - -class ToCHWImage(object): - """ convert hwc image to chw image - """ - - def __init__(self): - pass - - def __call__(self, img): - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - - return img.transpose((2, 0, 1)) diff --git a/example/auto_compression/image_classification/run.py b/example/auto_compression/image_classification/run.py index c3ecf1377..dee25a175 100644 --- a/example/auto_compression/image_classification/run.py +++ b/example/auto_compression/image_classification/run.py @@ -14,19 +14,17 @@ import os import sys -sys.path[0] = os.path.join( - os.path.dirname("__file__"), os.path.pardir, os.path.pardir) import argparse import functools from functools import partial +import math import numpy as np -import math import paddle import paddle.nn as nn -from paddle.io import Dataset, BatchSampler, DataLoader -import imagenet_reader as reader -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddle.io import DataLoader +from imagenet_reader import ImageNetDataset +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression @@ -48,41 +46,53 @@ def argsparser(): type=int, default=1281167, help="the number of total training images.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") return parser # yapf: enable def reader_wrapper(reader, input_name): def gen(): - for i, data in enumerate(reader()): - imgs = np.float32([item[0] for item in data]) + for i, (imgs, label) in enumerate(reader()): yield {input_name: imgs} return gen -def eval_reader(data_dir, batch_size): - val_reader = paddle.batch( - reader.val(data_dir=data_dir), batch_size=batch_size) - return val_reader +def eval_reader(data_dir, batch_size, crop_size, resize_size, place=None): + val_reader = ImageNetDataset( + mode='val', + data_dir=data_dir, + crop_size=crop_size, + resize_size=resize_size) + val_loader = DataLoader( + val_reader, + places=[place] if place is not None else None, + batch_size=global_config['batch_size'], + shuffle=False, + drop_last=False, + num_workers=0) + return val_loader def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): - val_reader = eval_reader(data_dir, batch_size=global_config['batch_size']) - image = 
paddle.static.data( - name=global_config['input_name'], - shape=[None, 3, 224, 224], - dtype='float32') - label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') + val_loader = eval_reader( + data_dir, + batch_size=global_config['batch_size'], + crop_size=img_size, + resize_size=resize_size) results = [] - print('Evaluating... It will take a while. Please wait...') - for batch_id, data in enumerate(val_reader()): + print('Evaluating...') + for batch_id, (image, label) in enumerate(val_loader): # top1_acc, top5_acc if len(test_feed_names) == 1: - image = np.array([[d[0]] for d in data]) - image = image.reshape((len(data), 3, 224, 224)) - label = [[d[1]] for d in data] + image = np.array(image) + label = np.array(label).astype('int64') pred = exe.run(compiled_test_program, feed={test_feed_names[0]: image}, fetch_list=test_fetch_list) @@ -100,9 +110,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): results.append([top_1, top_5]) else: # eval "eval model", which inputs are image and label, output is top1 and top5 accuracy - image = np.array([[d[0]] for d in data]) - image = image.reshape((len(data), 3, 224, 224)) - label = [[d[1]] for d in data] + image = np.array(image) + label = np.array(label).astype('int64') result = exe.run( compiled_test_program, feed={test_feed_names[0]: image, @@ -110,15 +119,26 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): fetch_list=test_fetch_list) result = [np.mean(r) for r in result] results.append(result) + if batch_id % 100 == 0: + print('Eval iter: ', batch_id) result = np.mean(np.array(results), axis=0) return result[0] def main(): + rank_id = paddle.distributed.get_rank() + if args.devices == 'gpu': + place = paddle.CUDAPlace(rank_id) + paddle.set_device('gpu') + else: + place = paddle.CPUPlace() + paddle.set_device('cpu') global global_config all_config = load_slim_config(args.config_path) + assert "Global" in all_config, f"Key 'Global' not found in config file. 
\n{all_config}" global_config = all_config["Global"] + gpu_num = paddle.distributed.get_world_size() if isinstance(all_config['TrainConfig']['learning_rate'], dict) and all_config['TrainConfig']['learning_rate'][ @@ -129,12 +149,29 @@ def main(): gpu_num))) all_config['TrainConfig']['learning_rate']['T_max'] = step print('total training steps:', step) + global data_dir data_dir = global_config['data_dir'] - train_reader = paddle.batch( - reader.train(data_dir=data_dir), batch_size=global_config['batch_size']) - train_dataloader = reader_wrapper(train_reader, global_config['input_name']) + global img_size, resize_size + img_size = global_config['img_size'] if 'img_size' in global_config else 224 + resize_size = global_config[ + 'resize_size'] if 'resize_size' in global_config else 256 + + train_dataset = ImageNetDataset( + mode='train', + data_dir=data_dir, + crop_size=img_size, + resize_size=resize_size) + + train_loader = DataLoader( + train_dataset, + places=[place], + batch_size=global_config['batch_size'], + shuffle=True, + drop_last=True, + num_workers=0) + train_dataloader = reader_wrapper(train_loader, global_config['input_name']) ac = AutoCompression( model_dir=global_config['model_dir'], @@ -143,9 +180,14 @@ def main(): save_dir=args.save_dir, config=all_config, train_dataloader=train_dataloader, - eval_callback=eval_function, + eval_callback=eval_function if rank_id == 0 else None, eval_dataloader=reader_wrapper( - eval_reader(data_dir, global_config['batch_size']), + eval_reader( + data_dir, + global_config['batch_size'], + crop_size=img_size, + resize_size=resize_size, + place=place), global_config['input_name'])) ac.compress() diff --git a/example/auto_compression/image_classification/run.sh b/example/auto_compression/image_classification/run.sh deleted file mode 100644 index 4d8777d88..000000000 --- a/example/auto_compression/image_classification/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -# 单卡启动 -export CUDA_VISIBLE_DEVICES=0 -python3.7 eval.py --save_dir='./save_quant_mobilev1/' --config_path='./configs/MobileNetV1/qat_dis.yaml' - -# 多卡启动 -export CUDA_VISIBLE_DEVICES=0,1,2,3 -python -m paddle.distributed.launch run.py --save_dir='./save_quant_mobilev1/' --config_path='./configs/MobileNetV1/qat_dis.yaml' - diff --git a/example/auto_compression/image_classification/utils.py b/example/auto_compression/image_classification/utils.py new file mode 100644 index 000000000..f7a94e67b --- /dev/null +++ b/example/auto_compression/image_classification/utils.py @@ -0,0 +1,107 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
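+"""Shared helpers for the image-classification inference demo in this folder:
+image preprocessing (short-side resize, center crop, optional normalization,
+HWC->CHW conversion) and top-k / multilabel postprocessing of classifier outputs."""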
+ +import os +import argparse +import base64 +import shutil +import cv2 +import numpy as np + + +def preprocess(img, args): + resize_op = ResizeImage(resize_short=args.resize_short) + img = resize_op(img) + crop_op = CropImage(size=(args.resize, args.resize)) + img = crop_op(img) + if args.normalize: + img_mean = [0.485, 0.456, 0.406] + img_std = [0.229, 0.224, 0.225] + img_scale = 1.0 / 255.0 + normalize_op = NormalizeImage( + scale=img_scale, mean=img_mean, std=img_std) + img = normalize_op(img) + tensor_op = ToTensor() + img = tensor_op(img) + return img + + +def postprocess(batch_outputs, topk=5, multilabel=False): + batch_results = [] + for probs in batch_outputs: + results = [] + if multilabel: + index = np.where(probs >= 0.5)[0].astype('int32') + else: + index = probs.argsort(axis=0)[-topk:][::-1].astype("int32") + clas_id_list = [] + score_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + batch_results.append({"clas_ids": clas_id_list, "scores": score_list}) + return batch_results + + +class ResizeImage(object): + def __init__(self, resize_short=None): + self.resize_short = resize_short + + def __call__(self, img): + img_h, img_w = img.shape[:2] + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + return cv2.resize(img, (w, h)) + + +class CropImage(object): + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class NormalizeImage(object): + def __init__(self, scale=None, mean=None, std=None): + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + return (img.astype('float32') * self.scale - self.mean) / self.std + + +class ToTensor(object): + def __init__(self): + pass + + def __call__(self, img): + img = img.transpose((2, 0, 1)) + return img diff --git a/example/auto_compression/nlp/README.md b/example/auto_compression/nlp/README.md index 5fabc771b..347fcd800 100644 --- a/example/auto_compression/nlp/README.md +++ b/example/auto_compression/nlp/README.md @@ -48,7 +48,7 @@ #### 3.1 准备环境 - python >= 3.6 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 +- PaddleSlim >= 2.3 - PaddleNLP >= 2.3 安装paddlepaddle: @@ -61,8 +61,7 @@ pip install paddlepaddle-gpu 安装paddleslim: ```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim ``` 安装paddlenlp: @@ -110,10 +109,10 @@ export CUDA_VISIBLE_DEVICES=0 python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --save_dir='./save_afqmc_pruned/' ``` -如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola/``` ,命令加上```--eval True```即可: +如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./save_afqmc_pruned``` ,命令加上```--eval True```即可: ```shell export CUDA_VISIBLE_DEVICES=0 -python run.py 
--config_path=./configs/cola.yaml --eval True +python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --eval True ``` ## 4. 压缩配置介绍 diff --git a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml index 261e8635e..b5ba0f149 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./AFQMC - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: afqmc dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml index a7f48f92e..2d4c0e4de 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CLUEWSC - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: cluewsc dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml index 4ccfd53cc..67b9ff0b1 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CMNLI - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: cmnli dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml index 8b5172f08..70ccbea85 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CSL - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: csl dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml index 0e766ada1..7a74e16d1 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./IFLYTEK - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: iflytek dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml index f00a770c8..929a2ff18 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./OCNLI - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: ocnli dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml index 9682f2bb0..49093ab87 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml +++ 
b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./TNEWS - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: tnews dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py index 04ad4f293..e1bf4f254 100644 --- a/example/auto_compression/nlp/run.py +++ b/example/auto_compression/nlp/run.py @@ -4,6 +4,7 @@ import functools from functools import partial import numpy as np +import shutil import paddle import paddle.nn as nn from paddle.io import Dataset, BatchSampler, DataLoader @@ -14,7 +15,7 @@ from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.data.sampler import SamplerHelper from paddlenlp.metrics import Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.auto_compression.compressor import AutoCompression @@ -305,6 +306,15 @@ def main(): if 'HyperParameterOptimization' not in all_config else eval_dataloader, eval_dataloader=eval_dataloader) + if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + + for file_name in os.listdir(global_config['model_dir']): + if 'json' in file_name or 'txt' in file_name: + shutil.copy( + os.path.join(global_config['model_dir'], file_name), + args.save_dir) + ac.compress() diff --git a/example/auto_compression/pytorch_huggingface/README.md b/example/auto_compression/pytorch_huggingface/README.md index 465414c95..730fd5840 100644 --- a/example/auto_compression/pytorch_huggingface/README.md +++ b/example/auto_compression/pytorch_huggingface/README.md @@ -32,19 +32,19 @@ 模型在多个任务上平均精度以及加速对比如下: | bert-base-cased | Accuracy(avg) | 时延(ms) | 加速比 | |:-------:|:----------:|:------------:| :------:| -| 压缩前 | 81.35 | 8.18 | - | -| 压缩后 | 81.53 | 6.35 | 1.29 | +| 压缩前 | 81.35 | 11.60 | - | +| 压缩后 | 81.53 | 4.83 | 2.40 | - Nvidia GPU 测试环境: - 硬件:NVIDIA Tesla T4 单卡 - 软件:CUDA 11.2, cuDNN 8.0, TensorRT 8.4 - - 测试配置:batch_size: 1, seqence length: 128 + - 测试配置:batch_size: 40, seqence length: 128 ## 3. 
自动压缩流程 #### 3.1 准备环境 - python >= 3.6 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本或PaddleSlim>=2.3.0 +- PaddleSlim >= 2.3 - X2Paddle develop版本 - transformers >= 4.18.0 - PaddleNLP >= 2.3 @@ -62,8 +62,7 @@ pip install paddlepaddle-gpu 安装paddleslim: ```shell -git clone https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim ``` 安装X2Paddle: @@ -74,12 +73,6 @@ git checkout develop python setup.py install ``` -安装transformers: -```shell -pip install transformers -``` -注:安装transformers的目的是为了使用transformers中的Tokenizer。 - 安装paddlenlp: ```shell pip install paddlenlp @@ -101,10 +94,10 @@ import torch import numpy as np # 将PyTorch模型设置为eval模式 torch_model.eval() -# 构建输入 -input_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0) -token_type_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0) -attention_msk = torch.unsqueeze(torch.tensor([0] * max_length), 0) +# 构建输入, +input_ids = torch.zeros([batch_size, max_length]).long() +token_type_ids = torch.zeros([batch_size, max_length]).long() +attention_msk = torch.zeros([batch_size, max_length]).long() # 进行转换 from x2paddle.convert import pytorch2paddle pytorch2paddle(torch_model, @@ -120,7 +113,7 @@ PyTorch2Paddle支持trace和script两种方式的转换,均是PyTorch动态图 注意: - 由于自动压缩的是静态图模型,所以这里需要将```jit_type```设置为```trace```,并且注意PyTorch模型中需要设置```pad_to_max_length```,且设置的```max_length```需要和转换时构建的数据相同。 - HuggingFace默认输入```attention_mask```,PaddleNLP默认不输入,这里需要保持一致。可以PaddleNLP中设置```return_attention_mask=True```。 -- 使用PaddleNLP的tokenizer时需要在模型保存的文件夹中加入```model_config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt```这些文件。 +- 使用PaddleNLP的tokenizer时需要在模型保存的文件夹中加入tokenizer的配置文件,可使用PaddleNLP中训练后自动保存的 ```model_config.json,special_tokens_map.json, tokenizer_config.json, vocab.txt```,也可使用Huggingface训练后自动保存的 ```config.json,special_tokens_map.json, tokenizer_config.json, vocab.txt```。 更多Pytorch2Paddle示例可参考[PyTorch模型转换文档](https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/inference_model_convertor/pytorch2paddle.md)。其他框架转换可参考[X2Paddle模型转换工具](https://github.com/PaddlePaddle/X2Paddle) @@ -191,7 +184,7 @@ export CUDA_VISIBLE_DEVICES=0 python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/' ``` -如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola/``` ,命令加上```--eval True```即可: +如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola``` ,命令加上```--eval True```即可: ```shell export CUDA_VISIBLE_DEVICES=0 python run.py --config_path=./configs/cola.yaml --eval True diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py index 9b3467e83..0c730dffa 100644 --- a/example/auto_compression/pytorch_huggingface/run.py +++ b/example/auto_compression/pytorch_huggingface/run.py @@ -20,13 +20,14 @@ import paddle.nn as nn import functools from functools import partial +import shutil from paddle.io import Dataset, BatchSampler, DataLoader from paddle.metric import Metric, Accuracy -from transformers import AutoTokenizer +from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenizer from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from 
paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression.compressor import AutoCompression @@ -164,7 +165,10 @@ def reader(): ): fn(samples) train_batch_sampler = paddle.io.BatchSampler( - train_ds, batch_size=global_config['batch_size'], shuffle=True) + train_ds, + batch_size=global_config['batch_size'], + shuffle=True, + drop_last=True) feed_list = create_data_holder(global_config['task_name'], global_config['input_names']) @@ -208,7 +212,8 @@ def reader(): dev_batch_sampler_matched = paddle.io.BatchSampler( dev_ds_matched, batch_size=global_config['batch_size'], - shuffle=False) + shuffle=False, + drop_last=True) dev_data_loader_matched = DataLoader( dataset=dev_ds_matched, batch_sampler=dev_batch_sampler_matched, @@ -219,21 +224,26 @@ def reader(): dev_batch_sampler_mismatched = paddle.io.BatchSampler( dev_ds_mismatched, batch_size=global_config['batch_size'], - shuffle=False) + shuffle=False, + drop_last=True) dev_data_loader_mismatched = DataLoader( dataset=dev_ds_mismatched, batch_sampler=dev_batch_sampler_mismatched, collate_fn=batchify_fn, num_workers=0, feed_list=feed_list, - return_list=False) + return_list=False, + drop_last=True) return train_data_loader, dev_data_loader_matched, dev_data_loader_mismatched else: dev_ds = load_dataset( global_config['dataset'], global_config['task_name'], splits='dev') dev_ds = dev_ds.map(dev_trans_func, lazy=True) dev_batch_sampler = paddle.io.BatchSampler( - dev_ds, batch_size=global_config['batch_size'], shuffle=False) + dev_ds, + batch_size=global_config['batch_size'], + shuffle=False, + drop_last=True) dev_data_loader = DataLoader( dataset=dev_ds, batch_sampler=dev_batch_sampler, @@ -353,6 +363,15 @@ def main(): 'HyperParameterOptimization' not in all_config else eval_dataloader, eval_dataloader=eval_dataloader) + if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + + for file_name in os.listdir(global_config['model_dir']): + if 'json' in file_name or 'txt' in file_name: + shutil.copy( + os.path.join(global_config['model_dir'], file_name), + args.save_dir) + ac.compress() diff --git a/example/auto_compression/pytorch_huggingface/run.sh b/example/auto_compression/pytorch_huggingface/run.sh deleted file mode 100644 index eb444ba06..000000000 --- a/example/auto_compression/pytorch_huggingface/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0 -python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/' diff --git a/example/auto_compression/pytorch_yolo_series/README.md b/example/auto_compression/pytorch_yolo_series/README.md new file mode 100644 index 000000000..3f80ad6f1 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/README.md @@ -0,0 +1,170 @@ +# YOLO系列模型自动压缩示例 + +目录: +- [1.简介](#1简介) +- [2.Benchmark](#2Benchmark) +- [3.开始自动压缩](#自动压缩流程) + - [3.1 环境准备](#31-准备环境) + - [3.2 准备数据集](#32-准备数据集) + - [3.3 准备预测模型](#33-准备预测模型) + - [3.4 测试模型精度](#34-测试模型精度) + - [3.5 自动压缩并产出模型](#35-自动压缩并产出模型) +- [4.预测部署](#4预测部署) +- [5.FAQ](5FAQ) + +## 1. 简介 + +本示例将以以[ultralytics/yolov5](https://github.com/ultralytics/yolov5),[meituan/YOLOv6](https://github.com/meituan/YOLOv6) 和 [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) 目标检测模型为例,借助[X2Paddle](https://github.com/PaddlePaddle/X2Paddle)的能力,将PyTorch框架模型转换为Paddle框架模型,再使用ACT自动压缩功能进行模型压缩,压缩后的模型可使用Paddle Inference或者导出至ONNX,利用TensorRT部署。 + +## 2.Benchmark + +| 模型 | 策略 | 输入尺寸 | mAPval
0.5:0.95 | 模型体积 | 预测时延FP32
| 预测时延FP16
| 预测时延INT8
| 配置文件 | Inference模型 | +| :-------- |:-------- |:--------: | :--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: | +| YOLOv5s | Base模型 | 640*640 | 37.4 | 28.1MB | 5.95ms | 2.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) | +| YOLOv5s | 离线量化 | 640*640 | 36.0 | 7.4MB | - | - | 1.87ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - | +| YOLOv5s | ACT量化训练 | 640*640 | **36.9** | 7.4MB | - | - | **1.87ms** | [config](./configs/yolov5s_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.tar) | [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.onnx) | +| | | | | | | | | | +| YOLOv6s | Base模型 | 640*640 | 42.4 | 65.9MB | 9.06ms | 2.90ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) | +| YOLOv6s | KL离线量化 | 640*640 | 30.3 | 16.8MB | - | - | 1.83ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - | +| YOLOv6s | 量化蒸馏训练 | 640*640 | **41.3** | 16.8MB | - | - | **1.83ms** | [config](./configs/yolov6s_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_quant.tar) | [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_quant.onnx) | +| | | | | | | | | | +| YOLOv7 | Base模型 | 640*640 | 51.1 | 141MB | 26.84ms | 7.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | +| YOLOv7 | 离线量化 | 640*640 | 50.2 | 36MB | - | - | 4.55ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - | +| YOLOv7 | ACT量化训练 | 640*640 | **50.9** | 36MB | - | - | **4.55ms** | [config](./configs/yolov7_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_quant.tar) | [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_quant.onnx) | +| | | | | | | | | | +| YOLOv7-Tiny | Base模型 | 640*640 | 37.3 | 24MB | 5.06ms | 2.32ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7-tiny.onnx) | +| YOLOv7-Tiny | 离线量化 | 640*640 | 35.8 | 6.1MB | - | - | 1.68ms | - | - | +| YOLOv7-Tiny | ACT量化训练 | 640*640 | **37.0** | 6.1MB | - | - | **1.68ms** | [config](./configs/yolov7_tiny_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_tiny_quant.tar) | [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_tiny_quant.onnx) | + +说明: +- mAP的指标均在COCO val2017数据集中评测得到。 +- YOLOv7模型在Tesla T4的GPU环境下开启TensorRT 8.4.1,batch_size=1, 测试脚本是[cpp_infer](./cpp_infer)。 + +## 3. 
自动压缩流程 + +#### 3.1 准备环境 +- PaddlePaddle >= 2.3.2版本 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)根据相应环境的安装指令进行安装) +- PaddleSlim develop 版本 + +(1)安装paddlepaddle +``` +# CPU +pip install paddlepaddle==2.3.2 +# GPU +pip install paddlepaddle-gpu==2.3.2 +``` + +(2)安装paddleslim: +```shell +git clone https://github.com/PaddlePaddle/PaddleSlim.git & cd PaddleSlim +python setup.py install +``` + + +#### 3.2 准备数据集 + +本示例默认以COCO数据进行自动压缩实验,可以从[MS COCO官网](https://cocodataset.org)下载[Train](http://images.cocodataset.org/zips/train2017.zip)、[Val](http://images.cocodataset.org/zips/val2017.zip)、[annotation](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)。 + +目录格式如下: +``` +dataset/coco/ +├── annotations +│ ├── instances_train2017.json +│ ├── instances_val2017.json +│ | ... +├── train2017 +│ ├── 000000000009.jpg +│ ├── 000000580008.jpg +│ | ... +├── val2017 +│ ├── 000000000139.jpg +│ ├── 000000000285.jpg +``` + +如果是自定义数据集,请按照如上COCO数据格式准备数据。 + + +#### 3.3 准备预测模型 + +(1)准备ONNX模型: + +- YOLOv5: + + 本示例模型使用[ultralytics/yolov5](https://github.com/ultralytics/yolov5)的master分支导出,要求v6.1之后的ONNX模型,可以根据官方的[导出教程](https://github.com/ultralytics/yolov5/issues/251)来准备ONNX模型。也可以下载准备好的[yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx)。 + ```shell + python export.py --weights yolov5s.pt --include onnx + ``` + +- YOLOv6: + + 可通过[meituan/YOLOv6](https://github.com/meituan/YOLOv6)官方的[导出教程](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md)来准备ONNX模型。也可以下载已经准备好的[yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx)。 + +- YOLOv7: 可通过[WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)的导出脚本来准备ONNX模型,具体步骤如下: + ```shell + git clone https://github.com/WongKinYiu/yolov7.git + python export.py --weights yolov7-tiny.pt --grid + ``` + + **注意**:目前ACT支持**不带NMS**模型,使用如上命令导出即可。也可以直接下载我们已经准备好的[yolov7.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov7-tiny.onnx)。 + +#### 3.4 自动压缩并产出模型 + +蒸馏量化自动压缩示例通过run.py脚本启动,会使用接口```paddleslim.auto_compression.AutoCompression```对模型进行自动压缩。配置config文件中模型路径、蒸馏、量化、和训练等部分的参数,配置完成后便可对模型进行量化和蒸馏。 + +本示例启动自动压缩以YOLOv7-Tiny为例,如果想要更换模型,可修改`--config_path`路径即可,具体运行命令为: + +- 单卡训练: +``` +export CUDA_VISIBLE_DEVICES=0 +python run.py --config_path=./configs/yolov7_tiny_qat_dis.yaml --save_dir='./output/' +``` + +- 多卡训练: +``` +CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --log_dir=log --gpus 0,1,2,3 run.py \ + --config_path=./configs/yolov7_tiny_qat_dis.yaml --save_dir='./output/' +``` + +#### 3.5 测试模型精度 + +修改[yolov7_qat_dis.yaml](./configs/yolov7_qat_dis.yaml)中`model_dir`字段为模型存储路径,然后使用eval.py脚本得到模型的mAP: +``` +export CUDA_VISIBLE_DEVICES=0 +python eval.py --config_path=./configs/yolov7_tiny_qat_dis.yaml +``` + + +## 4.预测部署 + +#### 导出至ONNX使用TensorRT部署 + +执行完自动压缩后会默认在`save_dir`中生成`quant_model.onnx`的ONNX模型文件,可以直接使用TensorRT测试脚本进行验证。 + +- 进行测试: +```shell +python yolov7_onnx_trt.py --model_path=output/quant_model.onnx --image_file=images/000000570688.jpg --precision=int8 +``` + +#### Paddle-TensorRT部署 +- C++部署 + +进入[cpp_infer](./cpp_infer)文件夹内,请按照[C++ TensorRT Benchmark测试教程](./cpp_infer/README.md)进行准备环境及编译,然后开始测试: +```shell +# 编译 +bash complie.sh +# 执行 +./build/trt_run --model_file yolov7_quant/model.pdmodel --params_file yolov7_quant/model.pdiparams --run_mode=trt_int8 +``` + +- Python部署: + +首先安装带有TensorRT的[Paddle安装包](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python)。 + 
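+For orientation, here is a minimal hand-written sketch of the Python Paddle-TensorRT INT8 deployment flow (it is not the bundled `paddle_trt_infer.py`; the model path and dummy input below are placeholders, and the real script additionally performs the YOLO pre/post-processing):
+```python
+import numpy as np
+from paddle.inference import Config, PrecisionType, create_predictor
+
+# Assumed location of the quantized inference model produced by run.py.
+config = Config("output/model.pdmodel", "output/model.pdiparams")
+config.enable_use_gpu(256, 0)
+# Same TensorRT settings as the C++ trt_run demo: INT8 precision, no
+# calibration pass because the quantized model already carries scales.
+config.enable_tensorrt_engine(
+    workspace_size=1 << 30,
+    max_batch_size=1,
+    min_subgraph_size=3,
+    precision_mode=PrecisionType.Int8,
+    use_static=False,
+    use_calib_mode=False)
+predictor = create_predictor(config)
+
+# Feed one dummy 640x640 image just to exercise the engine.
+input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
+input_handle.reshape([1, 3, 640, 640])
+input_handle.copy_from_cpu(np.random.rand(1, 3, 640, 640).astype("float32"))
+predictor.run()
+boxes = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
+print(boxes.shape)
+```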
+然后使用[paddle_trt_infer.py](./paddle_trt_infer.py)进行部署: +```shell +python paddle_trt_infer.py --model_path=output --image_file=images/000000570688.jpg --benchmark=True --run_mode=trt_int8 +``` + +## 5.FAQ + +- 如果想对模型进行离线量化,可进入[YOLO系列模型离线量化示例](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series)中进行实验。 diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml similarity index 57% rename from example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml rename to example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml index de8d89e4d..d5c853bed 100644 --- a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml @@ -1,18 +1,19 @@ - Global: - reader_config: configs/yolov5_reader.yml - input_list: {'image': 'x2paddle_images'} + model_dir: ./yolov5s.onnx + dataset_dir: dataset/coco/ + train_image_dir: train2017 + val_image_dir: val2017 + train_anno_path: annotations/instances_train2017.json + val_anno_path: annotations/instances_val2017.json Evaluation: True - arch: 'YOLOv5' - model_dir: ./yolov5s_infer - model_filename: model.pdmodel - params_filename: model.pdiparams + arch: YOLOv5 Distillation: alpha: 1.0 - loss: l2 + loss: soft_label Quantization: + onnx_format: true use_pact: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml new file mode 100644 index 000000000..e14a6b651 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml @@ -0,0 +1,32 @@ +Global: + model_dir: ./yolov6s.onnx + dataset_dir: dataset/coco/ + train_image_dir: train2017 + val_image_dir: val2017 + train_anno_path: annotations/instances_train2017.json + val_anno_path: annotations/instances_val2017.json + Evaluation: True + arch: YOLOv6 + +Distillation: + alpha: 1.0 + loss: soft_label + +Quantization: + onnx_format: true + activation_quantize_type: 'moving_average_abs_max' + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + train_iter: 8000 + eval_iter: 1000 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.00003 + T_max: 8000 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 0.00004 diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml new file mode 100644 index 000000000..437ceea90 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml @@ -0,0 +1,32 @@ +Global: + model_dir: ./yolov7.onnx + dataset_dir: dataset/coco/ + train_image_dir: train2017 + val_image_dir: val2017 + train_anno_path: annotations/instances_train2017.json + val_anno_path: annotations/instances_val2017.json + Evaluation: True + arch: YOLOv7 + +Distillation: + alpha: 1.0 + loss: soft_label + +Quantization: + onnx_format: true + activation_quantize_type: 'moving_average_abs_max' + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + train_iter: 5000 + eval_iter: 1000 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.00003 + T_max: 8000 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 0.00004 diff --git 
a/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml new file mode 100644 index 000000000..958182f6c --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml @@ -0,0 +1,32 @@ +Global: + model_dir: ./yolov7-tiny.onnx + dataset_dir: dataset/coco/ + train_image_dir: train2017 + val_image_dir: val2017 + train_anno_path: annotations/instances_train2017.json + val_anno_path: annotations/instances_val2017.json + Evaluation: True + arch: YOLOv7 + +Distillation: + alpha: 1.0 + loss: soft_label + +Quantization: + onnx_format: true + activation_quantize_type: 'moving_average_abs_max' + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + train_iter: 5000 + eval_iter: 1000 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.00003 + T_max: 8000 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 0.00004 diff --git a/example/auto_compression/pytorch_yolo_series/cpp_infer/CMakeLists.txt b/example/auto_compression/pytorch_yolo_series/cpp_infer/CMakeLists.txt new file mode 100644 index 000000000..d5307c657 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/CMakeLists.txt @@ -0,0 +1,263 @@ +cmake_minimum_required(VERSION 3.0) +project(cpp_inference_demo CXX C) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) +option(USE_TENSORRT "Compile demo with TensorRT." OFF) +option(WITH_ROCM "Compile demo with rocm." OFF) +option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF) +option(WITH_ARM "Compile demo with ARM" OFF) +option(WITH_MIPS "Compile demo with MIPS" OFF) +option(WITH_SW "Compile demo with SW" OFF) +option(WITH_XPU "Compile demow ith xpu" OFF) +option(WITH_NPU "Compile demow ith npu" OFF) + +if(NOT WITH_STATIC_LIB) + add_definitions("-DPADDLE_WITH_SHARED_LIB") +else() + # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. + # Set it to empty in static library mode to avoid compilation issues. 
+ add_definitions("/DPD_INFER_DECL=") +endif() + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() + +if(NOT DEFINED PADDLE_LIB) + message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") +endif() +if(NOT DEFINED DEMO_NAME) + message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") +endif() + +include_directories("${PADDLE_LIB}/") +set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include") + +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib") +link_directories("${PADDLE_LIB}/paddle/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib") + +if (WIN32) + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + option(MSVC_STATIC_CRT "use static C Runtime library by default" ON) + if (MSVC_STATIC_CRT) + if (WITH_MKL) + set(FLAG_OPENMP "/openmp") + endif() + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + safe_set_static_flag() + if (WITH_STATIC_LIB) + add_definitions(-DSTATIC_LIB) + endif() + endif() +else() + if(WITH_MKL) + set(FLAG_OPENMP "-fopenmp") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}") +endif() + +if(WITH_GPU) + if(NOT WIN32) + include_directories("/usr/local/cuda/include") + if(CUDA_LIB STREQUAL "") + set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + endif() + else() + include_directories("C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include") + if(CUDA_LIB STREQUAL "") + set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64") + endif() + endif(NOT WIN32) +endif() + +if (USE_TENSORRT AND WITH_GPU) + set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library") + if("${TENSORRT_ROOT}" STREQUAL "") + message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. 
Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ") + endif() + set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include) + set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib) + file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION + "${TENSORRT_VERSION_FILE_CONTENTS}") + if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") + file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION + "${TENSORRT_VERSION_FILE_CONTENTS}") + endif() + if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") + message(SEND_ERROR "Failed to detect TensorRT version.") + endif() + string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" + TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") + message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " + "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") + include_directories("${TENSORRT_INCLUDE_DIR}") + link_directories("${TENSORRT_LIB_DIR}") +endif() + +if(WITH_MKL) + set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml") + include_directories("${MATH_LIB_PATH}/include") + if(WIN32) + set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") + if(EXISTS ${MKLDNN_PATH}) + include_directories("${MKLDNN_PATH}/include") + if(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + else(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif(WIN32) + endif() +elseif((NOT WITH_MIPS) AND (NOT WITH_SW)) + set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas") + include_directories("${OPENBLAS_LIB_PATH}/include/openblas") + if(WIN32) + set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + +if(WITH_STATIC_LIB) + set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) +else() + if(WIN32) + set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() +endif() + +if (WITH_ONNXRUNTIME) + if(WIN32) + set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx) + elseif(APPLE) + set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx) + else() + set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx) + endif() +endif() + +if (NOT WIN32) + set(EXTERNAL_LIB "-lrt -ldl -lpthread") + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags protobuf xxhash cryptopp + ${EXTERNAL_LIB}) +else() + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags_static libprotobuf xxhash cryptopp-static ${EXTERNAL_LIB}) + set(DEPS ${DEPS} shlwapi.lib) +endif(NOT WIN32) + +if(WITH_GPU) + if(NOT WIN32) + if (USE_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + 
else() + if(USE_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) + if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + endif() +endif() + +if(WITH_ROCM AND NOT WIN32) + set(DEPS ${DEPS} ${ROCM_LIB}/libamdhip64${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif() + +if(WITH_XPU AND NOT WIN32) + set(XPU_INSTALL_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}xpu") + set(DEPS ${DEPS} ${XPU_INSTALL_PATH}/lib/libxpuapi${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${XPU_INSTALL_PATH}/lib/libxpurt${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif() + +if(WITH_NPU AND NOT WIN32) + set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libgraph${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libge_runner${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libascendcl${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libascendcl${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libacl_op_compiler${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif() + +add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) +target_link_libraries(${DEMO_NAME} ${DEPS}) +if(WIN32) + if(USE_TENSORRT) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}) + endif() + endif() + if(WITH_MKL) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/Release + ) + else() + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release + ) + endif() + if(WITH_ONNXRUNTIME) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() + if(NOT WITH_STATIC_LIB) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() +endif() diff --git a/example/auto_compression/pytorch_yolo_series/cpp_infer/README.md b/example/auto_compression/pytorch_yolo_series/cpp_infer/README.md new file 
mode 100644 index 000000000..0286c26df --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/README.md @@ -0,0 +1,85 @@ +# YOLOv7 TensorRT Benchmark测试(Linux) + +## 环境准备 + +- CUDA、CUDNN:确认环境中已经安装CUDA和CUDNN,并且提前获取其安装路径。 + +- TensorRT:可通过NVIDIA官网下载[TensorRT 8.4.1.5](https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.4.1/tars/tensorrt-8.4.1.5.linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz)或其他版本安装包。 + +- Paddle Inference C++预测库:编译develop版本请参考[编译文档](https://www.paddlepaddle.org.cn/inference/user_guides/source_compile.html)。编译完成后,会在build目录下生成`paddle_inference_install_dir`文件夹,这个就是我们需要的C++预测库文件。 + +## 编译可执行程序 + +- (1)修改`compile.sh`中依赖库路径,主要是以下内容: +```shell +# Paddle Inference预测库路径 +LIB_DIR=/root/auto_compress/Paddle/build/paddle_inference_install_dir/ +# CUDNN路径 +CUDNN_LIB=/usr/lib/x86_64-linux-gnu/ +# CUDA路径 +CUDA_LIB=/usr/local/cuda/lib64 +# TensorRT安装包路径,为TRT资源包解压完成后的绝对路径,其中包含`lib`和`include`文件夹 +TENSORRT_ROOT=/root/auto_compress/trt/trt8.4/ +``` + +## Paddle tensorRT测试 + +- YOLOv5 +``` +# FP32 +./build/trt_run --model_file yolov5s_infer/model.pdmodel --params_file yolov5s_infer/model.pdiparams --run_mode=trt_fp32 +# FP16 +./build/trt_run --model_file yolov5s_infer/model.pdmodel --params_file yolov5s_infer/model.pdiparams --run_mode=trt_fp16 +# INT8 +./build/trt_run --model_file yolov5s_quant/model.pdmodel --params_file yolov5s_quant/model.pdiparams --run_mode=trt_int8 +``` + +- YOLOv6 +``` +# FP32 +./build/trt_run --arch=YOLOv6 --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp32 +# FP16 +./build/trt_run --arch=YOLOv6 --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp16 +# INT8 +./build/trt_run --arch=YOLOv6 --model_file yolov6s_quant/model.pdmodel --params_file yolov6s_quant/model.pdiparams --run_mode=trt_int8 +``` + + +- YOLOv7 +``` +# FP32 +./build/trt_run --model_file yolov7_infer/model.pdmodel --params_file yolov7_infer/model.pdiparams --run_mode=trt_fp32 +# FP16 +./build/trt_run --model_file yolov7_infer/model.pdmodel --params_file yolov7_infer/model.pdiparams --run_mode=trt_fp16 +# INT8 +./build/trt_run --model_file yolov7_quant/model.pdmodel --params_file yolov7_quant/model.pdiparams --run_mode=trt_int8 +``` + +## 原生TensorRT测试 + +```shell +# FP32 +trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw +# FP16 +trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw --fp16 +# INT8 +trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw --int8 +``` +- 注:可把--onnx=yolov5s.onnx替换成yolov6s.onnx和yolov7.onnx模型 + +## 性能对比 + +| 预测库 | 模型 | 预测时延FP32
(ms) | 预测时延FP16
(ms) | 预测时延INT8
(ms) | +| :--------: | :--------: |:-------- |:--------: | :---------------------: | +| Paddle TensorRT | yolov5s | 5.95ms | 2.44ms | 1.87ms | +| TensorRT | yolov5s | 6.16ms | 2.58ms | 2.07ms | +| | | | | | +| Paddle TensorRT | YOLOv6s | 9.06ms | 2.90ms | 1.83ms | +| TensorRT | YOLOv6s | 8.59ms | 2.83ms | 1.87ms | +| | | | | | +| Paddle TensorRT | YOLOv7 | 26.84ms | 7.44ms | 4.55ms | +| TensorRT | YOLOv7 | 28.25ms | 7.23ms | 4.67ms | + +环境: +- Tesla T4,TensorRT 8.4.1,CUDA 11.2 +- batch_size=1 diff --git a/example/auto_compression/pytorch_yolo_series/cpp_infer/compile.sh b/example/auto_compression/pytorch_yolo_series/cpp_infer/compile.sh new file mode 100644 index 000000000..afff924b4 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/compile.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set +x +set -e + +work_path=$(dirname $(readlink -f $0)) + +mkdir -p build +cd build +rm -rf * + +DEMO_NAME=trt_run + +WITH_MKL=ON +WITH_GPU=ON +USE_TENSORRT=ON + +LIB_DIR=/root/auto_compress/Paddle/build/paddle_inference_install_dir/ +CUDNN_LIB=/usr/lib/x86_64-linux-gnu/ +CUDA_LIB=/usr/local/cuda/lib64 +TENSORRT_ROOT=/root/auto_compress/trt/trt8.4/ + +WITH_ROCM=OFF +ROCM_LIB=/opt/rocm/lib + +cmake .. -DPADDLE_LIB=${LIB_DIR} \ + -DWITH_MKL=${WITH_MKL} \ + -DDEMO_NAME=${DEMO_NAME} \ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_STATIC_LIB=OFF \ + -DUSE_TENSORRT=${USE_TENSORRT} \ + -DWITH_ROCM=${WITH_ROCM} \ + -DROCM_LIB=${ROCM_LIB} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DTENSORRT_ROOT=${TENSORRT_ROOT} + +make -j diff --git a/example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc b/example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc new file mode 100644 index 000000000..22095b39e --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "paddle/include/paddle_inference_api.h" +#include "paddle/include/experimental/phi/common/float16.h" + +using paddle_infer::Config; +using paddle_infer::Predictor; +using paddle_infer::CreatePredictor; +using paddle_infer::PrecisionType; +using phi::dtype::float16; + +DEFINE_string(model_dir, "", "Directory of the inference model."); +DEFINE_string(model_file, "", "Path of the inference model file."); +DEFINE_string(params_file, "", "Path of the inference params file."); +DEFINE_string(arch, "YOLOv5", "Architectures name, can be: YOLOv5, YOLOv6, YOLOv7."); +DEFINE_string(run_mode, "trt_fp32", "run_mode which can be: trt_fp32, trt_fp16 and trt_int8"); +DEFINE_int32(batch_size, 1, "Batch size."); +DEFINE_int32(gpu_id, 0, "GPU card ID num."); +DEFINE_int32(trt_min_subgraph_size, 3, "tensorrt min_subgraph_size"); +DEFINE_int32(warmup, 50, "warmup"); +DEFINE_int32(repeats, 1000, "repeats"); + +using Time = decltype(std::chrono::high_resolution_clock::now()); +Time time() { return std::chrono::high_resolution_clock::now(); }; +double time_diff(Time t1, Time t2) { + typedef std::chrono::microseconds ms; + auto diff = t2 - t1; + ms counter = std::chrono::duration_cast(diff); + return counter.count() / 1000.0; +} + +std::shared_ptr InitPredictor() { + Config config; + std::string model_path; + if (FLAGS_model_dir != "") { + config.SetModel(FLAGS_model_dir); + model_path = FLAGS_model_dir.substr(0, FLAGS_model_dir.find_last_of("/")); + } else { + config.SetModel(FLAGS_model_file, FLAGS_params_file); + model_path = FLAGS_model_file.substr(0, FLAGS_model_file.find_last_of("/")); + } + // enable tune + std::cout 
<< "model_path: " << model_path << std::endl; + config.EnableUseGpu(256, FLAGS_gpu_id); + if (FLAGS_run_mode == "trt_fp32") { + config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size, + PrecisionType::kFloat32, false, false); + } else if (FLAGS_run_mode == "trt_fp16") { + config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size, + PrecisionType::kHalf, false, false); + } else if (FLAGS_run_mode == "trt_int8") { + config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size, + PrecisionType::kInt8, false, false); + } + config.EnableMemoryOptim(); + config.SwitchIrOptim(true); + return CreatePredictor(config); +} + +template +void run(Predictor *predictor, const std::vector &input, + const std::vector &input_shape, type* out_data, std::vector out_shape) { + + // prepare input + int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1, + std::multiplies()); + + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputHandle(input_names[0]); + input_t->Reshape(input_shape); + input_t->CopyFromCpu(input.data()); + + for (int i = 0; i < FLAGS_warmup; ++i) + CHECK(predictor->Run()); + + auto st = time(); + for (int i = 0; i < FLAGS_repeats; ++i) { + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputHandle(input_names[0]); + input_t->Reshape(input_shape); + input_t->CopyFromCpu(input.data()); + + CHECK(predictor->Run()); + + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputHandle(output_names[0]); + std::vector output_shape = output_t->shape(); + output_t -> ShareExternalData(out_data, out_shape, paddle_infer::PlaceType::kGPU); + } + + LOG(INFO) << "[" << FLAGS_run_mode << " bs-" << FLAGS_batch_size << " ] run avg time is " << time_diff(st, time()) / FLAGS_repeats + << " ms"; +} + +int main(int argc, char *argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto predictor = InitPredictor(); + std::vector input_shape = {FLAGS_batch_size, 3, 640, 640}; + // float16 + using dtype = float16; + std::vector input_data(FLAGS_batch_size * 3 * 640 * 640, dtype(1.0)); + + int out_box_shape = 25200; + if (FLAGS_arch == "YOLOv6"){ + out_box_shape = 8400; + } + dtype *out_data; + int out_data_size = FLAGS_batch_size * out_box_shape * 85; + cudaHostAlloc((void**)&out_data, sizeof(float) * out_data_size, cudaHostAllocMapped); + + std::vector out_shape{ FLAGS_batch_size, 1, out_box_shape, 85}; + run(predictor.get(), input_data, input_shape, out_data, out_shape); + return 0; +} diff --git a/example/auto_compression/pytorch_yolo_series/dataset.py b/example/auto_compression/pytorch_yolo_series/dataset.py new file mode 100644 index 000000000..0250b936f --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/dataset.py @@ -0,0 +1,115 @@ +from pycocotools.coco import COCO +import cv2 +import os +import numpy as np +import paddle + + +class COCOValDataset(paddle.io.Dataset): + def __init__(self, + dataset_dir=None, + image_dir=None, + anno_path=None, + img_size=[640, 640], + input_name='x2paddle_images'): + self.dataset_dir = dataset_dir + self.image_dir = image_dir + self.img_size = img_size + self.input_name = input_name + self.ann_file = os.path.join(dataset_dir, anno_path) + self.coco = COCO(self.ann_file) + ori_ids = list(sorted(self.coco.imgs.keys())) + # check gt bbox + clean_ids = [] + for idx in ori_ids: + ins_anno_ids = self.coco.getAnnIds(imgIds=[idx], iscrowd=False) + instances = 
self.coco.loadAnns(ins_anno_ids) + num_bbox = 0 + for inst in instances: + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + elif not any(np.array(inst['bbox'])): + continue + else: + num_bbox += 1 + if num_bbox > 0: + clean_ids.append(idx) + self.ids = clean_ids + + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return { + 'image': img, + 'im_id': np.array([img_id]), + 'scale_factor': scale_factor + } + + def __len__(self): + return len(self.ids) + + def _get_img_data_from_img_id(self, img_id): + img_info = self.coco.loadImgs(img_id)[0] + img_path = os.path.join(self.dataset_dir, self.image_dir, + img_info['file_name']) + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + + def _generate_scale(self, im, target_shape, keep_ratio=True): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + if keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(target_shape) + target_size_max = np.max(target_shape) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = target_shape + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + def image_preprocess(self, img, target_shape): + # Resize image + im_scale_y, im_scale_x = self._generate_scale(img, target_shape) + img = cv2.resize( + img, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=cv2.INTER_LINEAR) + # Pad + im_h, im_w = img.shape[:2] + h, w = target_shape[:] + if h != im_h or w != im_w: + canvas = np.ones((h, w, 3), dtype=np.float32) + canvas *= np.array([114.0, 114.0, 114.0], dtype=np.float32) + canvas[0:im_h, 0:im_w, :] = img.astype(np.float32) + img = canvas + img = np.transpose(img / 255, [2, 0, 1]) + scale_factor = np.array([im_scale_y, im_scale_x]) + return img.astype(np.float32), scale_factor + + +class COCOTrainDataset(COCOValDataset): + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return {self.input_name: img} diff --git a/example/auto_compression/pytorch_yolo_series/eval.py b/example/auto_compression/pytorch_yolo_series/eval.py new file mode 100644 index 000000000..de11989e4 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/eval.py @@ -0,0 +1,102 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
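+
+# Evaluation script: loads the inference model from Global.model_dir, runs the COCO val
+# set through it, applies YOLOPostProcess, and reports COCO mAP via coco_metric.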
+ +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from paddleslim.common import load_config +from paddleslim.common import load_inference_model +from post_process import YOLOPostProcess, coco_metric +from dataset import COCOValDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--batch_size', type=int, default=1, help="Batch size of model input.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + + return parser + + +def eval(): + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + val_program, feed_target_names, fetch_targets = load_inference_model( + global_config["model_dir"], exe) + + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(val_program, + feed={feed_target_names[0]: data_all['image']}, + fetch_list=fetch_targets, + return_numpy=False) + postprocess = YOLOPostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + + coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + + +def main(): + global global_config + all_config = load_config(FLAGS.config_path) + global_config = all_config["Global"] + + global val_loader + dataset = COCOValDataset( + dataset_dir=global_config['dataset_dir'], + image_dir=global_config['val_image_dir'], + anno_path=global_config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, batch_size=FLAGS.batch_size, drop_last=True) + + eval() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/auto_compression/pytorch_yolov5/images/000000570688.jpg b/example/auto_compression/pytorch_yolo_series/images/000000570688.jpg similarity index 100% rename from example/auto_compression/pytorch_yolov5/images/000000570688.jpg rename to example/auto_compression/pytorch_yolo_series/images/000000570688.jpg diff --git a/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py b/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py new file mode 100644 index 000000000..3540c33d6 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py @@ -0,0 +1,378 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cv2 +import tensorrt as trt +import pycuda.driver as cuda +import pycuda.autoinit +import os +import time +import random +import argparse + +EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) +EXPLICIT_PRECISION = 1 << ( + int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION) + +# load coco labels +CLASS_LABEL = [ + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", + "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", + "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", + "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", + "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", + "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" +] + + +def preprocess(image, input_size, mean=None, std=None, swap=(2, 0, 1)): + if len(image.shape) == 3: + padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0 + else: + padded_img = np.ones(input_size) * 114.0 + img = np.array(image) + r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, ).astype(np.float32) + padded_img[:int(img.shape[0] * r), :int(img.shape[1] * r)] = resized_img + + padded_img = padded_img[:, :, ::-1] + padded_img /= 255.0 + if mean is not None: + padded_img -= mean + if std is not None: + padded_img /= std + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + return padded_img, r + + +def postprocess(predictions, ratio): + boxes = predictions[:, :4] + scores = predictions[:, 4:5] * predictions[:, 5:] + boxes_xyxy = np.ones_like(boxes) + boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2. + boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2. + boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2. + boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2. 
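+    # boxes were predicted on the resized (letterboxed) input; dividing by the
+    # preprocessing ratio maps them back to original image coordinates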
+ boxes_xyxy /= ratio + dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1) + return dets + + +def nms(boxes, scores, nms_thr): + """Single class NMS implemented in Numpy.""" + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= nms_thr)[0] + order = order[inds + 1] + + return keep + + +def multiclass_nms(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy""" + final_dets = [] + num_classes = scores.shape[1] + for cls_ind in range(num_classes): + cls_scores = scores[:, cls_ind] + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + continue + else: + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if len(keep) > 0: + cls_inds = np.ones((len(keep), 1)) * cls_ind + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1) + final_dets.append(dets) + if len(final_dets) == 0: + return None + return np.concatenate(final_dets, 0) + + +def get_color_map_list(num_classes): + color_map = num_classes * [0, 0, 0] + for i in range(0, num_classes): + j = 0 + lab = i + while lab: + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) + j += 1 + lab >>= 3 + color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] + return color_map + + +def draw_box(img, boxes, scores, cls_ids, conf=0.5, class_names=None): + color_list = get_color_map_list(len(class_names)) + for i in range(len(boxes)): + box = boxes[i] + cls_id = int(cls_ids[i]) + color = tuple(color_list[cls_id]) + score = scores[i] + if score < conf: + continue + x0 = int(box[0]) + y0 = int(box[1]) + x1 = int(box[2]) + y1 = int(box[3]) + + text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) + font = cv2.FONT_HERSHEY_SIMPLEX + + txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] + cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) + cv2.rectangle(img, (x0, y0 + 1), + (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), + color, -1) + cv2.putText( + img, + text, (x0, y0 + txt_size[1]), + font, + 0.8, (0, 255, 0), + thickness=2) + + return img + + +def get_engine(precision, model_file_path): + # TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) + TRT_LOGGER = trt.Logger() + builder = trt.Builder(TRT_LOGGER) + config = builder.create_builder_config() + if precision == 'int8': + network = builder.create_network(EXPLICIT_BATCH | EXPLICIT_PRECISION) + else: + network = builder.create_network(EXPLICIT_BATCH) + parser = trt.OnnxParser(network, TRT_LOGGER) + + runtime = trt.Runtime(TRT_LOGGER) + if model_file_path.endswith('.trt'): + # If a serialized engine exists, use it instead of building an engine. 
+ print("Reading engine from file {}".format(model_file_path)) + with open(model_file_path, + "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + engine = runtime.deserialize_cuda_engine(f.read()) + for i in range(network.num_layers): + layer = network.get_layer(i) + print(i, layer.name) + return engine + else: + config.max_workspace_size = 1 << 30 + + if precision == "fp16": + if not builder.platform_has_fast_fp16: + print("FP16 is not supported natively on this platform/device") + else: + config.set_flag(trt.BuilderFlag.FP16) + elif precision == "int8": + if not builder.platform_has_fast_int8: + print("INT8 is not supported natively on this platform/device") + else: + if builder.platform_has_fast_fp16: + # Also enable fp16, as some layers may be even more efficient in fp16 than int8 + config.set_flag(trt.BuilderFlag.FP16) + config.set_flag(trt.BuilderFlag.INT8) + + builder.max_batch_size = 1 + print('Loading ONNX file from path {}...'.format(model_file_path)) + with open(model_file_path, 'rb') as model: + print('Beginning ONNX file parsing') + if not parser.parse(model.read()): + print('ERROR: Failed to parse the ONNX file.') + for error in range(parser.num_errors): + print(parser.get_error(error)) + return None + + print('Completed parsing of ONNX file') + print('Building an engine from file {}; this may take a while...'. + format(model_file_path)) + plan = builder.build_serialized_network(network, config) + engine = runtime.deserialize_cuda_engine(plan) + print("Completed creating Engine") + with open(model_file_path, "wb") as f: + f.write(engine.serialize()) + for i in range(network.num_layers): + layer = network.get_layer(i) + print(i, layer.name) + return engine + + +# Simple helper data class that's a little nicer to use than a 2-tuple. +class HostDeviceMem(object): + def __init__(self, host_mem, device_mem): + self.host = host_mem + self.device = device_mem + + def __str__(self): + return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) + + def __repr__(self): + return self.__str__() + + +def allocate_buffers(engine): + inputs = [] + outputs = [] + bindings = [] + stream = cuda.Stream() + for binding in engine: + size = trt.volume(engine.get_binding_shape( + binding)) * engine.max_batch_size + dtype = trt.nptype(engine.get_binding_dtype(binding)) + # Allocate host and device buffers + host_mem = cuda.pagelocked_empty(size, dtype) + device_mem = cuda.mem_alloc(host_mem.nbytes) + # Append the device buffer to device bindings. + bindings.append(int(device_mem)) + # Append to the appropriate list. + if engine.binding_is_input(binding): + inputs.append(HostDeviceMem(host_mem, device_mem)) + else: + outputs.append(HostDeviceMem(host_mem, device_mem)) + return inputs, outputs, bindings, stream + + +def run_inference(context, bindings, inputs, outputs, stream): + # Transfer input data to the GPU. + [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] + # Run inference. + context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) + # Transfer predictions back from the GPU. + [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] + # Synchronize the stream + stream.synchronize() + # Return only the host outputs. 
+ return [out.host for out in outputs] + + +def main(args): + onnx_model = args.model_path + img_path = args.image_file + num_class = len(CLASS_LABEL) + repeat = 1000 + engine = get_engine(args.precision, onnx_model) + + model_all_names = [] + for idx in range(engine.num_bindings): + is_input = engine.binding_is_input(idx) + name = engine.get_binding_name(idx) + op_type = engine.get_binding_dtype(idx) + model_all_names.append(name) + shape = engine.get_binding_shape(idx) + print('input id:', idx, ' is input: ', is_input, ' binding name:', + name, ' shape:', shape, 'type: ', op_type) + + context = engine.create_execution_context() + print('Allocate buffers ...') + inputs, outputs, bindings, stream = allocate_buffers(engine) + print("TRT set input ...") + + origin_img = cv2.imread(img_path) + input_shape = [args.img_shape, args.img_shape] + input_image, ratio = preprocess(origin_img, input_shape) + + inputs[0].host = np.expand_dims(input_image, axis=0) + + for _ in range(0, 50): + trt_outputs = run_inference( + context, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream) + + time1 = time.time() + for _ in range(0, repeat): + trt_outputs = run_inference( + context, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream) + time2 = time.time() + # total time cost(ms) + total_inference_cost = (time2 - time1) * 1000 + print("model path: ", onnx_model, " precision: ", args.precision) + print("In TensorRT, ", + "average latency is : {} ms".format(total_inference_cost / repeat)) + # Do postprocess + output = trt_outputs[0] + predictions = np.reshape(output, (1, -1, int(5 + num_class)))[0] + dets = postprocess(predictions, ratio) + # Draw rectangles and labels on the original image + if dets is not None: + final_boxes, final_scores, final_cls_inds = dets[:, : + 4], dets[:, 4], dets[:, + 5] + origin_img = draw_box( + origin_img, + final_boxes, + final_scores, + final_cls_inds, + conf=0.5, + class_names=CLASS_LABEL) + cv2.imwrite('output.jpg', origin_img) + print('The prediction results are saved in output.jpg.') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--model_path', + type=str, + default="quant_model.onnx", + help="inference model filepath") + parser.add_argument( + '--image_file', type=str, default="bus.jpg", help="image path") + parser.add_argument( + '--precision', type=str, default='fp32', help="support fp32/fp16/int8.") + parser.add_argument('--img_shape', type=int, default=640, help="input_size") + args = parser.parse_args() + main(args) diff --git a/example/auto_compression/pytorch_yolov5/paddle_trt_infer.py b/example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py similarity index 96% rename from example/auto_compression/pytorch_yolov5/paddle_trt_infer.py rename to example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py index 62c2c89b2..eacb67fbb 100644 --- a/example/auto_compression/pytorch_yolov5/paddle_trt_infer.py +++ b/example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py @@ -21,7 +21,7 @@ from paddle.inference import Config from paddle.inference import create_predictor -from post_process import YOLOv5PostProcess +from post_process import YOLOv7PostProcess CLASS_LABEL = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', @@ -244,8 +244,9 @@ def predict_image(predictor, threshold=0.5, arch='YOLOv5'): img, scale_factor = image_preprocess(image_file, image_shape) - inputs = {} - if arch == 'YOLOv5': + if arch == 'YOLOv6': + 
inputs['x2paddle_image_arrays'] = img + else: inputs['x2paddle_images'] = img input_names = predictor.get_input_names() for i in range(len(input_names)): @@ -275,8 +276,8 @@ def predict_image(predictor, print('Inference time(ms): min={}, max={}, avg={}'.format( round(time_min * 1000, 2), round(time_max * 1000, 1), round(time_avg * 1000, 1))) - postprocess = YOLOv5PostProcess( - score_threshold=0.001, nms_threshold=0.6, multi_label=True) + postprocess = YOLOv7PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) res = postprocess(np_boxes, scale_factor) res_img = draw_box( image_file, res['bbox'], CLASS_LABEL, threshold=threshold) @@ -306,6 +307,8 @@ def predict_image(predictor, default='GPU', help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU" ) + parser.add_argument( + '--arch', type=str, default='YOLOv5', help="architectures name.") parser.add_argument('--img_shape', type=int, default=640, help="input_size") args = parser.parse_args() @@ -319,4 +322,5 @@ def predict_image(predictor, args.image_file, image_shape=[args.img_shape, args.img_shape], warmup=warmup, - repeats=repeats) + repeats=repeats, + arch=args.arch) diff --git a/example/auto_compression/pytorch_yolov5/post_process.py b/example/auto_compression/pytorch_yolo_series/post_process.py similarity index 75% rename from example/auto_compression/pytorch_yolov5/post_process.py rename to example/auto_compression/pytorch_yolo_series/post_process.py index 9d20926b5..644c24b81 100644 --- a/example/auto_compression/pytorch_yolov5/post_process.py +++ b/example/auto_compression/pytorch_yolo_series/post_process.py @@ -14,6 +14,8 @@ import numpy as np import cv2 +import json +import sys def box_area(boxes): @@ -68,9 +70,9 @@ def nms(boxes, scores, iou_threshold): return keep -class YOLOv5PostProcess(object): +class YOLOPostProcess(object): """ - Post process of YOLOv5 network. + Post process of YOLO-series network. args: score_threshold(float): Threshold to filter out bounding boxes with low confidence score. If not provided, consider all boxes. 
@@ -157,8 +159,8 @@ def __call__(self, outs, scale_factor): if len(pred.shape) == 1: pred = pred[np.newaxis, :] pred_bboxes = pred[:, :4] - scale_factor = np.tile(scale_factor[i][::-1], (1, 2)) - pred_bboxes /= scale_factor + scale = np.tile(scale_factor[i][::-1], (2)) + pred_bboxes /= scale bbox = np.concatenate( [ pred[:, -1][:, np.newaxis], pred[:, -2][:, np.newaxis], @@ -171,3 +173,59 @@ def __call__(self, outs, scale_factor): bboxs = np.concatenate(bboxs, axis=0) box_nums = np.array(box_nums) return {'bbox': bboxs, 'bbox_num': box_nums} + + +def coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list): + try: + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + except: + print( + "[ERROR] Not found pycocotools, please install by `pip install pycocotools`" + ) + sys.exit(1) + + coco_gt = COCO(anno_file) + cats = coco_gt.loadCats(coco_gt.getCatIds()) + clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)} + results = [] + for bboxes, bbox_nums, image_id in zip(bboxes_list, bbox_nums_list, + image_id_list): + results += _get_det_res(bboxes, bbox_nums, image_id, clsid2catid) + + output = "bbox.json" + with open(output, 'w') as f: + json.dump(results, f) + + coco_dt = coco_gt.loadRes(output) + coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + return coco_eval.stats + + +def _get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + h = ymax - ymin + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': cur_image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + det_res.append(dt_res) + return det_res diff --git a/example/auto_compression/pytorch_yolo_series/run.py b/example/auto_compression/pytorch_yolo_series/run.py new file mode 100644 index 000000000..1a22d8220 --- /dev/null +++ b/example/auto_compression/pytorch_yolo_series/run.py @@ -0,0 +1,127 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
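+
+# Auto-compression (ACT) entry point: builds COCO train/val dataloaders from the YAML
+# config, runs paddleslim AutoCompression with an optional COCO mAP eval callback,
+# then exports the compressed model to ONNX.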
+ +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from paddleslim.common import load_config +from paddleslim.auto_compression import AutoCompression +from dataset import COCOValDataset, COCOTrainDataset +from post_process import YOLOPostProcess, coco_metric + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--save_dir', + type=str, + default='output', + help="directory to save compressed model.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + parser.add_argument( + '--eval', type=bool, default=False, help="whether to run evaluation.") + + return parser + + +def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(compiled_test_program, + feed={test_feed_names[0]: data_all['image']}, + fetch_list=test_fetch_list, + return_numpy=False) + res = {} + postprocess = YOLOPostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + map_res = coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + return map_res[0] + + +def main(): + global global_config + all_config = load_config(FLAGS.config_path) + assert "Global" in all_config, f"Key 'Global' not found in config file. 
\n{all_config}" + global_config = all_config["Global"] + input_name = 'x2paddle_image_arrays' if global_config[ + 'arch'] == 'YOLOv6' else 'x2paddle_images' + dataset = COCOTrainDataset( + dataset_dir=global_config['dataset_dir'], + image_dir=global_config['train_image_dir'], + anno_path=global_config['train_anno_path'], + input_name=input_name) + train_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + if 'Evaluation' in global_config.keys() and global_config[ + 'Evaluation'] and paddle.distributed.get_rank() == 0: + eval_func = eval_function + global val_loader + dataset = COCOValDataset( + dataset_dir=global_config['dataset_dir'], + image_dir=global_config['val_image_dir'], + anno_path=global_config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, + batch_size=1, + shuffle=False, + drop_last=False, + num_workers=0) + else: + eval_func = None + + ac = AutoCompression( + model_dir=global_config["model_dir"], + train_dataloader=train_loader, + save_dir=FLAGS.save_dir, + config=all_config, + eval_callback=eval_func) + ac.compress() + ac.export_onnx() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/auto_compression/pytorch_yolov5/README.md b/example/auto_compression/pytorch_yolov5/README.md deleted file mode 100644 index b6078db17..000000000 --- a/example/auto_compression/pytorch_yolov5/README.md +++ /dev/null @@ -1,127 +0,0 @@ -# 目标检测模型自动压缩示例 - -目录: -- [1.简介](#1简介) -- [2.Benchmark](#2Benchmark) -- [3.开始自动压缩](#自动压缩流程) - - [3.1 环境准备](#31-准备环境) - - [3.2 准备数据集](#32-准备数据集) - - [3.3 准备预测模型](#33-准备预测模型) - - [3.4 测试模型精度](#34-测试模型精度) - - [3.5 自动压缩并产出模型](#35-自动压缩并产出模型) -- [4.预测部署](#4预测部署) -- [5.FAQ](5FAQ) - -## 1. 简介 - -飞桨模型转换工具[X2Paddle](https://github.com/PaddlePaddle/X2Paddle)支持将```Caffe/TensorFlow/ONNX/PyTorch```的模型一键转为飞桨(PaddlePaddle)的预测模型。借助X2Paddle的能力,各种框架的推理模型可以很方便的使用PaddleSlim的自动化压缩功能。 - -本示例将以[ultralytics/yolov5](https://github.com/ultralytics/yolov5)目标检测模型为例,将PyTorch框架模型转换为Paddle框架模型,再使用ACT自动压缩功能进行自动压缩。本示例使用的自动压缩策略为量化训练。 - -## 2.Benchmark - -| 模型 | 策略 | 输入尺寸 | mAPval
0.5:0.95 | 预测时延FP32
(ms) |预测时延FP16
(ms) | 预测时延INT8
(ms) | 配置文件 | Inference模型 | -| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: | -| YOLOv5s | Base模型 | 640*640 | 37.4 | 7.8ms | 4.3ms | - | - | [Model](https://bj.bcebos.com/v1/paddle-slim-models/detection/yolov5s_infer.tar) | -| YOLOv5s | 量化+蒸馏 | 640*640 | 36.8 | - | - | 3.4ms | [config](./configs/yolov5s_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.tar) | - -说明: -- mAP的指标均在COCO val2017数据集中评测得到。 -- YOLOv5s模型在Tesla T4的GPU环境下测试,并且开启TensorRT,测试脚本是[benchmark demo](./paddle_trt_infer.py)。 - -## 3. 自动压缩流程 - -#### 3.1 准备环境 -- PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 -- PaddleDet >= 2.4 -- [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) >= 1.3.6 -- opencv-python - -(1)安装paddlepaddle: -```shell -# CPU -pip install paddlepaddle -# GPU -pip install paddlepaddle-gpu -``` - -(2)安装paddleslim: -```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install -``` - -(3)安装paddledet: -```shell -pip install paddledet -``` - -注:安装PaddleDet的目的是为了直接使用PaddleDetection中的Dataloader组件。 - -(4)安装X2Paddle的1.3.6以上版本: -```shell -pip install x2paddle sympy onnx -``` - -#### 3.2 准备数据集 - -本案例默认以COCO数据进行自动压缩实验,并且依赖PaddleDetection中数据读取模块,如果自定义COCO数据,或者其他格式数据,请参考[PaddleDetection数据准备文档](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/docs/tutorials/PrepareDataSet.md) 来准备数据。 - - -#### 3.3 准备预测模型 - -(1)准备ONNX模型: - -可通过[ultralytics/yolov5](https://github.com/ultralytics/yolov5) 官方的[导出教程](https://github.com/ultralytics/yolov5/issues/251)来准备ONNX模型。也可以下载准备好的[yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx)。 -``` -python export.py --weights yolov5s.pt --include onnx -``` - -(2) 转换模型: -``` -x2paddle --framework=onnx --model=yolov5s.onnx --save_dir=pd_model -cp -r pd_model/inference_model/ yolov5s_infer -``` -即可得到YOLOv5s模型的预测模型(`model.pdmodel` 和 `model.pdiparams`)。如想快速体验,可直接下载上方表格中YOLOv5s的[Paddle预测模型](https://bj.bcebos.com/v1/paddle-slim-models/detection/yolov5s_infer.tar)。 - - -预测模型的格式为:`model.pdmodel` 和 `model.pdiparams`两个,带`pdmodel`的是模型文件,带`pdiparams`后缀的是权重文件。 - - -#### 3.4 自动压缩并产出模型 - -蒸馏量化自动压缩示例通过run.py脚本启动,会使用接口```paddleslim.auto_compression.AutoCompression```对模型进行自动压缩。配置config文件中模型路径、蒸馏、量化、和训练等部分的参数,配置完成后便可对模型进行量化和蒸馏。具体运行命令为: - -- 单卡训练: -``` -export CUDA_VISIBLE_DEVICES=0 -python run.py --config_path=./configs/yolov5s_qat_dis.yaml --save_dir='./output/' -``` - -- 多卡训练: -``` -CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --log_dir=log --gpus 0,1,2,3 run.py \ - --config_path=./configs/yolov5s_qat_dis.yaml --save_dir='./output/' -``` - -#### 3.5 测试模型精度 - -使用eval.py脚本得到模型的mAP: -``` -export CUDA_VISIBLE_DEVICES=0 -python eval.py --config_path=./configs/yolov5s_qat_dis.yaml -``` - -**注意**:要测试的模型路径需要在配置文件中`model_dir`字段下进行修改指定。 - - -## 4.预测部署 - -- Paddle-TensorRT部署: -使用[paddle_trt_infer.py](./paddle_trt_infer.py)进行部署: -```shell -python paddle_trt_infer.py --model_path=output --image_file=images/000000570688.jpg --benchmark=True --run_mode=trt_int8 -``` - -## 5.FAQ diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml b/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml deleted file mode 100644 index cb87c3f8f..000000000 --- a/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml +++ /dev/null @@ -1,27 
+0,0 @@ -metric: COCO -num_classes: 80 - -# Datset configuration -TrainDataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco/ - -EvalDataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco/ - -worker_num: 0 - -# preprocess reader in test -EvalReader: - sample_transforms: - - Decode: {} - - Resize: {target_size: [640, 640], keep_ratio: True} - - Pad: {size: [640, 640], fill_value: [114., 114., 114.]} - - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True} - - Permute: {} - batch_size: 1 diff --git a/example/auto_compression/pytorch_yolov5/run.py b/example/auto_compression/pytorch_yolov5/run.py deleted file mode 100644 index 130003c05..000000000 --- a/example/auto_compression/pytorch_yolov5/run.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import numpy as np -import argparse -import paddle -from ppdet.core.workspace import load_config, merge_config -from ppdet.core.workspace import create -from ppdet.metrics import COCOMetric, VOCMetric -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config -from paddleslim.auto_compression import AutoCompression - -from post_process import YOLOv5PostProcess - - -def argsparser(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - '--config_path', - type=str, - default=None, - help="path of compression strategy config.", - required=True) - parser.add_argument( - '--save_dir', - type=str, - default='output', - help="directory to save compressed model.") - parser.add_argument( - '--devices', - type=str, - default='gpu', - help="which device used to compress.") - parser.add_argument( - '--eval', type=bool, default=False, help="whether to run evaluation.") - - return parser - - -def print_arguments(args): - print('----------- Running Arguments -----------') - for arg, value in sorted(vars(args).items()): - print('%s: %s' % (arg, value)) - print('------------------------------------------') - - -def reader_wrapper(reader, input_list): - def gen(): - for data in reader: - in_dict = {} - if isinstance(input_list, list): - for input_name in input_list: - in_dict[input_name] = data[input_name] - elif isinstance(input_list, dict): - for input_name in input_list.keys(): - in_dict[input_list[input_name]] = data[input_name] - yield in_dict - - return gen - - -def convert_numpy_data(data, metric): - data_all = {} - data_all = {k: np.array(v) for k, v in data.items()} - if isinstance(metric, VOCMetric): - for k, v in data_all.items(): - if not isinstance(v[0], np.ndarray): - tmp_list = [] - for t in v: - tmp_list.append(np.array(t)) - data_all[k] = np.array(tmp_list) - else: - data_all = {k: np.array(v) for k, v in data.items()} - return data_all - - -def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): - metric = 
global_config['metric'] - for batch_id, data in enumerate(val_loader): - data_all = convert_numpy_data(data, metric) - data_input = {} - for k, v in data.items(): - if isinstance(global_config['input_list'], list): - if k in test_feed_names: - data_input[k] = np.array(v) - elif isinstance(global_config['input_list'], dict): - if k in global_config['input_list'].keys(): - data_input[global_config['input_list'][k]] = np.array(v) - outs = exe.run(compiled_test_program, - feed=data_input, - fetch_list=test_fetch_list, - return_numpy=False) - res = {} - if 'arch' in global_config and global_config['arch'] == 'YOLOv5': - postprocess = YOLOv5PostProcess( - score_threshold=0.001, nms_threshold=0.6, multi_label=True) - res = postprocess(np.array(outs[0]), data_all['scale_factor']) - else: - for out in outs: - v = np.array(out) - if len(v.shape) > 1: - res['bbox'] = v - else: - res['bbox_num'] = v - - metric.update(data_all, res) - if batch_id % 100 == 0: - print('Eval iter:', batch_id) - metric.accumulate() - metric.log() - map_res = metric.get_results() - metric.reset() - return map_res['bbox'][0] - - -def main(): - global global_config - all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" - global_config = all_config["Global"] - reader_cfg = load_config(global_config['reader_config']) - - train_loader = create('EvalReader')(reader_cfg['TrainDataset'], - reader_cfg['worker_num'], - return_list=True) - train_loader = reader_wrapper(train_loader, global_config['input_list']) - - if 'Evaluation' in global_config.keys() and global_config[ - 'Evaluation'] and paddle.distributed.get_rank() == 0: - eval_func = eval_function - dataset = reader_cfg['EvalDataset'] - global val_loader - _eval_batch_sampler = paddle.io.BatchSampler( - dataset, batch_size=reader_cfg['EvalReader']['batch_size']) - val_loader = create('EvalReader')(dataset, - reader_cfg['worker_num'], - batch_sampler=_eval_batch_sampler, - return_list=True) - metric = None - if reader_cfg['metric'] == 'COCO': - clsid2catid = {v: k for k, v in dataset.catid2clsid.items()} - anno_file = dataset.get_anno() - metric = COCOMetric( - anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox') - elif reader_cfg['metric'] == 'VOC': - metric = VOCMetric( - label_list=dataset.get_label_list(), - class_num=reader_cfg['num_classes'], - map_type=reader_cfg['map_type']) - else: - raise ValueError("metric currently only supports COCO and VOC.") - global_config['metric'] = metric - else: - eval_func = None - - ac = AutoCompression( - model_dir=global_config["model_dir"], - model_filename=global_config["model_filename"], - params_filename=global_config["params_filename"], - save_dir=FLAGS.save_dir, - config=all_config, - train_dataloader=train_loader, - eval_callback=eval_func) - ac.compress() - - -if __name__ == '__main__': - paddle.enable_static() - parser = argsparser() - FLAGS = parser.parse_args() - print_arguments(FLAGS) - - assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] - paddle.set_device(FLAGS.devices) - - main() diff --git a/example/auto_compression/semantic_segmentation/README.md b/example/auto_compression/semantic_segmentation/README.md index d67676277..a923ec352 100644 --- a/example/auto_compression/semantic_segmentation/README.md +++ b/example/auto_compression/semantic_segmentation/README.md @@ -8,32 +8,31 @@ - [3.2 准备数据集](#32-准备数据集) - [3.3 准备预测模型](#33-准备预测模型) - [3.4 自动压缩并产出模型](#34-自动压缩并产出模型) -- [4.预测部署](#4预测部署) +- [4.评估精度](#4评估精度) +- [5.预测部署](#5预测部署) - 
[5.FAQ](5FAQ) ## 1.简介 -本示例将以语义分割模型PP-HumanSeg-Lite为例,介绍如何使用PaddleSeg中Inference部署模型进行自动压缩。本示例使用的自动压缩策略为非结构化稀疏、蒸馏和量化、蒸馏。 +本示例将以语义分割模型[PP-HumanSeg-Lite](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/PP-HumanSeg#portrait-segmentation)为例,介绍如何使用PaddleSeg中Inference部署模型进行自动压缩。本示例使用的自动压缩策略为非结构化稀疏、蒸馏和量化、蒸馏。 ## 2.Benchmark -- [PP-HumanSeg-Lite](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/PP-HumanSeg#portrait-segmentation) - | 模型 | 策略 | Total IoU | ARM CPU耗时(ms)
thread=1 |Nvidia GPU耗时(ms)| 配置文件 | Inference模型 | |:-----:|:-----:|:----------:|:---------:| :------:|:------:|:------:| | PP-HumanSeg-Lite | Baseline | 92.87 | 56.363 |-| - | [model](https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224_with_softmax.tar.gz) | | PP-HumanSeg-Lite | 非结构化稀疏+蒸馏 | 92.35 | 37.712 |-| [config](./configs/pp_human/pp_human_sparse.yaml)| - | -| PP-HumanSeg-Lite | 量化+蒸馏 | 92.84 | 49.656 |-| [config](./configs/pp_human/pp_human_qat.yaml) | - | +| PP-HumanSeg-Lite | 量化+蒸馏 | 92.84 | 49.656 |-| [config](./configs/pp_human/pp_human_qat.yaml) | [model](https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/pp_humanseg_qat.zip) (非最佳) | | PP-Liteseg | Baseline | 77.04| - | 1.425| - |[model](https://paddleseg.bj.bcebos.com/tipc/easyedge/RES-paddle2-PPLIteSegSTDC1.zip)| -| PP-Liteseg | 量化训练 | 76.93 | - | 1.158|[config](./configs/pp_liteseg/pp_liteseg_qat.yaml) | - | +| PP-Liteseg | 量化训练 | 76.93 | - | 1.158|[config](./configs/pp_liteseg/pp_liteseg_qat.yaml) | [model](https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/pp-liteseg.zip) | | HRNet | Baseline | 78.97 | - |8.188|-| [model](https://paddleseg.bj.bcebos.com/tipc/easyedge/RES-paddle2-HRNetW18-Seg.zip)| -| HRNet | 量化训练 | 78.90 | - |5.812| [config](./configs/hrnet/hrnet_qat.yaml) | - | +| HRNet | 量化训练 | 78.90 | - |5.812| [config](./configs/hrnet/hrnet_qat.yaml) | [model](https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/hrnet.zip) | | UNet | Baseline | 65.00 | - |15.291|-| [model](https://paddleseg.bj.bcebos.com/tipc/easyedge/RES-paddle2-UNet.zip) | -| UNet | 量化训练 | 64.93 | - |10.228| [config](./configs/unet/unet_qat.yaml) | - | +| UNet | 量化训练 | 64.93 | - |10.228| [config](./configs/unet/unet_qat.yaml) | [model](https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/unet.zip) | | Deeplabv3-ResNet50 | Baseline | 79.90 | -|12.766| -| [model](https://paddleseg.bj.bcebos.com/tipc/easyedge/RES-paddle2-Deeplabv3-ResNet50.zip)| | Deeplabv3-ResNet50 | 量化训练 | 78.89 | - |8.839|[config](./configs/deeplabv3/deeplabv3_qat.yaml) | - | -- ARM CPU测试环境:`SDM710 2*A75(2.2GHz) 6*A55(1.7GHz)`; +- ARM CPU测试环境:`高通骁龙710处理器(SDM710 2*A75(2.2GHz) 6*A55(1.7GHz))`; - Nvidia GPU测试环境: @@ -41,16 +40,6 @@ - 软件:CUDA 11.0, cuDNN 8.0, TensorRT 8.0 - 测试配置:batch_size: 40, max_seq_len: 128 -- PP-HumanSeg-Lite数据集 - - - 数据集:AISegment + PP-HumanSeg14K + 内部自建数据集。其中 AISegment 是开源数据集,可从[链接](https://github.com/aisegmentcn/matting_human_datasets)处获取;PP-HumanSeg14K 是 PaddleSeg 自建数据集,可从[官方渠道](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/contrib/PP-HumanSeg/paper.md#pp-humanseg14k-a-large-scale-teleconferencing-video-dataset)获取;内部数据集不对外公开。 - - 示例数据集: 用于快速跑通人像分割的压缩和推理流程, 不能用该数据集复现 benckmark 表中的压缩效果。 [下载链接](https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip) - -- PP-Liteseg,HRNet,UNet,Deeplabv3-ResNet50数据集 - - - cityscapes: 请从[cityscapes官网](https://www.cityscapes-dataset.com/login/)下载完整数据 - - 示例数据集: cityscapes数据集的一个子集,用于快速跑通压缩和推理流程,不能用该数据集复现 benchmark 表中的压缩效果。[下载链接](https://bj.bcebos.com/v1/paddle-slim-models/data/mini_cityscapes/mini_cityscapes.tar) - 下面将以开源数据集为例介绍如何对PP-HumanSeg-Lite进行自动压缩。 ## 3. 
自动压缩流程 @@ -58,7 +47,7 @@ #### 3.1 准备环境 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 +- PaddleSlim >= 2.3 - PaddleSeg >= 2.5 安装paddlepaddle: @@ -71,30 +60,46 @@ pip install paddlepaddle-gpu 安装paddleslim: ```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim +``` + +准备paddleslim示例代码: +```shell +git clone https://github.com/PaddlePaddle/PaddleSlim.git ``` 安装paddleseg ```shell -pip install paddleseg +pip install paddleseg==2.5.0 ``` -注:安装[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)的目的只是为了直接使用PaddleSeg中的Dataloader组件,不涉及模型组网等。 +注:安装[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)的目的只是为了直接使用PaddleSeg中的Dataloader组件,不涉及模型组网等。推荐安装PaddleSeg 2.5.0, 不同版本的PaddleSeg的Dataloader返回数据的格式略有不同. #### 3.2 准备数据集 开发者可下载开源数据集 (如[AISegment](https://github.com/aisegmentcn/matting_human_datasets)) 或自定义语义分割数据集。请参考[PaddleSeg数据准备文档](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/docs/data/marker/marker_cn.md)来检查、对齐数据格式即可。 -可以通过以下命令下载人像分割示例数据: +本示例使用示例开源数据集 AISegment 数据集为例介绍如何对PP-HumanSeg-Lite进行自动压缩。示例数据集仅用于快速跑通自动压缩流程,并不能复现出 benckmark 表中的压缩效果。 +可以通过以下命令下载人像分割示例数据: ```shell -cd ./data -python download_data.py mini_humanseg - +cd PaddleSlim/example/auto_compression/semantic_segmentation +python ./data/download_data.py mini_humanseg +### 下载后的数据位置为 ./data/humanseg/ ``` +**提示:** +- PP-HumanSeg-Lite压缩过程使用的数据集 + + - 数据集:AISegment + PP-HumanSeg14K + 内部自建数据集。其中 AISegment 是开源数据集,可从[链接](https://github.com/aisegmentcn/matting_human_datasets)处获取;PP-HumanSeg14K 是 PaddleSeg 自建数据集,可从[官方渠道](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/contrib/PP-HumanSeg/paper.md#pp-humanseg14k-a-large-scale-teleconferencing-video-dataset)获取;内部数据集不对外公开。 + - 示例数据集: 用于快速跑通人像分割的压缩和推理流程, 不能用该数据集复现 benckmark 表中的压缩效果。 [下载链接](https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip) + +- PP-Liteseg,HRNet,UNet,Deeplabv3-ResNet50数据集 + + - cityscapes: 请从[cityscapes官网](https://www.cityscapes-dataset.com/login/)下载完整数据 + - 示例数据集: cityscapes数据集的一个子集,用于快速跑通压缩和推理流程,不能用该数据集复现 benchmark 表中的压缩效果。[下载链接](https://bj.bcebos.com/v1/paddle-slim-models/data/mini_cityscapes/mini_cityscapes.tar) + #### 3.3 准备预测模型 预测模型的格式为:`model.pdmodel` 和 `model.pdiparams`两个,带`pdmodel`的是模型文件,带`pdiparams`后缀的是权重文件。 @@ -112,86 +117,175 @@ tar -xzf ppseg_lite_portrait_398x224_with_softmax.tar.gz #### 3.4 自动压缩并产出模型 -自动压缩示例通过run.py脚本启动,会使用接口```paddleslim.auto_compression.AutoCompression```对模型进行自动压缩。首先要配置config文件中模型路径、数据集路径、蒸馏、量化、稀疏化和训练等部分的参数,配置完成后便可对模型进行非结构化稀疏、蒸馏和量化、蒸馏。 +自动压缩示例通过run.py脚本启动,会使用接口 ```paddleslim.auto_compression.AutoCompression``` 对模型进行自动压缩。首先要配置config文件中模型路径、数据集路径、蒸馏、量化、稀疏化和训练等部分的参数,配置完成后便可对模型进行非结构化稀疏、蒸馏和量化、蒸馏。 -当只设置训练参数,并传入``deploy_hardware``字段时,将自动搜索压缩策略进行压缩。以骁龙710(SD710)为部署硬件,进行自动压缩的运行命令如下: +当只设置训练参数,并在config文件中 ```Global``` 配置中传入 ```deploy_hardware``` 字段时,将自动搜索压缩策略进行压缩。以骁龙710(SD710)为部署硬件,进行自动压缩的运行命令如下: ```shell # 单卡启动 export CUDA_VISIBLE_DEVICES=0 -python run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_auto.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' \ - --deploy_hardware='SD710' +python run.py --config_path='./configs/pp_humanseg/pp_humanseg_auto.yaml' --save_dir='./save_compressed_model' # 多卡启动 export CUDA_VISIBLE_DEVICES=0,1 -python -m 
paddle.distributed.launch run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_auto.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' \ - --deploy_hardware='SD710' +python -m paddle.distributed.launch run.py --config_path='./configs/pp_humanseg/pp_humanseg_auto.yaml' --save_dir='./save_compressed_model' ``` + - 自行配置稀疏参数进行非结构化稀疏和蒸馏训练,配置参数含义详见[自动压缩超参文档](https://github.com/PaddlePaddle/PaddleSlim/blob/27dafe1c722476f1b16879f7045e9215b6f37559/demo/auto_compression/hyperparameter_tutorial.md)。具体命令如下所示: ```shell # 单卡启动 export CUDA_VISIBLE_DEVICES=0 -python run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_sparse.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' +python run.py --config_path='./configs/pp_humanseg/pp_humanseg_sparse.yaml' --save_dir='./save_sparse_model' # 多卡启动 export CUDA_VISIBLE_DEVICES=0,1 -python -m paddle.distributed.launch run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_sparse.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' +python -m paddle.distributed.launch run.py --config_path='./configs/pp_humanseg/pp_humanseg_sparse.yaml' --save_dir='./save_sparse_model' ``` - 自行配置量化参数进行量化和蒸馏训练,配置参数含义详见[自动压缩超参文档](https://github.com/PaddlePaddle/PaddleSlim/blob/27dafe1c722476f1b16879f7045e9215b6f37559/demo/auto_compression/hyperparameter_tutorial.md)。具体命令如下所示: ```shell # 单卡启动 export CUDA_VISIBLE_DEVICES=0 -python run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_qat.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' +python run.py --config_path='./configs/pp_humanseg/pp_humanseg_qat.yaml' --save_dir='./save_quant_model' # 多卡启动 export CUDA_VISIBLE_DEVICES=0,1 -python -m paddle.distributed.launch run.py \ - --model_dir='./ppseg_lite_portrait_398x224_with_softmax' \ - --model_filename='model.pdmodel' \ - --params_filename='model.pdiparams' \ - --save_dir='./save_model' \ - --strategy_config='configs/pp_humanseg/pp_humanseg_qat.yaml' \ - --dataset_config='configs/dataset/humanseg_dataset.yaml' +python -m paddle.distributed.launch run.py --config_path='./configs/pp_humanseg/pp_humanseg_qat.yaml' --save_dir='./save_quant_model' ``` 压缩完成后会在`save_dir`中产出压缩好的预测模型,可直接预测部署。 -## 4.预测部署 +## 4.评估精度 + +本小节以人像分割模型和小数据集为例, 介绍如何在测试集上评估压缩后的模型. + +下载经过量化训练压缩后的推理模型: +``` +wget https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/pp_humanseg_qat.zip +unzip pp_humanseg_qat.zip +``` + +通过以下命令下载人像分割示例数据: + +```shell +cd ./data +python download_data.py mini_humanseg +cd - + +``` + +执行以下命令评估模型在测试集上的精度: + +``` +python eval.py \ +--model_dir ./pp_humanseg_qat \ +--model_filename model.pdmodel \ +--params_filename model.pdiparams \ +--dataset_config configs/dataset/humanseg_dataset.yaml + +``` + +## 5.预测部署 + +本小节以人像分割为例, 介绍如何使用Paddle Inference推理库执行压缩后的模型. 
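+
+下面先给出一段最小的 Python 调用示意(仅为示例草图:假设已按后文 5.1/5.2 节安装好 Paddle Inference 并下载好模型;其中输入尺寸 398x224 与归一化方式是根据模型名的推测,请以实际模型配置为准;完整的预处理、后处理与 INT8/TensorRT 配置请直接使用下文的 `infer.py`):
+
+```python
+import cv2
+import numpy as np
+from paddle.inference import Config, create_predictor
+
+# 加载压缩后的推理模型(model.pdmodel / model.pdiparams)
+config = Config("./pp_humanseg_qat/model.pdmodel", "./pp_humanseg_qat/model.pdiparams")
+config.enable_use_gpu(256, 0)  # 显存池初始大小(MB)、GPU 卡号
+predictor = create_predictor(config)
+
+# 简化的预处理:读图、缩放到模型输入大小(假设为 398x224)、归一化并转成 NCHW
+img = cv2.imread("./data/human_demo.jpg")
+img = cv2.resize(img, (398, 224)).astype("float32") / 255.0
+img = np.ascontiguousarray(img.transpose(2, 0, 1)[None, ...])
+
+input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
+input_handle.reshape(list(img.shape))
+input_handle.copy_from_cpu(img)
+predictor.run()
+
+# 输出为逐像素的分割结果,尺寸还原与可视化等后处理见 infer.py
+output = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
+print(output.shape)
+```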
+ +### 5.1 安装推理库 + +请参考该链接安装Python版本的PaddleInference推理库: [推理库安装教程](https://www.paddlepaddle.org.cn/inference/user_guides/download_lib.html#python) + +### 5.2 准备模型和数据 + +从 [2.Benchmark](#2Benchmark) 的表格中获得压缩前后的推理模型的下载链接,执行以下命令下载并解压推理模型: + +下载Float32数值类型的模型: +``` +wget https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224_with_softmax.tar.gz +tar -xzf ppseg_lite_portrait_398x224_with_softmax.tar.gz +mv ppseg_lite_portrait_398x224_with_softmax pp_humanseg_fp32 +``` + +下载经过量化训练压缩后的推理模型: +``` +wget https://bj.bcebos.com/v1/paddle-slim-models/act/PaddleSeg/qat/pp_humanseg_qat.zip +unzip pp_humanseg_qat.zip +``` + +准备好需要处理的图片,这里直接使用人像示例图片 `./data/human_demo.jpg`。 + +### 5.3 执行推理 + +执行以下命令,直接使用飞桨框架的原生推理(仅支持Float32, 无需依赖TensorRT): + +``` +export CUDA_VISIBLE_DEVICES=0 +python infer.py \ +--image_file "./data/human_demo.jpg" \ +--model_path "./pp_humanseg_fp32/model.pdmodel" \ +--params_path "./pp_humanseg_fp32/model.pdiparams" \ +--save_file "./humanseg_result_fp32.png" \ +--dataset "human" \ +--benchmark True \ +--precision "fp32" +``` + +执行以下命令,使用Int8推理: + +``` +export CUDA_VISIBLE_DEVICES=0 +python infer.py \ +--image_file "./data/human_demo.jpg" \ +--model_path "./pp_humanseg_qat/model.pdmodel" \ +--params_path "./pp_humanseg_qat/model.pdiparams" \ +--save_file "./humanseg_result_qat.png" \ +--dataset "human" \ +--benchmark True \ +--use_trt True \ +--precision "int8" +``` + + + + + + + + + + + + + + + + + + +
+(示例效果对比,从左到右依次为:原始图片、FP32推理结果、Int8推理结果)
+ +执行以下命令查看更多关于 `infer.py` 使用说明: + +``` +python infer.py --help +``` + + +### 5.4 更多部署教程 - [Paddle Inference Python部署](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/docs/deployment/inference/python_inference.md) - [Paddle Inference C++部署](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/docs/deployment/inference/cpp_inference.md) - [Paddle Lite部署](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/docs/deployment/lite/lite.md) -## 5.FAQ +## 6.FAQ diff --git a/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml b/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml index 0d718cc6a..035c28902 100644 --- a/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml @@ -1,5 +1,8 @@ Global: reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-Deeplabv3-ResNet50 + model_filename: model + params_filename: params Distillation: alpha: 1.0 diff --git a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml index b3d22929f..36d3fde4a 100644 --- a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml @@ -1,5 +1,8 @@ Global: reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-HRNetW18-Seg + model_filename: model + params_filename: params Distillation: alpha: 1.0 diff --git a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_sparse.yaml b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_sparse.yaml index 6091d8894..922589c3b 100644 --- a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_sparse.yaml +++ b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_sparse.yaml @@ -1,5 +1,8 @@ Global: reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-HRNetW18-Seg + model_filename: model + params_filename: params Distillation: alpha: 1.0 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_auto.yaml b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_auto.yaml index 8adde821a..d9f312517 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_auto.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_auto.yaml @@ -1,3 +1,10 @@ +Global: + reader_config: configs/dataset/humanseg_dataset.yaml + model_dir: ./ppseg_lite_portrait_398x224_with_softmax + model_filename: model.pdmodel + params_filename: model.pdiparams + deploy_hardware: SD710 + TrainConfig: epochs: 14 eval_iter: 400 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml index d11af1633..8a917f985 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml @@ -1,3 +1,9 @@ +Global: + reader_config: configs/dataset/humanseg_dataset.yaml + model_dir: ./ppseg_lite_portrait_398x224_with_softmax + model_filename: model.pdmodel + params_filename: model.pdiparams + Distillation: 
alpha: 1.0 loss: l2 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_sparse.yaml b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_sparse.yaml index 5e71f3967..1cb7adb13 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_sparse.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_sparse.yaml @@ -1,3 +1,9 @@ +Global: + reader_config: configs/dataset/humanseg_dataset.yaml + model_dir: ./ppseg_lite_portrait_398x224_with_softmax + model_filename: model.pdmodel + params_filename: model.pdiparams + Distillation: alpha: 1.0 loss: l2 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_auto.yaml b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_auto.yaml index 42ff6c5db..003078aa8 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_auto.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_auto.yaml @@ -1,3 +1,9 @@ +Global: + reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-PPLIteSegSTDC1 + model_filename: model + params_filename: params + TrainConfig: epochs: 14 eval_iter: 90 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml index 6bb5b42dc..67ee9d690 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml @@ -1,3 +1,9 @@ +Global: + reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-PPLIteSegSTDC1 + model_filename: model + params_filename: params + Distillation: alpha: 1.0 loss: l2 diff --git a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_sparse.yaml b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_sparse.yaml index f3a6d9589..52f256da8 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_sparse.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_sparse.yaml @@ -1,3 +1,9 @@ +Global: + reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-PPLIteSegSTDC1 + model_filename: model + params_filename: params + Distillation: alpha: 1.0 loss: l2 diff --git a/example/auto_compression/semantic_segmentation/configs/unet/unet_channel_prune.yaml b/example/auto_compression/semantic_segmentation/configs/unet/unet_channel_prune.yaml index 45716fa8f..920c3b4d7 100644 --- a/example/auto_compression/semantic_segmentation/configs/unet/unet_channel_prune.yaml +++ b/example/auto_compression/semantic_segmentation/configs/unet/unet_channel_prune.yaml @@ -1,5 +1,8 @@ Global: reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-UNet + model_filename: model + params_filename: params Distillation: alpha: 1.0 diff --git a/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml b/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml index 02009ebff..f686f41f1 100644 --- a/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml @@ -1,5 +1,8 @@ Global: 
reader_config: configs/dataset/cityscapes_1024x512_scale1.0.yml + model_dir: ./RES-paddle2-UNet + model_filename: model + params_filename: params Distillation: alpha: 1.0 diff --git a/example/auto_compression/semantic_segmentation/data/cityscape_demo.jpg b/example/auto_compression/semantic_segmentation/data/cityscape_demo.jpg new file mode 100644 index 000000000..de0b21d16 Binary files /dev/null and b/example/auto_compression/semantic_segmentation/data/cityscape_demo.jpg differ diff --git a/example/auto_compression/semantic_segmentation/data/human_demo.jpg b/example/auto_compression/semantic_segmentation/data/human_demo.jpg new file mode 100644 index 000000000..9b499ab55 Binary files /dev/null and b/example/auto_compression/semantic_segmentation/data/human_demo.jpg differ diff --git a/example/auto_compression/semantic_segmentation/eval.py b/example/auto_compression/semantic_segmentation/eval.py new file mode 100644 index 000000000..520620056 --- /dev/null +++ b/example/auto_compression/semantic_segmentation/eval.py @@ -0,0 +1,129 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import random +import paddle +import numpy as np +from tqdm import tqdm +from paddleseg.cvlibs import Config as PaddleSegDataConfig +from paddleseg.utils import worker_init_fn + +from paddleseg.core.infer import reverse_transform +from paddleseg.utils import metrics + + +def parse_args(): + parser = argparse.ArgumentParser(description='Model evaluation') + parser.add_argument( + '--model_dir', + type=str, + default=None, + help="inference model directory.") + parser.add_argument( + '--model_filename', + type=str, + default=None, + help="inference model filename.") + parser.add_argument( + '--params_filename', + type=str, + default=None, + help="inference params filename.") + parser.add_argument( + '--dataset_config', + type=str, + default=None, + help="path of dataset config.") + return parser.parse_args() + + +def eval(args): + exe = paddle.static.Executor(paddle.CUDAPlace(0)) + inference_program, feed_target_names, fetch_targets = paddle.static.load_inference_model( + args.model_dir, + exe, + model_filename=args.model_filename, + params_filename=args.params_filename) + + data_cfg = PaddleSegDataConfig(args.dataset_config) + eval_dataset = data_cfg.val_dataset + + batch_sampler = paddle.io.BatchSampler( + eval_dataset, batch_size=1, shuffle=False, drop_last=False) + loader = paddle.io.DataLoader( + eval_dataset, + batch_sampler=batch_sampler, + num_workers=1, + return_list=True, ) + + total_iters = len(loader) + intersect_area_all = 0 + pred_area_all = 0 + label_area_all = 0 + + print("Start evaluating (total_samples: {}, total_iters: {})...".format( + len(eval_dataset), total_iters)) + + for (image, label) in tqdm(loader): + label = np.array(label).astype('int64') + ori_shape = np.array(label).shape[-2:] + image = np.array(image) + logits = exe.run(inference_program, + feed={feed_target_names[0]: image}, + 
fetch_list=fetch_targets, + return_numpy=True) + + paddle.disable_static() + logit = logits[0] + + logit = reverse_transform( + paddle.to_tensor(logit), + ori_shape, + eval_dataset.transforms.transforms, + mode='bilinear') + pred = paddle.to_tensor(logit) + if len( + pred.shape + ) == 4: # for humanseg model whose prediction is distribution but not class id + pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32') + + intersect_area, pred_area, label_area = metrics.calculate_area( + pred, + paddle.to_tensor(label), + eval_dataset.num_classes, + ignore_index=eval_dataset.ignore_index) + intersect_area_all = intersect_area_all + intersect_area + pred_area_all = pred_area_all + pred_area + label_area_all = label_area_all + label_area + + class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all, + label_area_all) + class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all) + kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all) + class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all, + label_area_all) + + infor = "[EVAL] #Images: {} mIoU: {:.4f} Acc: {:.4f} Kappa: {:.4f} Dice: {:.4f}".format( + len(eval_dataset), miou, acc, kappa, mdice) + print(infor) + + +if __name__ == '__main__': + rank_id = paddle.distributed.get_rank() + place = paddle.CUDAPlace(rank_id) + args = parse_args() + paddle.enable_static() + eval(args) diff --git a/example/auto_compression/semantic_segmentation/infer.py b/example/auto_compression/semantic_segmentation/infer.py new file mode 100644 index 000000000..5e556256b --- /dev/null +++ b/example/auto_compression/semantic_segmentation/infer.py @@ -0,0 +1,182 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
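+
+# 使用说明(示例命令,模型与图片路径请替换为实际路径,各参数含义见文件末尾的 argparse 定义):
+#   python infer.py --model_path=./model.pdmodel --params_path=./model.pdiparams \
+#       --image_file=./data/human_demo.jpg --dataset=human --save_file=./result.png
+# 如需使用 TensorRT INT8 推理,可再加上 --use_trt=True --precision=int8(需安装带 TensorRT 的 Paddle Inference)。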
+ +import os +import cv2 +import numpy as np +import argparse +import time +import PIL +from PIL import Image +import paddle +import paddleseg.transforms as T +from paddleseg.cvlibs import Config as PaddleSegDataConfig +from paddleseg.core.infer import reverse_transform +from paddleseg.utils import get_image_list +from paddleseg.utils.visualize import get_pseudo_color_map +from paddle.inference import create_predictor, PrecisionType +from paddle.inference import Config as PredictConfig + + +def _transforms(dataset): + transforms = [] + if dataset == "human": + transforms.append(T.PaddingByAspectRatio(aspect_ratio=1.77777778)) + transforms.append(T.Resize(target_size=[398, 224])) + transforms.append(T.Normalize()) + elif dataset == "cityscape": + transforms.append(T.Normalize()) + return transforms + return T.Compose(transforms) + + +def auto_tune_trt(args): + auto_tuned_shape_file = "./auto_tuning_shape" + pred_cfg = PredictConfig(args.model_path, args.params_path) + pred_cfg.enable_use_gpu(100, 0) + pred_cfg.collect_shape_range_info("./auto_tuning_shape") + predictor = create_predictor(pred_cfg) + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + transforms = _transforms(args.dataset) + transform = T.Compose(transforms) + img = cv2.imread(args.image_file).astype('float32') + data, _ = transform(img) + data = np.array(data)[np.newaxis, :] + input_handle.reshape(data.shape) + input_handle.copy_from_cpu(data) + predictor.run() + return auto_tuned_shape_file + + +def load_predictor(args): + pred_cfg = PredictConfig(args.model_path, args.params_path) + pred_cfg.disable_glog_info() + pred_cfg.enable_memory_optim() + pred_cfg.switch_ir_optim(True) + if args.device == "GPU": + pred_cfg.enable_use_gpu(100, 0) + + if args.use_trt: + # To collect the dynamic shapes of inputs for TensorRT engine + auto_tuned_shape_file = auto_tune_trt(args) + precision_map = { + "fp16": PrecisionType.Half, + "fp32": PrecisionType.Float32, + "int8": PrecisionType.Int8 + } + pred_cfg.enable_tensorrt_engine( + workspace_size=1 << 30, + max_batch_size=1, + min_subgraph_size=4, + precision_mode=precision_map[args.precision], + use_static=False, + use_calib_mode=False) + allow_build_at_runtime = True + pred_cfg.enable_tuned_tensorrt_dynamic_shape(auto_tuned_shape_file, + allow_build_at_runtime) + predictor = create_predictor(pred_cfg) + return predictor + + +def predict_image(args, predictor): + + transforms = _transforms(args.dataset) + transform = T.Compose(transforms) + + # Step1: Load image and preprocess + im = cv2.imread(args.image_file).astype('float32') + data, _ = transform(im) + data = np.array(data)[np.newaxis, :] + + # Step2: Inference + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + input_handle.reshape(data.shape) + input_handle.copy_from_cpu(data) + + warmup, repeats = 0, 1 + if args.benchmark: + warmup, repeats = 20, 100 + + for i in range(warmup): + predictor.run() + + start_time = time.time() + for i in range(repeats): + predictor.run() + results = output_handle.copy_to_cpu() + total_time = time.time() - start_time + avg_time = float(total_time) / repeats + print(f"Average inference time: \033[91m{round(avg_time*1000, 2)}ms\033[0m") + + # Step3: Post process + if args.dataset == "human": + results = reverse_transform( + paddle.to_tensor(results), im.shape, transforms, mode='bilinear') + 
results = np.argmax(results, axis=1) + result = get_pseudo_color_map(results[0]) + + # Step4: Save result to file + if args.save_file is not None: + result.save(args.save_file) + print(f"Saved result to \033[91m{args.save_file}\033[0m") + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument( + '--image_file', type=str, help="Image path to be processed.") + parser.add_argument( + '--save_file', type=str, help="The path to save the processed image.") + parser.add_argument( + '--model_path', type=str, help="Inference model filepath.") + parser.add_argument( + '--params_path', type=str, help="Inference parameters filepath.") + parser.add_argument( + '--dataset', + type=str, + default="human", + choices=["human", "cityscape"], + help="The type of given image which can be 'human' or 'cityscape'.") + parser.add_argument( + '--benchmark', + type=bool, + default=False, + help="Whether to run benchmark or not.") + parser.add_argument( + '--use_trt', + type=bool, + default=False, + help="Whether to use tensorrt engine or not.") + parser.add_argument( + '--device', + type=str, + default='GPU', + choices=["CPU", "GPU"], + help="Choose the device you want to run, it can be: CPU/GPU, default is GPU" + ) + parser.add_argument( + '--precision', + type=str, + default='fp32', + choices=["fp32", "fp16", "int8"], + help="The precision of inference. It can be 'fp32', 'fp16' or 'int8'. Default is 'fp16'." + ) + args = parser.parse_args() + predictor = load_predictor(args) + predict_image(args, predictor) diff --git a/example/auto_compression/semantic_segmentation/run.py b/example/auto_compression/semantic_segmentation/run.py index c974f5f67..a8a486c25 100644 --- a/example/auto_compression/semantic_segmentation/run.py +++ b/example/auto_compression/semantic_segmentation/run.py @@ -21,61 +21,38 @@ from paddleseg.utils import worker_init_fn from paddleslim.auto_compression import AutoCompression +from paddleslim.common import load_config as load_slim_config from paddleseg.core.infer import reverse_transform from paddleseg.utils import metrics -def parse_args(): - parser = argparse.ArgumentParser(description='Model training') +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--model_dir', + '--config_path', type=str, default=None, - help="inference model directory.") - parser.add_argument( - '--model_filename', - type=str, - default=None, - help="inference model filename.") - parser.add_argument( - '--params_filename', - type=str, - default=None, - help="inference params filename.") + help="path of compression strategy config.") parser.add_argument( '--save_dir', type=str, default=None, help="directory to save compressed model.") parser.add_argument( - '--strategy_config', - type=str, - default=None, - help="path of compression strategy config.") - parser.add_argument( - '--dataset_config', + '--devices', type=str, - default=None, - help="path of dataset config.") - parser.add_argument( - '--deploy_hardware', - type=str, - default=None, - help="The hardware you want to deploy.") - return parser.parse_args() + default='gpu', + help="which device used to compress.") + return parser def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): - - nranks = paddle.distributed.ParallelEnv().local_rank - if nranks > 1 and paddle.distributed.get_rank() != 0: - return batch_sampler = paddle.io.BatchSampler( eval_dataset, batch_size=1, shuffle=False, drop_last=False) loader = paddle.io.DataLoader( eval_dataset, 
batch_sampler=batch_sampler, - num_workers=1, + num_workers=0, return_list=True, ) total_iters = len(loader) @@ -145,12 +122,20 @@ def gen(): return gen -if __name__ == '__main__': +def main(args): + all_config = load_slim_config(args.config_path) + assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + config = all_config["Global"] - args = parse_args() - paddle.enable_static() + rank_id = paddle.distributed.get_rank() + if args.devices == 'gpu': + place = paddle.CUDAPlace(rank_id) + paddle.set_device('gpu') + else: + place = paddle.CPUPlace() + paddle.set_device('cpu') # step1: load dataset config and create dataloader - data_cfg = PaddleSegDataConfig(args.dataset_config) + data_cfg = PaddleSegDataConfig(config['reader_config']) train_dataset = data_cfg.train_dataset eval_dataset = data_cfg.val_dataset batch_sampler = paddle.io.DistributedBatchSampler( @@ -160,22 +145,33 @@ def gen(): drop_last=True) train_loader = paddle.io.DataLoader( train_dataset, + places=[place], batch_sampler=batch_sampler, - num_workers=2, + num_workers=0, return_list=True, worker_init_fn=worker_init_fn) train_dataloader = reader_wrapper(train_loader) + nranks = paddle.distributed.get_world_size() + rank_id = paddle.distributed.get_rank() + # step2: create and instance of AutoCompression ac = AutoCompression( - model_dir=args.model_dir, - model_filename=args.model_filename, - params_filename=args.params_filename, + model_dir=config['model_dir'], + model_filename=config['model_filename'], + params_filename=config['params_filename'], save_dir=args.save_dir, - config=args.strategy_config, + config=all_config, train_dataloader=train_dataloader, - eval_callback=eval_function, - deploy_hardware=args.deploy_hardware) + eval_callback=eval_function if nranks > 1 and rank_id != 0 else None, + deploy_hardware=config.get('deploy_hardware') or None) # step3: start the compression job ac.compress() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + args = parser.parse_args() + main(args) diff --git a/example/auto_compression/tensorflow_mobilenet/README.md b/example/auto_compression/tensorflow_mobilenet/README.md index 7a2dca214..0a41d6e96 100644 --- a/example/auto_compression/tensorflow_mobilenet/README.md +++ b/example/auto_compression/tensorflow_mobilenet/README.md @@ -32,7 +32,7 @@ #### 3.1 准备环境 - PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) -- PaddleSlim develop版本 +- PaddleSlim >= 2.3 - [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) >= 1.3.6 - opencv-python @@ -46,8 +46,7 @@ pip install paddlepaddle-gpu (2)安装paddleslim: ```shell -https://github.com/PaddlePaddle/PaddleSlim.git -python setup.py install +pip install paddleslim ``` (3)安装TensorFlow: diff --git a/example/auto_compression/tensorflow_mobilenet/eval.py b/example/auto_compression/tensorflow_mobilenet/eval.py index 85e5fdaf5..bf0987e35 100644 --- a/example/auto_compression/tensorflow_mobilenet/eval.py +++ b/example/auto_compression/tensorflow_mobilenet/eval.py @@ -23,7 +23,7 @@ import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): @@ -93,7 +93,8 @@ def eval(): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' 
not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/auto_compression/tensorflow_mobilenet/run.py b/example/auto_compression/tensorflow_mobilenet/run.py index 86345ec20..aefd2941f 100644 --- a/example/auto_compression/tensorflow_mobilenet/run.py +++ b/example/auto_compression/tensorflow_mobilenet/run.py @@ -23,7 +23,7 @@ import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression @@ -107,7 +107,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/post_training_quantization/analysis.md b/example/post_training_quantization/analysis.md new file mode 100644 index 000000000..ce5e7a75e --- /dev/null +++ b/example/post_training_quantization/analysis.md @@ -0,0 +1,49 @@ +# 量化分析工具详细教程 + +## 1. 量化分析工具功能 +1. 遍历模型所有层,依次量化该层,计算量化后精度。为所有只量化一层的模型精度排序,可视化不适合量化的层,以供量化时可选择性跳过不适合量化的层。 +2. 可视化量化效果最好和最差的层的权重和激活分布图,以供分析模型量化效果的原因。 +3. 【敬请期待】输入预期精度,直接产出符合预期精度的量化模型。 + +## 2. paddleslim.quant.AnalysisQuant 可传入参数解析 +```yaml +model_dir +model_filename: None +params_filename: None +eval_function: None +data_loader: None +save_dir: 'analysis_results' +checkpoint_name: 'analysis_checkpoint.pkl' +num_histogram_plots: 10 +ptq_config +``` +- model_dir: 必须传入的模型文件路径,可为文件夹名;若模型为ONNX类型,直接输入'.onnx'模型文件名称即可。 +- model_filename: 默认为None,若model_dir为文件夹名,则必须传入以'.pdmodel'结尾的模型名称,若model_dir为'.onnx'模型文件名称,则不需要传入。 +- params_filename: 默认为None,若model_dir为文件夹名,则必须传入以'.pdiparams'结尾的模型名称,若model_dir为'.onnx'模型文件名称,则不需要传入。 +- eval_function:目前不支持为None,需要传入自定义的验证函数。 +- data_loader:模型校准时使用的数据,DataLoader继承自`paddle.io.DataLoader`。可以直接使用模型套件中的DataLoader,或者根据[paddle.io.DataLoader](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/io/DataLoader_cn.html#dataloader)自定义所需要的DataLoader。 +- save_dir:分析后保存模型精度或pdf等文件的文件夹,默认为`analysis_results`。 +- checkpoint_name:由于模型可能存在大量层需要分析,因此分析过程中会中间保存结果,如果程序中断会自动加载已经分析好的结果,默认为`analysis_checkpoint.pkl`。 +- num_histogram_plots:需要可视化的直方分布图数量。可视化量化效果最好和最坏的该数量个权重和激活的分布图。默认为10。若不需要可视化直方图,设置为0即可。 +- ptq_config:可传入的离线量化中的参数,详细可参考[离线量化文档](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/demo/quant/quant_post)。 + + + + +## 3. 量化分析工具产出内容 + +量化分析工具会默认会产出以下目录: +``` +analysis_results/ +├── analysis.txt +├── best_weight_hist_result.pdf +├── best_act_hist_result.pdf +├── worst_weight_hist_result.pdf +├── worst_act_hist_result.pdf +``` +- 所有只量化一层的模型精度排序,将默认保存在 `./analysis_results/analysis.txt` 中。 +- 通过设置参数`num_histogram_plots`,可选择绘出该数量个量化效果最好和最差层的weight和activation的直方分布图,将以PDF形式保存在 `./analysis_results` 文件夹下, 分别保存为 `best_weight_hist_result.pdf`,`best_act_hist_result.pdf`,`worst_weight_hist_result.pdf` 和 `worst_act_hist_result.pdf` 中以供对比分析。 + + +## 3. 
根据分析结果执行离线量化 +执行完量化分析工具后,可根据 `analysis.txt` 中的精度排序,在量化中去掉效果较差的层,具体操作为:在调用 `paddleslim.quant.quant_post_static` 时加入参数 `skip_tensor_list`,将需要去掉的层传入即可。 diff --git a/example/post_training_quantization/detection/analysis.py b/example/post_training_quantization/detection/analysis.py new file mode 100644 index 000000000..ba65d7291 --- /dev/null +++ b/example/post_training_quantization/detection/analysis.py @@ -0,0 +1,179 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from ppdet.core.workspace import load_config, merge_config +from ppdet.core.workspace import create +from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval +from keypoint_utils import keypoint_post_process +from post_process import PPYOLOEPostProcess +from paddleslim.quant.analysis import AnalysisQuant + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of analysis config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + return parser + + +def reader_wrapper(reader, input_list): + def gen(): + for data in reader: + in_dict = {} + if isinstance(input_list, list): + for input_name in input_list: + in_dict[input_name] = data[input_name] + elif isinstance(input_list, dict): + for input_name in input_list.keys(): + in_dict[input_list[input_name]] = data[input_name] + yield in_dict + + return gen + + +def convert_numpy_data(data, metric): + data_all = {} + data_all = {k: np.array(v) for k, v in data.items()} + if isinstance(metric, VOCMetric): + for k, v in data_all.items(): + if not isinstance(v[0], np.ndarray): + tmp_list = [] + for t in v: + tmp_list.append(np.array(t)) + data_all[k] = np.array(tmp_list) + else: + data_all = {k: np.array(v) for k, v in data.items()} + return data_all + + +def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for batch_id, data in enumerate(val_loader): + data_all = convert_numpy_data(data, metric) + data_input = {} + for k, v in data.items(): + if isinstance(config['input_list'], list): + if k in test_feed_names: + data_input[k] = np.array(v) + elif isinstance(config['input_list'], dict): + if k in config['input_list'].keys(): + data_input[config['input_list'][k]] = np.array(v) + outs = exe.run(compiled_test_program, + feed=data_input, + fetch_list=test_fetch_list, + return_numpy=False) + res = {} + if 'arch' in config and config['arch'] == 'keypoint': + res = keypoint_post_process(data, data_input, exe, + compiled_test_program, + test_fetch_list, outs) + if 'arch' in config and config['arch'] == 'PPYOLOE': + postprocess = PPYOLOEPostProcess( + score_threshold=0.01, 
nms_threshold=0.6) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + else: + for out in outs: + v = np.array(out) + if len(v.shape) > 1: + res['bbox'] = v + else: + res['bbox_num'] = v + + metric.update(data_all, res) + t.update() + + metric.accumulate() + metric.log() + map_res = metric.get_results() + metric.reset() + map_key = 'keypoint' if 'arch' in config and config[ + 'arch'] == 'keypoint' else 'bbox' + return map_res[map_key][0] + + +def main(): + + global config + config = load_config(FLAGS.config_path) + ptq_config = config['PTQ'] + + data_loader = create('EvalReader')(config['EvalDataset'], + config['worker_num'], + return_list=True) + data_loader = reader_wrapper(data_loader, config['input_list']) + + dataset = config['EvalDataset'] + global val_loader + _eval_batch_sampler = paddle.io.BatchSampler( + dataset, batch_size=config['EvalReader']['batch_size']) + val_loader = create('EvalReader')(dataset, + config['worker_num'], + batch_sampler=_eval_batch_sampler, + return_list=True) + global metric + if config['metric'] == 'COCO': + clsid2catid = {v: k for k, v in dataset.catid2clsid.items()} + anno_file = dataset.get_anno() + metric = COCOMetric( + anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox') + elif config['metric'] == 'VOC': + metric = VOCMetric( + label_list=dataset.get_label_list(), + class_num=config['num_classes'], + map_type=config['map_type']) + elif config['metric'] == 'KeyPointTopDownCOCOEval': + anno_file = dataset.get_anno() + metric = KeyPointTopDownCOCOEval(anno_file, + len(dataset), 17, 'output_eval') + else: + raise ValueError("metric currently only supports COCO and VOC.") + + analyzer = AnalysisQuant( + model_dir=config["model_dir"], + model_filename=config["model_filename"], + params_filename=config["params_filename"], + eval_function=eval_function, + data_loader=data_loader, + save_dir=config['save_dir'], + ptq_config=ptq_config) + analyzer.analysis() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/detection/configs/picodet_s_analysis.yaml b/example/post_training_quantization/detection/configs/picodet_s_analysis.yaml new file mode 100644 index 000000000..6c640795c --- /dev/null +++ b/example/post_training_quantization/detection/configs/picodet_s_analysis.yaml @@ -0,0 +1,47 @@ +input_list: ['image', 'scale_factor'] +model_dir: ./picodet_s_416_coco_lcnet/ +model_filename: model.pdmodel +params_filename: model.pdiparams +save_dir: ./analysis_results +metric: COCO +num_classes: 80 + +PTQ: + quantizable_op_type: ["conv2d", "depthwise_conv2d"] + weight_quantize_type: 'abs_max' + activation_quantize_type: 'moving_average_abs_max' + is_full_quantize: False + batch_size: 10 + batch_nums: 10 + +# Datset configuration +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: /dataset/coco/ + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: /dataset/coco/ + +eval_height: &eval_height 416 +eval_width: &eval_width 416 +eval_size: &eval_size [*eval_height, *eval_width] + +worker_num: 0 + +EvalReader: + inputs_def: + image_shape: [1, 3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False} + - NormalizeImage: {is_scale: true, mean: 
[0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_size: 32 + + + diff --git a/example/post_training_quantization/detection/configs/picodet_s_ptq.yaml b/example/post_training_quantization/detection/configs/picodet_s_ptq.yaml new file mode 100644 index 000000000..005c0d46c --- /dev/null +++ b/example/post_training_quantization/detection/configs/picodet_s_ptq.yaml @@ -0,0 +1,38 @@ +input_list: ['image', 'scale_factor'] +model_dir: ./picodet_s_416_coco_lcnet/ +model_filename: model.pdmodel +params_filename: model.pdiparams +skip_tensor_list: None + +metric: COCO +num_classes: 80 + +# Datset configuration +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: /dataset/coco/ + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: /dataset/coco/ + +eval_height: &eval_height 416 +eval_width: &eval_width 416 +eval_size: &eval_size [*eval_height, *eval_width] + +worker_num: 0 + +EvalReader: + inputs_def: + image_shape: [1, 3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_size: 32 + diff --git a/example/auto_compression/pytorch_yolov5/eval.py b/example/post_training_quantization/detection/eval.py similarity index 84% rename from example/auto_compression/pytorch_yolov5/eval.py rename to example/post_training_quantization/detection/eval.py index 8cc252cd2..fc0c09ae4 100644 --- a/example/auto_compression/pytorch_yolov5/eval.py +++ b/example/post_training_quantization/detection/eval.py @@ -19,10 +19,10 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create -from ppdet.metrics import COCOMetric, VOCMetric -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config - -from post_process import YOLOv5PostProcess +from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval +from paddleslim.common import load_config as load_slim_config +from keypoint_utils import keypoint_post_process +from post_process import PPYOLOEPostProcess def argsparser(): @@ -42,13 +42,6 @@ def argsparser(): return parser -def print_arguments(args): - print('----------- Running Arguments -----------') - for arg, value in sorted(vars(args).items()): - print('%s: %s' % (arg, value)) - print('------------------------------------------') - - def reader_wrapper(reader, input_list): def gen(): for data in reader: @@ -84,8 +77,8 @@ def eval(): place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() exe = paddle.static.Executor(place) - val_program, feed_target_names, fetch_targets = paddle.fluid.io.load_inference_model( - global_config["model_dir"], + val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model( + global_config["model_dir"].rstrip('/'), exe, model_filename=global_config["model_filename"], params_filename=global_config["params_filename"]) @@ -108,9 +101,12 @@ def eval(): fetch_list=fetch_targets, return_numpy=False) res = {} - if 'arch' in global_config and global_config['arch'] == 'YOLOv5': - postprocess = YOLOv5PostProcess( - score_threshold=0.001, nms_threshold=0.6, multi_label=True) + if 'arch' in global_config and global_config['arch'] == 'keypoint': + res = keypoint_post_process(data, data_input, exe, val_program, + fetch_targets, outs) + if 
'arch' in global_config and global_config['arch'] == 'PPYOLOE': + postprocess = PPYOLOEPostProcess( + score_threshold=0.01, nms_threshold=0.6) res = postprocess(np.array(outs[0]), data_all['scale_factor']) else: for out in outs: @@ -149,6 +145,10 @@ def main(): label_list=dataset.get_label_list(), class_num=reader_cfg['num_classes'], map_type=reader_cfg['map_type']) + elif reader_cfg['metric'] == 'KeyPointTopDownCOCOEval': + anno_file = dataset.get_anno() + metric = KeyPointTopDownCOCOEval(anno_file, + len(dataset), 17, 'output_eval') else: raise ValueError("metric currently only supports COCO and VOC.") global_config['metric'] = metric @@ -160,8 +160,6 @@ def main(): paddle.enable_static() parser = argsparser() FLAGS = parser.parse_args() - print_arguments(FLAGS) - assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] paddle.set_device(FLAGS.devices) diff --git a/example/post_training_quantization/detection/keypoint_utils.py b/example/post_training_quantization/detection/keypoint_utils.py new file mode 100644 index 000000000..d17095f45 --- /dev/null +++ b/example/post_training_quantization/detection/keypoint_utils.py @@ -0,0 +1,307 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +import cv2 +import copy +from paddleslim.common import get_logger + +logger = get_logger(__name__, level=logging.INFO) + +__all__ = ['keypoint_post_process'] + + +def flip_back(output_flipped, matched_parts): + assert output_flipped.ndim == 4,\ + 'output_flipped should be [batch_size, num_joints, height, width]' + + output_flipped = output_flipped[:, :, :, ::-1] + + for pair in matched_parts: + tmp = output_flipped[:, pair[0], :, :].copy() + output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] + output_flipped[:, pair[1], :, :] = tmp + + return output_flipped + + +def get_affine_transform(center, + input_size, + rot, + output_size, + shift=(0., 0.), + inv=False): + """Get the affine transform matrix, given the center/scale/rot/output_size. + Args: + center (np.ndarray[2, ]): Center of the bounding box (x, y). + input_size (np.ndarray[2, ]): Size of input feature (width, height). + rot (float): Rotation angle (degree). + output_size (np.ndarray[2, ]): Size of the destination heatmaps. + shift (0-100%): Shift translation ratio wrt the width/height. + Default (0., 0.). + inv (bool): Option to inverse the affine transform direction. + (inv=False: src->dst or inv=True: dst->src) + Returns: + np.ndarray: The transform matrix. 
+ """ + assert len(center) == 2 + assert len(output_size) == 2 + assert len(shift) == 2 + + if not isinstance(input_size, (np.ndarray, list)): + input_size = np.array([input_size, input_size], dtype=np.float32) + scale_tmp = input_size + + shift = np.array(shift) + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = rotate_point([0., src_w * -0.5], rot_rad) + dst_dir = np.array([0., dst_w * -0.5]) + + src = np.zeros((3, 2), dtype=np.float32) + + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + src[2, :] = _get_3rd_point(src[0, :], src[1, :]) + + dst = np.zeros((3, 2), dtype=np.float32) + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir + dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def _get_3rd_point(a, b): + """To calculate the affine matrix, three pairs of points are required. This + function is used to get the 3rd point, given 2D points a & b. + The 3rd point is defined by rotating vector `a - b` by 90 degrees + anticlockwise, using b as the rotation center. + Args: + a (np.ndarray): point(x,y) + b (np.ndarray): point(x,y) + Returns: + np.ndarray: The 3rd point. + """ + assert len( + a) == 2, 'input of _get_3rd_point should be point with length of 2' + assert len( + b) == 2, 'input of _get_3rd_point should be point with length of 2' + direction = a - b + third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32) + + return third_pt + + +def rotate_point(pt, angle_rad): + """Rotate a point by an angle. + Args: + pt (list[float]): 2 dimensional point to be rotated + angle_rad (float): rotation angle by radian + Returns: + list[float]: Rotated point. 
+ """ + assert len(pt) == 2 + sn, cs = np.sin(angle_rad), np.cos(angle_rad) + new_x = pt[0] * cs - pt[1] * sn + new_y = pt[0] * sn + pt[1] * cs + rotated_pt = [new_x, new_y] + + return rotated_pt + + +def affine_transform(pt, t): + new_pt = np.array([pt[0], pt[1], 1.]).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def transform_preds(coords, center, scale, output_size): + target_coords = np.zeros(coords.shape) + trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1) + for p in range(coords.shape[0]): + target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) + return target_coords + + +class HRNetPostProcess(object): + def __init__(self, use_dark=True): + self.use_dark = use_dark + + def get_max_preds(self, heatmaps): + '''get predictions from score maps + Args: + heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) + Returns: + preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords + maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints + ''' + assert isinstance(heatmaps, + np.ndarray), 'heatmaps should be numpy.ndarray' + assert heatmaps.ndim == 4, 'batch_images should be 4-ndim' + + batch_size = heatmaps.shape[0] + num_joints = heatmaps.shape[1] + width = heatmaps.shape[3] + heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1)) + idx = np.argmax(heatmaps_reshaped, 2) + maxvals = np.amax(heatmaps_reshaped, 2) + + maxvals = maxvals.reshape((batch_size, num_joints, 1)) + idx = idx.reshape((batch_size, num_joints, 1)) + + preds = np.tile(idx, (1, 1, 2)).astype(np.float32) + + preds[:, :, 0] = (preds[:, :, 0]) % width + preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) + + pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) + pred_mask = pred_mask.astype(np.float32) + + preds *= pred_mask + + return preds, maxvals + + def gaussian_blur(self, heatmap, kernel): + border = (kernel - 1) // 2 + batch_size = heatmap.shape[0] + num_joints = heatmap.shape[1] + height = heatmap.shape[2] + width = heatmap.shape[3] + for i in range(batch_size): + for j in range(num_joints): + origin_max = np.max(heatmap[i, j]) + dr = np.zeros((height + 2 * border, width + 2 * border)) + dr[border:-border, border:-border] = heatmap[i, j].copy() + dr = cv2.GaussianBlur(dr, (kernel, kernel), 0) + heatmap[i, j] = dr[border:-border, border:-border].copy() + heatmap[i, j] *= origin_max / np.max(heatmap[i, j]) + return heatmap + + def dark_parse(self, hm, coord): + heatmap_height = hm.shape[0] + heatmap_width = hm.shape[1] + px = int(coord[0]) + py = int(coord[1]) + if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2: + dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1]) + dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px]) + dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2]) + dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \ + + hm[py-1][px-1]) + dyy = 0.25 * ( + hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px]) + derivative = np.matrix([[dx], [dy]]) + hessian = np.matrix([[dxx, dxy], [dxy, dyy]]) + if dxx * dyy - dxy**2 != 0: + hessianinv = hessian.I + offset = -hessianinv * derivative + offset = np.squeeze(np.array(offset.T), axis=0) + coord += offset + return coord + + def dark_postprocess(self, hm, coords, kernelsize): + ''' + DARK postpocessing, Zhang et al. Distribution-Aware Coordinate + Representation for Human Pose Estimation (CVPR 2020). 
+ ''' + hm = self.gaussian_blur(hm, kernelsize) + hm = np.maximum(hm, 1e-10) + hm = np.log(hm) + for n in range(coords.shape[0]): + for p in range(coords.shape[1]): + coords[n, p] = self.dark_parse(hm[n][p], coords[n][p]) + return coords + + def get_final_preds(self, heatmaps, center, scale, kernelsize=3): + """ + The highest heatvalue location with a quarter offset in the + direction from the highest response to the second highest response. + Args: + heatmaps (numpy.ndarray): The predicted heatmaps + center (numpy.ndarray): The boxes center + scale (numpy.ndarray): The scale factor + Returns: + preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords + maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints + """ + coords, maxvals = self.get_max_preds(heatmaps) + + heatmap_height = heatmaps.shape[2] + heatmap_width = heatmaps.shape[3] + + if self.use_dark: + coords = self.dark_postprocess(heatmaps, coords, kernelsize) + else: + for n in range(coords.shape[0]): + for p in range(coords.shape[1]): + hm = heatmaps[n][p] + px = int(math.floor(coords[n][p][0] + 0.5)) + py = int(math.floor(coords[n][p][1] + 0.5)) + if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: + diff = np.array([ + hm[py][px + 1] - hm[py][px - 1], + hm[py + 1][px] - hm[py - 1][px] + ]) + coords[n][p] += np.sign(diff) * .25 + preds = coords.copy() + + # Transform back + for i in range(coords.shape[0]): + preds[i] = transform_preds(coords[i], center[i], scale[i], + [heatmap_width, heatmap_height]) + + return preds, maxvals + + def __call__(self, output, center, scale): + preds, maxvals = self.get_final_preds(np.array(output), center, scale) + outputs = [[ + np.concatenate( + (preds, maxvals), axis=-1), np.mean( + maxvals, axis=1) + ]] + return outputs + + +def keypoint_post_process(data, data_input, exe, val_program, fetch_targets, + outs): + data_input['image'] = np.flip(data_input['image'], [3]) + output_flipped = exe.run(val_program, + feed=data_input, + fetch_list=fetch_targets, + return_numpy=False) + + output_flipped = np.array(output_flipped[0]) + flip_perm = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], + [15, 16]] + output_flipped = flip_back(output_flipped, flip_perm) + output_flipped[:, :, :, 1:] = copy.copy(output_flipped)[:, :, :, 0:-1] + hrnet_outputs = (np.array(outs[0]) + output_flipped) * 0.5 + imshape = ( + np.array(data['im_shape']))[:, ::-1] if 'im_shape' in data else None + center = np.array(data['center']) if 'center' in data else np.round( + imshape / 2.) + scale = np.array(data['scale']) if 'scale' in data else imshape / 200. + post_process = HRNetPostProcess() + outputs = post_process(hrnet_outputs, center, scale) + return {'keypoint': outputs} diff --git a/example/post_training_quantization/detection/post_process.py b/example/post_training_quantization/detection/post_process.py new file mode 100644 index 000000000..eea2f0195 --- /dev/null +++ b/example/post_training_quantization/detection/post_process.py @@ -0,0 +1,157 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cv2 + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. + Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + indexes = np.argsort(scores) + indexes = indexes[-candidate_size:] + while len(indexes) > 0: + current = indexes[-1] + picked.append(current) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[:-1] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + np.expand_dims( + current_box, axis=0), ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. + """ + overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def area_of(left_top, right_bottom): + """Compute the areas of rectangles given two corners. + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + Returns: + area (N): return the area. 
+ """ + hw = np.clip(right_bottom - left_top, 0.0, None) + return hw[..., 0] * hw[..., 1] + + +class PPYOLOEPostProcess(object): + """ + Args: + input_shape (int): network input image size + scale_factor (float): scale factor of ori image + """ + + def __init__(self, + score_threshold=0.4, + nms_threshold=0.5, + nms_top_k=10000, + keep_top_k=300): + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.keep_top_k = keep_top_k + + def _non_max_suppression(self, prediction, scale_factor): + batch_size = prediction.shape[0] + out_boxes_list = [] + box_num_list = [] + for batch_id in range(batch_size): + bboxes, confidences = prediction[batch_id][..., :4], prediction[ + batch_id][..., 4:] + # nms + picked_box_probs = [] + picked_labels = [] + for class_index in range(0, confidences.shape[1]): + probs = confidences[:, class_index] + mask = probs > self.score_threshold + probs = probs[mask] + if probs.shape[0] == 0: + continue + subset_boxes = bboxes[mask, :] + box_probs = np.concatenate( + [subset_boxes, probs.reshape(-1, 1)], axis=1) + box_probs = hard_nms( + box_probs, + iou_threshold=self.nms_threshold, + top_k=self.nms_top_k) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * box_probs.shape[0]) + + if len(picked_box_probs) == 0: + out_boxes_list.append(np.empty((0, 4))) + + else: + picked_box_probs = np.concatenate(picked_box_probs) + # resize output boxes + picked_box_probs[:, 0] /= scale_factor[batch_id][1] + picked_box_probs[:, 2] /= scale_factor[batch_id][1] + picked_box_probs[:, 1] /= scale_factor[batch_id][0] + picked_box_probs[:, 3] /= scale_factor[batch_id][0] + + # clas score box + out_box = np.concatenate( + [ + np.expand_dims( + np.array(picked_labels), axis=-1), np.expand_dims( + picked_box_probs[:, 4], axis=-1), + picked_box_probs[:, :4] + ], + axis=1) + if out_box.shape[0] > self.keep_top_k: + out_box = out_box[out_box[:, 1].argsort()[::-1] + [:self.keep_top_k]] + out_boxes_list.append(out_box) + box_num_list.append(out_box.shape[0]) + + out_boxes_list = np.concatenate(out_boxes_list, axis=0) + box_num_list = np.array(box_num_list) + return out_boxes_list, box_num_list + + def __call__(self, outs, scale_factor): + out_boxes_list, box_num_list = self._non_max_suppression(outs, + scale_factor) + return {'bbox': out_boxes_list, 'bbox_num': box_num_list} diff --git a/example/post_training_quantization/detection/post_quant.py b/example/post_training_quantization/detection/post_quant.py new file mode 100644 index 000000000..a0c010364 --- /dev/null +++ b/example/post_training_quantization/detection/post_quant.py @@ -0,0 +1,101 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
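+
+# 使用说明(示例命令,配置文件为本目录 configs 下提供的示例,路径请按实际情况修改):
+#   python post_quant.py --config_path=./configs/picodet_s_ptq.yaml --save_dir=./ptq_out
+# 脚本会按配置中的 TrainDataset 构建校准数据,并调用 paddleslim.quant.quant_post_static 产出离线量化模型;
+# 离线量化算法可通过 --algo 指定,默认为 KL。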
+ +import os +import sys +import numpy as np +import argparse +import paddle +from ppdet.core.workspace import load_config, merge_config +from ppdet.core.workspace import create +from paddleslim.quant import quant_post_static + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--save_dir', + type=str, + default='ptq_out', + help="directory to save compressed model.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + parser.add_argument( + '--algo', type=str, default='KL', help="post quant algo.") + + return parser + + +def reader_wrapper(reader, input_list): + def gen(): + for data in reader: + in_dict = {} + if isinstance(input_list, list): + for input_name in input_list: + in_dict[input_name] = data[input_name] + elif isinstance(input_list, dict): + for input_name in input_list.keys(): + in_dict[input_list[input_name]] = data[input_name] + yield in_dict + + return gen + + +def main(): + global config + config = load_config(FLAGS.config_path) + + train_loader = create('EvalReader')(config['TrainDataset'], + config['worker_num'], + return_list=True) + train_loader = reader_wrapper(train_loader, config['input_list']) + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + quant_post_static( + executor=exe, + model_dir=config["model_dir"], + quantize_model_path=FLAGS.save_dir, + data_loader=train_loader, + model_filename=config["model_filename"], + params_filename=config["params_filename"], + batch_size=4, + batch_nums=64, + algo=FLAGS.algo, + hist_percent=0.999, + is_full_quantize=False, + bias_correction=False, + onnx_format=False, + skip_tensor_list=config['skip_tensor_list'] + if 'skip_tensor_list' in config else None) + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/README.md b/example/post_training_quantization/pytorch_yolo_series/README.md new file mode 100644 index 000000000..1df18c4a6 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/README.md @@ -0,0 +1,150 @@ +# YOLO系列离线量化示例 + +目录: +- [1.简介](#1简介) +- [2.Benchmark](#2Benchmark) +- [3.离线量化流程](#离线量化流程) + - [3.1 准备环境](#31-准备环境) + - [3.2 准备数据集](#32-准备数据集) + - [3.3 准备预测模型](#33-准备预测模型) + - [3.4 离线量化并产出模型](#34-离线量化并产出模型) + - [3.5 测试模型精度](#35-测试模型精度) + - [3.6 提高离线量化精度](#36-提高离线量化精度) +- [4.预测部署](#4预测部署) +- [5.FAQ](5FAQ) + + +本示例将以[ultralytics/yolov5](https://github.com/ultralytics/yolov5),[meituan/YOLOv6](https://github.com/meituan/YOLOv6) 和 [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) YOLO系列目标检测模型为例,将PyTorch框架产出的推理模型转换为Paddle推理模型,使用离线量化功能进行压缩,并使用敏感度分析功能提升离线量化精度。离线量化产出的模型可以用PaddleInference部署,也可以导出为ONNX格式模型文件,并用TensorRT部署。 + + +## 2.Benchmark +| 模型 | 策略 | 输入尺寸 | mAPval
0.5:0.95 | 预测时延FP32
(ms) |预测时延FP16
(ms) | 预测时延INT8
(ms) | 配置文件 | Inference模型 | +| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: | +| YOLOv5s | Base模型 | 640*640 | 37.4 | 5.95ms | 2.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) | +| YOLOv5s | KL离线量化 | 640*640 | 36.0 | - | - | 1.87ms | - | - | +| | | | | | | | | | +| YOLOv6s | Base模型 | 640*640 | 42.4 | 9.06ms | 2.90ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) | +| YOLOv6s | KL离线量化(量化分析前) | 640*640 | 30.3 | - | - | 1.83ms | - | - | +| YOLOv6s | KL离线量化(量化分析后) | 640*640 | 39.7 | - | - | - | - | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_analyzed_ptq.tar) | +| | | | | | | | | | +| YOLOv7 | Base模型 | 640*640 | 51.1 | 26.84ms | 7.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | +| YOLOv7 | KL离线量化 | 640*640 | 50.2 | - | - | 4.55ms | - | - | + +说明: +- mAP的指标均在COCO val2017数据集中评测得到。 + +## 3. 离线量化流程 + +#### 3.1 准备环境 +- PaddlePaddle >= 2.3 (可从[Paddle官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)下载安装) +- PaddleSlim > 2.3版本 +- opencv-python + +(1)安装paddlepaddle: +```shell +# CPU +pip install paddlepaddle +# GPU +pip install paddlepaddle-gpu +``` + +(2)安装paddleslim: +```shell +pip install paddleslim +``` + +#### 3.2 准备数据集 +本示例默认以COCO数据进行自动压缩实验,可以从 [MS COCO官网](https://cocodataset.org) 下载 [Train](http://images.cocodataset.org/zips/train2017.zip)、[Val](http://images.cocodataset.org/zips/val2017.zip)、[annotation](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)。 + +目录格式如下: +``` +dataset/coco/ +├── annotations +│ ├── instances_train2017.json +│ ├── instances_val2017.json +│ | ... +├── train2017 +│ ├── 000000000009.jpg +│ ├── 000000580008.jpg +│ | ... 
+
+├── val2017
+│   ├── 000000000139.jpg
+│   ├── 000000000285.jpg
+```
+
+#### 3.3 准备预测模型
+(1)准备ONNX模型:
+
+- YOLOv5:可通过[ultralytics/yolov5](https://github.com/ultralytics/yolov5) 官方的[导出教程](https://github.com/ultralytics/yolov5/issues/251)来准备ONNX模型,也可以下载准备好的[yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx)。
+
+- YOLOv6:可通过[meituan/YOLOv6](https://github.com/meituan/YOLOv6)官方的[导出教程](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md)来准备ONNX模型,也可以直接下载已经准备好的[yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx)。
+
+- YOLOv7:可通过[WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)的导出脚本来准备ONNX模型,也可以直接下载已经准备好的[yolov7.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx)。
+
+
+#### 3.4 离线量化并产出模型
+离线量化示例通过post_quant.py脚本启动,该脚本会调用接口`paddleslim.quant.quant_post_static`对模型进行量化。在config文件中配置好模型路径、数据路径和量化相关的参数后,即可对模型进行离线量化。具体运行命令为:
+- YOLOv5
+
+```shell
+python post_quant.py --config_path=./configs/yolov5s_ptq.yaml --save_dir=./yolov5s_ptq_out
+```
+
+- YOLOv6
+
+```shell
+python post_quant.py --config_path=./configs/yolov6s_ptq.yaml --save_dir=./yolov6s_ptq_out
+```
+
+- YOLOv7
+
+```shell
+python post_quant.py --config_path=./configs/yolov7s_ptq.yaml --save_dir=./yolov7s_ptq_out
+```
+
+
+#### 3.5 测试模型精度
+
+将 [yolov5s_ptq.yaml](./configs/yolov5s_ptq.yaml) 中的`model_dir`字段修改为量化后模型的存储路径,然后使用eval.py脚本评测模型的mAP:
+
+```shell
+export CUDA_VISIBLE_DEVICES=0
+python eval.py --config_path=./configs/yolov5s_ptq.yaml
+```
+
+
+#### 3.6 提高离线量化精度
+本节介绍如何使用量化分析工具提升离线量化精度。离线量化仅需少量数据、使用简单且能快速得到量化模型,但往往会造成较大的精度损失。PaddleSlim提供的量化分析工具(接口为`paddleslim.quant.AnalysisQuant`)可以可视化展示出不适合量化的层,跳过这些层即可提高离线量化模型的精度。
+
+由于YOLOv6离线量化效果较差,此处以YOLOv6为例,量化分析工具的具体使用方法如下:
+
+```shell
+python analysis.py --config_path=./configs/yolov6s_analysis.yaml
+```
+
+经过量化分析之后可以发现,`conv2d_2.w_0`、`conv2d_11.w_0`、`conv2d_15.w_0`、`conv2d_46.w_0`、`conv2d_49.w_0` 这些层会导致较大的精度损失,如下图所示。
+
+
+<p align="center">
+  <img src="./images/sensitivity_rank.png" />
+</p>
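+
+上图的敏感度排序由analysis.py调用```paddleslim.quant.AnalysisQuant```得到。下面给出其核心流程的简化示意(仅作说明:`eval_function`此处只给出占位实现,完整代码以本目录下的analysis.py和configs/yolov6s_analysis.yaml为准):
+
+```python
+import paddle
+from paddleslim.common import load_config, load_onnx_model
+from paddleslim.quant.analysis import AnalysisQuant
+from dataset import COCOTrainDataset
+
+paddle.enable_static()
+config = load_config('./configs/yolov6s_analysis.yaml')
+
+# 校准数据:与analysis.py一致,基于val集构建DataLoader
+dataset = COCOTrainDataset(
+    dataset_dir=config['dataset_dir'],
+    image_dir=config['val_image_dir'],
+    anno_path=config['val_anno_path'],
+    input_name='x2paddle_image_arrays')  # YOLOv6导出模型的输入名
+data_loader = paddle.io.DataLoader(
+    dataset, batch_size=1, shuffle=True, drop_last=True)
+
+
+def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
+    # 占位实现:应返回模型在val集上的mAP,完整实现见analysis.py
+    ...
+
+
+# 将ONNX模型先转成Paddle推理模型,转换后的目录为"<原模型名去掉.onnx>_infer"
+load_onnx_model(config['model_dir'])
+infer_model_dir = config['model_dir'].rstrip().rstrip('.onnx') + '_infer'
+
+analyzer = AnalysisQuant(
+    model_dir=infer_model_dir,
+    model_filename='model.pdmodel',
+    params_filename='model.pdiparams',
+    eval_function=eval_function,
+    data_loader=data_loader,
+    save_dir=config['save_dir'],
+    ptq_config=config['PTQ'])
+analyzer.analysis()  # 逐层分析量化敏感度,并将结果保存到save_dir
+```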
+
+对比权重直方图分布后,可以发现量化损失较小的层数值分布相对平稳,数值处于-0.25到0.25之间;而量化损失较大的层数值分布非常极端,绝大部分值趋近于0,且数值处于-0.1到0.1之间。尽管二者看上去都是正态分布,但大量值为0不利于量化时scale值的统计。
+
+<p align="center">
+  <img src="./images/hist_compare.png" />
+</p>
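+
+上述分析得到的敏感层会通过配置项`skip_tensor_list`传入```paddleslim.quant.quant_post_static```,量化时直接跳过这些层。下面是post_quant.py中该调用的简化示意(仅作说明,完整参数以本目录下的post_quant.py为准):
+
+```python
+import paddle
+from paddleslim.common import load_config, load_onnx_model
+from paddleslim.quant import quant_post_static
+from dataset import COCOTrainDataset
+
+paddle.enable_static()
+config = load_config('./configs/yolov6s_analyzed_ptq.yaml')
+
+# 校准数据:与post_quant.py一致
+dataset = COCOTrainDataset(
+    dataset_dir=config['dataset_dir'],
+    image_dir=config['val_image_dir'],
+    anno_path=config['val_anno_path'],
+    input_name='x2paddle_image_arrays')
+train_loader = paddle.io.DataLoader(
+    dataset, batch_size=1, shuffle=True, drop_last=True)
+
+# 先将ONNX模型转为Paddle推理模型
+load_onnx_model(config['model_dir'])
+infer_model_dir = config['model_dir'].rstrip().rstrip('.onnx') + '_infer'
+
+exe = paddle.static.Executor(paddle.CUDAPlace(0))
+quant_post_static(
+    executor=exe,
+    model_dir=infer_model_dir,
+    quantize_model_path='./yolov6s_analyzed_ptq_out',
+    data_loader=train_loader,
+    model_filename='model.pdmodel',
+    params_filename='model.pdiparams',
+    batch_size=32,
+    batch_nums=10,
+    algo='KL',
+    onnx_format=True,
+    skip_tensor_list=config.get('skip_tensor_list'))  # 跳过分析出的敏感层
+```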
+ + +经此分析,在进行离线量化时,可以跳过这些导致精度下降较多的层,可使用 [yolov6s_analyzed_ptq.yaml](./configs/yolov6s_analyzed_ptq.yaml),然后再次进行离线量化。跳过这些层后,离线量化精度上升9.4个点。 + +```shell +python post_quant.py --config_path=./configs/yolov6s_analyzed_ptq.yaml --save_dir=./yolov6s_analyzed_ptq_out +``` + +## 4.预测部署 + + +## 5.FAQ +- 如果想对模型进行自动压缩,可进入[YOLO系列模型自动压缩示例](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/auto_compression/pytorch_yolo_series)中进行实验。 diff --git a/example/post_training_quantization/pytorch_yolo_series/analysis.py b/example/post_training_quantization/pytorch_yolo_series/analysis.py new file mode 100644 index 000000000..1c8cbcb5b --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/analysis.py @@ -0,0 +1,115 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from tqdm import tqdm +from post_process import YOLOv6PostProcess, coco_metric +from dataset import COCOValDataset, COCOTrainDataset +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant.analysis import AnalysisQuant + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of analysis config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + return parser + + +def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(compiled_test_program, + feed={test_feed_names[0]: data_all['image']}, + fetch_list=test_fetch_list, + return_numpy=False) + res = {} + postprocess = YOLOv6PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + map_res = coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + return map_res[0] + + +def main(): + + global config + config = load_config(FLAGS.config_path) + ptq_config = config['PTQ'] + + input_name = 'x2paddle_image_arrays' if config[ + 'arch'] == 'YOLOv6' else 'x2paddle_images' + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path'], + input_name=input_name) + data_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + 
global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=False, drop_last=False, num_workers=0) + + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + analyzer = AnalysisQuant( + model_dir=inference_model_path, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + eval_function=eval_function, + data_loader=data_loader, + save_dir=config['save_dir'], + ptq_config=ptq_config) + analyzer.analysis() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml new file mode 100644 index 000000000..eb9f792b1 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml @@ -0,0 +1,8 @@ +arch: YOLOv5 +model_dir: ./yolov5s.onnx +dataset_dir: dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensors: None # you can set it after analysis diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml new file mode 100644 index 000000000..a99198a44 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml @@ -0,0 +1,15 @@ +arch: YOLOv6 +model_dir: ./yolov6s.onnx +save_dir: ./analysis_results +dataset_dir: /dataset/coco/ +val_image_dir: val2017 +val_anno_path: annotations/instances_val2017.json + +PTQ: + quantizable_op_type: ["conv2d", "depthwise_conv2d"] + weight_quantize_type: 'abs_max' + activation_quantize_type: 'moving_average_abs_max' + is_full_quantize: False + batch_size: 10 + batch_nums: 10 + diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml new file mode 100644 index 000000000..fa9585d32 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml @@ -0,0 +1,8 @@ +arch: YOLOv6 +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: ['conv2d_2.w_0', 'conv2d_15.w_0', 'conv2d_46.w_0', 'conv2d_11.w_0', 'conv2d_49.w_0'] diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml new file mode 100644 index 000000000..9ec6b6a68 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml @@ -0,0 +1,8 @@ +arch: YOLOv6 +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: None diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml 
b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml new file mode 100644 index 000000000..4d110b4e2 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml @@ -0,0 +1,7 @@ +arch: YOLOv7 +model_dir: ./yolov7s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json diff --git a/example/post_training_quantization/pytorch_yolo_series/dataset.py b/example/post_training_quantization/pytorch_yolo_series/dataset.py new file mode 100644 index 000000000..326c5ecf6 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/dataset.py @@ -0,0 +1,115 @@ +from pycocotools.coco import COCO +import cv2 +import os +import numpy as np +import paddle + + +class COCOValDataset(paddle.io.Dataset): + def __init__(self, + dataset_dir=None, + image_dir=None, + anno_path=None, + img_size=[640, 640], + input_name='x2paddle_images'): + self.dataset_dir = dataset_dir + self.image_dir = image_dir + self.img_size = img_size + self.input_name = input_name + self.ann_file = os.path.join(dataset_dir, anno_path) + self.coco = COCO(self.ann_file) + ori_ids = list(sorted(self.coco.imgs.keys())) + # check gt bbox + clean_ids = [] + for idx in ori_ids: + ins_anno_ids = self.coco.getAnnIds(imgIds=[idx], iscrowd=False) + instances = self.coco.loadAnns(ins_anno_ids) + num_bbox = 0 + for inst in instances: + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + elif not any(np.array(inst['bbox'])): + continue + else: + num_bbox += 1 + if num_bbox > 0: + clean_ids.append(idx) + self.ids = clean_ids + + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return { + 'image': img, + 'im_id': np.array([img_id]), + 'scale_factor': scale_factor + } + + def __len__(self): + return len(self.ids) + + def _get_img_data_from_img_id(self, img_id): + img_info = self.coco.loadImgs(img_id)[0] + img_path = os.path.join(self.dataset_dir, self.image_dir, + img_info['file_name']) + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + + def _generate_scale(self, im, target_shape, keep_ratio=True): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + if keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(target_shape) + target_size_max = np.max(target_shape) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = target_shape + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + def image_preprocess(self, img, target_shape): + # Resize image + im_scale_y, im_scale_x = self._generate_scale(img, target_shape) + img = cv2.resize( + img, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=cv2.INTER_LINEAR) + # Pad + im_h, im_w = img.shape[:2] + h, w = target_shape[:] + if h != im_h or w != im_w: + canvas = np.ones((h, w, 3), dtype=np.float32) + canvas *= np.array([114.0, 114.0, 114.0], 
dtype=np.float32) + canvas[0:im_h, 0:im_w, :] = img.astype(np.float32) + img = canvas + img = np.transpose(img / 255, [2, 0, 1]) + scale_factor = np.array([im_scale_y, im_scale_x]) + return img.astype(np.float32), scale_factor + + +class COCOTrainDataset(COCOValDataset): + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return {self.input_name: img} \ No newline at end of file diff --git a/example/post_training_quantization/pytorch_yolo_series/eval.py b/example/post_training_quantization/pytorch_yolo_series/eval.py new file mode 100644 index 000000000..e105bb788 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/eval.py @@ -0,0 +1,101 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from paddleslim.common import load_config as load_slim_config +from paddleslim.common import load_inference_model +from post_process import YOLOPostProcess, coco_metric +from dataset import COCOValDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--batch_size', type=int, default=1, help="Batch size of model input.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + + return parser + + +def eval(): + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + val_program, feed_target_names, fetch_targets = load_inference_model( + config["model_dir"], exe, "model.pdmodel", "model.pdiparams") + + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(val_program, + feed={feed_target_names[0]: data_all['image']}, + fetch_list=fetch_targets, + return_numpy=False) + postprocess = YOLOPostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + + coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + + +def main(): + global config + config = load_slim_config(FLAGS.config_path) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, 
batch_size=FLAGS.batch_size, drop_last=True) + + eval() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/images/hist_compare.png b/example/post_training_quantization/pytorch_yolo_series/images/hist_compare.png new file mode 100644 index 000000000..e895bd261 Binary files /dev/null and b/example/post_training_quantization/pytorch_yolo_series/images/hist_compare.png differ diff --git a/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png new file mode 100644 index 000000000..1eab297a1 Binary files /dev/null and b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png differ diff --git a/example/post_training_quantization/pytorch_yolo_series/post_process.py b/example/post_training_quantization/pytorch_yolo_series/post_process.py new file mode 100644 index 000000000..5988258b8 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_process.py @@ -0,0 +1,231 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cv2 +import json +import sys + + +def box_area(boxes): + """ + Args: + boxes(np.ndarray): [N, 4] + return: [N] + """ + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +def box_iou(box1, box2): + """ + Args: + box1(np.ndarray): [N, 4] + box2(np.ndarray): [M, 4] + return: [N, M] + """ + area1 = box_area(box1) + area2 = box_area(box2) + lt = np.maximum(box1[:, np.newaxis, :2], box2[:, :2]) + rb = np.minimum(box1[:, np.newaxis, 2:], box2[:, 2:]) + wh = rb - lt + wh = np.maximum(0, wh) + inter = wh[:, :, 0] * wh[:, :, 1] + iou = inter / (area1[:, np.newaxis] + area2 - inter) + return iou + + +def nms(boxes, scores, iou_threshold): + """ + Non Max Suppression numpy implementation. + args: + boxes(np.ndarray): [N, 4] + scores(np.ndarray): [N, 1] + iou_threshold(float): Threshold of IoU. + """ + idxs = scores.argsort() + keep = [] + while idxs.size > 0: + max_score_index = idxs[-1] + max_score_box = boxes[max_score_index][None, :] + keep.append(max_score_index) + if idxs.size == 1: + break + idxs = idxs[:-1] + other_boxes = boxes[idxs] + ious = box_iou(max_score_box, other_boxes) + idxs = idxs[ious[0] <= iou_threshold] + + keep = np.array(keep) + return keep + + +class YOLOPostProcess(object): + """ + Post process of YOLO serise network. + args: + score_threshold(float): Threshold to filter out bounding boxes with low + confidence score. If not provided, consider all boxes. + nms_threshold(float): The threshold to be used in NMS. + multi_label(bool): Whether keep multi label in boxes. + keep_top_k(int): Number of total bboxes to be kept per image after NMS + step. 
-1 means keeping all bboxes after NMS step. + """ + + def __init__(self, + score_threshold=0.25, + nms_threshold=0.5, + multi_label=False, + keep_top_k=300): + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.multi_label = multi_label + self.keep_top_k = keep_top_k + + def _xywh2xyxy(self, x): + # Convert from [x, y, w, h] to [x1, y1, x2, y2] + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + def _non_max_suppression(self, prediction): + max_wh = 4096 # (pixels) minimum and maximum box width and height + nms_top_k = 30000 + + cand_boxes = prediction[..., 4] > self.score_threshold # candidates + output = [np.zeros((0, 6))] * prediction.shape[0] + + for batch_id, boxes in enumerate(prediction): + # Apply constraints + boxes = boxes[cand_boxes[batch_id]] + if not boxes.shape[0]: + continue + # Compute conf (conf = obj_conf * cls_conf) + boxes[:, 5:] *= boxes[:, 4:5] + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + convert_box = self._xywh2xyxy(boxes[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if self.multi_label: + i, j = (boxes[:, 5:] > self.score_threshold).nonzero() + boxes = np.concatenate( + (convert_box[i], boxes[i, j + 5, None], + j[:, None].astype(np.float32)), + axis=1) + else: + conf = np.max(boxes[:, 5:], axis=1) + j = np.argmax(boxes[:, 5:], axis=1) + re = np.array(conf.reshape(-1) > self.score_threshold) + conf = conf.reshape(-1, 1) + j = j.reshape(-1, 1) + boxes = np.concatenate((convert_box, conf, j), axis=1)[re] + + num_box = boxes.shape[0] + if not num_box: + continue + elif num_box > nms_top_k: + boxes = boxes[boxes[:, 4].argsort()[::-1][:nms_top_k]] + + # Batched NMS + c = boxes[:, 5:6] * max_wh + clean_boxes, scores = boxes[:, :4] + c, boxes[:, 4] + keep = nms(clean_boxes, scores, self.nms_threshold) + # limit detection box num + if keep.shape[0] > self.keep_top_k: + keep = keep[:self.keep_top_k] + output[batch_id] = boxes[keep] + return output + + def __call__(self, outs, scale_factor): + preds = self._non_max_suppression(outs) + bboxs, box_nums = [], [] + for i, pred in enumerate(preds): + if len(pred.shape) > 2: + pred = np.squeeze(pred) + if len(pred.shape) == 1: + pred = pred[np.newaxis, :] + pred_bboxes = pred[:, :4] + scale = np.tile(scale_factor[i][::-1], (2)) + pred_bboxes /= scale + bbox = np.concatenate( + [ + pred[:, -1][:, np.newaxis], pred[:, -2][:, np.newaxis], + pred_bboxes + ], + axis=-1) + bboxs.append(bbox) + box_num = bbox.shape[0] + box_nums.append(box_num) + bboxs = np.concatenate(bboxs, axis=0) + box_nums = np.array(box_nums) + return {'bbox': bboxs, 'bbox_num': box_nums} + + +def coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list): + try: + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + except: + print( + "[ERROR] Not found pycocotools, please install by `pip install pycocotools`" + ) + sys.exit(1) + + coco_gt = COCO(anno_file) + cats = coco_gt.loadCats(coco_gt.getCatIds()) + clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)} + results = [] + for bboxes, bbox_nums, image_id in zip(bboxes_list, bbox_nums_list, + image_id_list): + results += _get_det_res(bboxes, bbox_nums, image_id, clsid2catid) + + output = "bbox.json" + with open(output, 'w') as f: + json.dump(results, f) + + coco_dt = coco_gt.loadRes(output) + coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') 
+ coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + return coco_eval.stats + + +def _get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + h = ymax - ymin + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': cur_image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + det_res.append(dt_res) + return det_res diff --git a/example/post_training_quantization/pytorch_yolo_series/post_quant.py b/example/post_training_quantization/pytorch_yolo_series/post_quant.py new file mode 100644 index 000000000..ec4fbdbc4 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_quant.py @@ -0,0 +1,98 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant import quant_post_static +from dataset import COCOTrainDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of post training quantization config.", + required=True) + parser.add_argument( + '--save_dir', + type=str, + default='ptq_out', + help="directory to save compressed model.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + parser.add_argument( + '--algo', type=str, default='KL', help="post quant algo.") + + return parser + + +def main(): + global config + config = load_config(FLAGS.config_path) + + input_name = 'x2paddle_image_arrays' if config[ + 'arch'] == 'YOLOv6' else 'x2paddle_images' + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path'], + input_name=input_name) + train_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + # since the type pf model converted from pytorch is onnx, + # use load_onnx_model firstly and rename the model_dir + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + + quant_post_static( + executor=exe, + model_dir=inference_model_path, + quantize_model_path=FLAGS.save_dir, + data_loader=train_loader, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + batch_size=32, + batch_nums=10, + algo=FLAGS.algo, + hist_percent=0.999, + is_full_quantize=False, + bias_correction=False, + 
onnx_format=True, + skip_tensor_list=config['skip_tensor_list'] + if 'skip_tensor_list' in config else None) + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/paddleslim/analysis/_utils.py b/paddleslim/analysis/_utils.py index 0b6fd1b85..82bedee53 100644 --- a/paddleslim/analysis/_utils.py +++ b/paddleslim/analysis/_utils.py @@ -135,8 +135,8 @@ def save_cls_model(model, input_shape, save_dir, data_type): weight_bits=8, activation_bits=8) - model_file = os.path.join(quantize_model_path, '__model__') - param_file = os.path.join(quantize_model_path, '__params__') + model_file = os.path.join(quantize_model_path, 'model.pdmodel') + param_file = os.path.join(quantize_model_path, 'model.pdiparams') return model_file, param_file diff --git a/paddleslim/analysis/latency_predictor.py b/paddleslim/analysis/latency_predictor.py index c67475118..f55b413a7 100644 --- a/paddleslim/analysis/latency_predictor.py +++ b/paddleslim/analysis/latency_predictor.py @@ -15,6 +15,9 @@ # limitations under the License. import os +import pip +import platform +import logging import pickle import shutil import subprocess @@ -25,9 +28,12 @@ from .parse_ops import get_key_from_op from .extract_features import get_data_from_tables, get_features_from_paramkey from ._utils import opt_model, load_predictor, nearest_interpolate, _get_download +from ..common import get_logger from ..core import GraphWrapper __all__ = ["LatencyPredictor", "TableLatencyPredictor"] +_logger = get_logger(__name__, level=logging.INFO) + TABLE_URL = 'https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/' @@ -71,6 +77,7 @@ class TableLatencyPredictor(LatencyPredictor): hardware_list = ['SD625', 'SD710', 'RK3288'] def __init__(self, table_file='SD710'): + self._check_opt_model() self.table_file = table_file self.table_dict = {} self.hardware = None @@ -83,6 +90,22 @@ def __init__(self, table_file='SD710'): def add_hardware(cls, hardware): cls.hardware_list.append(hardware) + def _check_opt_model(self): + if platform.system().lower() == 'windows': + raise NotImplementedError( + 'latency predictor does NOT support running on Windows.') + elif platform.system().lower() == 'darwin': + py_version = platform.python_version().split('.') + if int(py_version[0]) != 3 or int(py_version[1]) != 9: + raise NotImplementedError( + 'Latency predictor does NOT support running on macOS when python version is not 3.9.' 
+ ) + + _logger.info("pip install paddleslim-opt-tools") + out = shutil.which('paddle_lite_opt') + if out is None: + pip.main(['install', 'paddleslim-opt-tools']) + def _initial_table(self): if self.table_file in TableLatencyPredictor.hardware_list: self.hardware = self.table_file diff --git a/paddleslim/auto_compression/__init__.py b/paddleslim/auto_compression/__init__.py index 990ad37b7..cfc26259d 100644 --- a/paddleslim/auto_compression/__init__.py +++ b/paddleslim/auto_compression/__init__.py @@ -19,8 +19,14 @@ from .utils import * __all__ = [ - "AutoCompression", "Quantization", "Distillation", - "MultiTeacherDistillation", "HyperParameterOptimization", "Prune", - "UnstructurePrune", "ProgramInfo", "TrainConfig", "save_config", - "load_config", "predict_compressed_model" + "AutoCompression", + "Quantization", + "Distillation", + "MultiTeacherDistillation", + "HyperParameterOptimization", + "Prune", + "UnstructurePrune", + "ProgramInfo", + "TrainConfig", + "predict_compressed_model", ] diff --git a/paddleslim/auto_compression/auto_strategy.py b/paddleslim/auto_compression/auto_strategy.py index a1eb0b0f2..eab962add 100644 --- a/paddleslim/auto_compression/auto_strategy.py +++ b/paddleslim/auto_compression/auto_strategy.py @@ -47,8 +47,10 @@ # default quant config, can be used by ptq&hpo and qat&distillation default_quant_config = { - 'quantize_op_types': - ['conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2'], + 'quantize_op_types': [ + 'conv2d', 'depthwise_conv2d', 'conv2d_transpose', 'mul', 'matmul', + 'matmul_v2' + ], 'weight_bits': 8, 'activation_bits': 8, "is_full_quantize": False, @@ -77,8 +79,8 @@ MAGIC_SPARSE_RATIO = 0.75 ### TODO: 0.02 threshold maybe not suitable, need to check ### NOTE: reduce magic data to choose quantization aware training. -MAGIC_MAX_EMD_DISTANCE = 0.0002 #0.02 -MAGIC_MIN_EMD_DISTANCE = 0.0001 #0.01 +MAGIC_MAX_EMD_DISTANCE = 0.00002 #0.02 +MAGIC_MIN_EMD_DISTANCE = 0.00001 #0.01 DEFAULT_TRANSFORMER_STRATEGY = 'prune_0.25_int8' DEFAULT_STRATEGY = 'origin_int8' @@ -241,16 +243,6 @@ def prepare_strategy(executor, def get_final_quant_config(ptq_loss, model_type=None): """ transform quantization tester config to real quantization config """ - ### use ptq & hpo when model_type is transformer - if model_type == 'transformer': - quant_config = Quantization(**default_quant_config) - hpo_config = HyperParameterOptimization(**default_hpo_config) - configs = [{ - 'Quantization': quant_config, - 'HyperParameterOptimization': hpo_config - }] - return configs - ### if emd loss less than MAGIC_MIN_EMD_DISTANCE, final compress. 
if ptq_loss < MAGIC_MIN_EMD_DISTANCE: return None diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py index 7d21bbe7a..b5459c76d 100644 --- a/paddleslim/auto_compression/compressor.py +++ b/paddleslim/auto_compression/compressor.py @@ -29,13 +29,15 @@ from ..common.recover_program import recover_inference_program from ..common import get_logger from ..common.patterns import get_patterns +from ..common.load_model import load_inference_model, get_model_dir, export_onnx +from ..common.dataloader import wrap_dataloader, get_feed_vars +from ..common.config_helper import load_config from ..analysis import TableLatencyPredictor from .create_compressed_program import build_distill_program, build_quant_program, build_prune_program, remove_unused_var_nodes from .strategy_config import TrainConfig, ProgramInfo, merge_config from .auto_strategy import prepare_strategy, get_final_quant_config, create_strategy_config, create_train_config -from .config_helpers import load_config, extract_strategy_config, extract_train_config +from .config_helpers import extract_strategy_config, extract_train_config from .utils.predict import with_variable_shape -from .utils import get_feed_vars, wrap_dataloader, load_inference_model _logger = get_logger(__name__, level=logging.INFO) @@ -49,10 +51,10 @@ class AutoCompression: def __init__(self, model_dir, - model_filename, - params_filename, - save_dir, train_dataloader, + model_filename=None, + params_filename=None, + save_dir='./output', config=None, input_shapes=None, target_speedup=None, @@ -66,13 +68,13 @@ def __init__(self, model_dir(str): The path of inference model that will be compressed, and the model and params that saved by ``paddle.static.save_inference_model`` are under the path. + train_dataloader(Python Generator, Paddle.io.DataLoader): The + Generator or Dataloader provides train data, and it could + return a batch every time. model_filename(str): The name of model file. params_filename(str): The name of params file. save_dir(str): The path to save compressed model. The models in this directory will be overwrited after calling 'compress()' function. - train_data_loader(Python Generator, Paddle.io.DataLoader): The - Generator or Dataloader provides train data, and it could - return a batch every time. input_shapes(dict|tuple|list): It is used when the model has implicit dimensions except batch size. If it is a dict, the key is the name of input and the value is the shape. Given the input shape of input "X" is [-1, 3, -1, -1] which means the batch size, hight @@ -117,18 +119,8 @@ def __init__(self, deploy_hardware(str, optional): The hardware you want to deploy. Default: 'gpu'. """ self.model_dir = model_dir.rstrip('/') - - if model_filename == 'None': - model_filename = None - self.model_filename = model_filename - if params_filename == 'None': - params_filename = None - self.params_filename = params_filename - - if params_filename is None and model_filename is not None: - raise NotImplementedError( - "NOT SUPPORT parameters saved in separate files. Please convert it to single binary file first." 
- ) + self.updated_model_dir, self.model_filename, self.params_filename = get_model_dir( + model_dir, model_filename, params_filename) self.final_dir = save_dir if not os.path.exists(self.final_dir): @@ -143,6 +135,8 @@ def __init__(self, self.train_config = TrainConfig(**config.pop('TrainConfig')) else: self.train_config = None + else: + self.train_config = None self.strategy_config = extract_strategy_config(config) # prepare dataloader @@ -151,8 +145,9 @@ def __init__(self, self.train_dataloader = wrap_dataloader(train_dataloader, self.feed_vars) self.eval_dataloader = wrap_dataloader(eval_dataloader, self.feed_vars) - if eval_dataloader is None: - eval_dataloader = self._get_eval_dataloader(self.train_dataloader) + if self.eval_dataloader is None: + self.eval_dataloader = self._get_eval_dataloader( + self.train_dataloader) self.target_speedup = target_speedup self.eval_function = eval_callback @@ -160,8 +155,7 @@ def __init__(self, paddle.enable_static() self._exe, self._places = self._prepare_envs() - self.model_type = self._get_model_type(self._exe, self.model_dir, - model_filename, params_filename) + self.model_type = self._get_model_type() if self.train_config is not None and self.train_config.use_fleet: fleet.init(is_collective=True) @@ -246,8 +240,8 @@ def _infer_shape(self, model_dir, model_filename, params_filename, paddle.enable_static() exe = paddle.static.Executor(paddle.CPUPlace()) [inference_program, feed_target_names, - fetch_targets] = (load_inference_model(model_dir, exe, model_filename, - params_filename)) + fetch_targets] = load_inference_model(model_dir, exe, model_filename, + params_filename) if type(input_shapes) in [list, tuple]: assert len( @@ -307,23 +301,26 @@ def _prepare_envs(self): exe = paddle.static.Executor(places) return exe, places - def _get_model_type(self, exe, model_dir, model_filename, params_filename): - [inference_program, _, _]= (load_inference_model( \ - model_dir, \ - model_filename=model_filename, params_filename=params_filename, - executor=exe)) + def _get_model_type(self): + [inference_program, _, _] = (load_inference_model( + self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + executor=self._exe)) _, _, model_type = get_patterns(inference_program) if self.model_filename is None: - new_model_filename = '__new_model__' + opt_model_filename = '__opt_model__' else: - new_model_filename = 'new_' + self.model_filename + opt_model_filename = 'opt_' + self.model_filename program_bytes = inference_program._remove_training_info( clip_extra=False).desc.serialize_to_string() - with open(os.path.join(self.model_dir, new_model_filename), "wb") as f: + with open( + os.path.join(self.updated_model_dir, opt_model_filename), + "wb") as f: f.write(program_bytes) shutil.move( - os.path.join(self.model_dir, new_model_filename), - os.path.join(self.model_dir, self.model_filename)) + os.path.join(self.updated_model_dir, opt_model_filename), + os.path.join(self.updated_model_dir, self.model_filename)) _logger.info(f"Detect model type: {model_type}") return model_type @@ -392,7 +389,7 @@ def _prepare_strategy(self, strategy_config): config.append(merge_config(quant_config, hpo_config)) ### case6: quant_config & distill config ==> QAT & Distill - if quant_config is not None and self._distill_config is not None: + if quant_config is not None and self._distill_config is not None and 'ptq_hpo' not in strategy: only_distillation = False strategy.append('qat_dis') config.append(merge_config(quant_config, self._distill_config)) 
@@ -465,10 +462,10 @@ def _prepare_program(self, program, feed_target_names, fetch_targets, 'train_config must has `epochs` or `train_iter` field.') config_dict['gmp_config'] = { 'stable_iterations': 0, - 'pruning_iterations': 0.45 * total_iters, - 'tunning_iterations': 0.45 * total_iters, + 'pruning_iterations': max(0.45 * total_iters, 30), + 'tunning_iterations': max(0.45 * total_iters, 30), 'resume_iteration': -1, - 'pruning_steps': 100, + 'pruning_steps': 100 if (0.45 * total_iters) > 1000 else 1, 'initial_ratio': 0.15, } ### add prune program @@ -551,7 +548,7 @@ def _compiled_program(self, program_info, strategy): def create_tmp_dir(self, base_dir, prefix="tmp"): # create a new temp directory in final dir - s_datetime = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + s_datetime = strftime("%Y_%m_%d_%H_%M_%S", gmtime()) tmp_base_name = "_".join([prefix, str(os.getpid()), s_datetime]) tmp_dir = os.path.join(base_dir, tmp_base_name) if not os.path.exists(tmp_dir): @@ -586,42 +583,31 @@ def compress(self): tmp_model_path = os.path.join( self.tmp_dir, 'strategy_{}'.format(str(strategy_idx + 1))) final_model_path = os.path.join(self.final_dir) - if not os.path.exists(final_model_path): - os.makedirs(final_model_path) - - tmp_model_file = ".".join([tmp_model_path, "pdmodel"]) - if not os.path.exists(tmp_model_file): - tmp_model_file = os.path.join(tmp_model_path, self.model_filename) - - tmp_params_file = ".".join([tmp_model_path, "pdiparams"]) - if not os.path.exists(tmp_params_file): - tmp_params_file = os.path.join(tmp_model_path, self.params_filename) - - if self.model_filename is None: - self.model_filename = "infer.pdmodel" - if self.params_filename is None: - self.params_filename = "infer.pdiparams" - - final_model_file = os.path.join(final_model_path, self.model_filename) - final_params_file = os.path.join(final_model_path, self.params_filename) - if paddle.distributed.get_rank() == 0: - shutil.move(tmp_model_file, final_model_file) - shutil.move(tmp_params_file, final_params_file) + for _file in os.listdir(tmp_model_path): + _file_path = os.path.join(tmp_model_path, _file) + if os.path.isfile(_file_path): + shutil.copy(_file_path, final_model_path) shutil.rmtree(self.tmp_dir) _logger.info( "==> The ACT compression has been completed and the final model is saved in `{}`". format(final_model_path)) - os._exit(0) def single_strategy_compress(self, strategy, config, strategy_idx, train_config): # start compress, including train/eval model # TODO: add the emd loss of evaluation model. + if self.updated_model_dir != self.model_dir: + # If model is ONNX, convert it to inference model firstly. + load_inference_model( + self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + executor=self._exe) if strategy == 'quant_post': quant_post( self._exe, - model_dir=self.model_dir, + model_dir=self.updated_model_dir, quantize_model_path=os.path.join( self.tmp_dir, 'strategy_{}'.format(str(strategy_idx + 1))), data_loader=self.train_dataloader, @@ -647,11 +633,17 @@ def single_strategy_compress(self, strategy, config, strategy_idx, if platform.system().lower() != 'linux': raise NotImplementedError( "post-quant-hpo is not support in system other than linux") - + if self.updated_model_dir != self.model_dir: + # If model is ONNX, convert it to inference model firstly. 
+ load_inference_model( + self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + executor=self._exe) post_quant_hpo.quant_post_hpo( self._exe, self._places, - model_dir=self.model_dir, + model_dir=self.updated_model_dir, quantize_model_path=os.path.join( self.tmp_dir, 'strategy_{}'.format(str(strategy_idx + 1))), train_dataloader=self.train_dataloader, @@ -722,7 +714,10 @@ def _start_train(self, train_program_info, test_program_info, strategy, best_metric = -1.0 total_epochs = train_config.epochs if train_config.epochs else 100 total_train_iter = 0 + stop_training = False for epoch_id in range(total_epochs): + if stop_training: + break for batch_id, data in enumerate(self.train_dataloader()): np_probs_float, = self._exe.run(train_program_info.program, \ feed=data, \ @@ -768,6 +763,10 @@ def _start_train(self, train_program_info, test_program_info, strategy, abs(best_metric - self.metric_before_compressed) ) / self.metric_before_compressed <= 0.005: + _logger.info( + "The error rate between the compressed model and original model is less than 5%. The training process ends." + ) + stop_training = True break else: _logger.info( @@ -775,14 +774,18 @@ def _start_train(self, train_program_info, test_program_info, strategy, format(epoch_id, metric, best_metric)) if train_config.target_metric is not None: if metric > float(train_config.target_metric): + stop_training = True + _logger.info( + "The metric of compressed model has reached the target metric. The training process ends." + ) break else: _logger.warning( "Not set eval function, so unable to test accuracy performance." ) - if train_config.train_iter and total_train_iter >= train_config.train_iter: - epoch_id = total_epochs + if (train_config.train_iter and total_train_iter >= + train_config.train_iter) or stop_training: break if 'unstructure' in self._strategy or train_config.sparse_model: @@ -802,22 +805,32 @@ def _save_model(self, test_program_info, strategy, strategy_idx): os.remove(os.path.join(self.tmp_dir, 'best_model.pdopt')) os.remove(os.path.join(self.tmp_dir, 'best_model.pdparams')) - if 'qat' in strategy: - test_program, int8_program = convert(test_program, self._places, self._quant_config, \ - scope=paddle.static.global_scope(), \ - save_int8=True) - model_dir = os.path.join(self.tmp_dir, 'strategy_{}'.format(str(strategy_idx + 1))) if not os.path.exists(model_dir): os.makedirs(model_dir) + + if 'qat' in strategy: + test_program = convert( + test_program, + self._places, + self._quant_config, + scope=paddle.static.global_scope(), + save_clip_ranges_path=self.final_dir) + feed_vars = [ test_program.global_block().var(name) for name in test_program_info.feed_target_names ] - model_name = '.'.join(self.model_filename.split( - '.')[:-1]) if self.model_filename is not None else 'model' + model_name = None + if self.model_filename is None: + model_name = "model" + elif self.model_filename.endswith(".pdmodel"): + model_name = self.model_filename.rsplit(".", 1)[0] + else: + model_name = self.model_filename + path_prefix = os.path.join(model_dir, model_name) paddle.static.save_inference_model( path_prefix=path_prefix, @@ -825,3 +838,20 @@ def _save_model(self, test_program_info, strategy, strategy_idx): fetch_vars=test_program_info.fetch_targets, executor=self._exe, program=test_program) + + def export_onnx(self, + model_name='quant_model.onnx', + deploy_backend='tensorrt'): + infer_model_path = os.path.join(self.final_dir, self.model_filename) + assert os.path.exists( + infer_model_path), 'Not 
found {}, please check it.'.format( + infer_model_path) + onnx_save_path = os.path.join(self.final_dir, 'ONNX') + if not os.path.exists(onnx_save_path): + os.makedirs(onnx_save_path) + export_onnx( + self.final_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + save_file_path=os.path.join(onnx_save_path, model_name), + deploy_backend=deploy_backend) diff --git a/paddleslim/auto_compression/config_helpers.py b/paddleslim/auto_compression/config_helpers.py index ebc5b45c8..b1e426cc2 100644 --- a/paddleslim/auto_compression/config_helpers.py +++ b/paddleslim/auto_compression/config_helpers.py @@ -14,42 +14,7 @@ import yaml import os from paddleslim.auto_compression.strategy_config import * - -__all__ = ['save_config', 'load_config'] - - -def print_arguments(args, level=0): - if level == 0: - print('----------- Running Arguments -----------') - for arg, value in sorted(args.items()): - if isinstance(value, dict): - print('\t' * level, '%s:' % arg) - print_arguments(value, level + 1) - else: - print('\t' * level, '%s: %s' % (arg, value)) - if level == 0: - print('------------------------------------------') - - -def load_config(config): - """Load configurations from yaml file into dict. - Fields validation is skipped for loading some custom information. - Args: - config(str): The path of configuration file. - Returns: - dict: A dict storing configuration information. - """ - if config is None: - return None - assert isinstance( - config, - str), f"config should be str but got type(config)={type(config)}" - assert os.path.exists(config) and os.path.isfile( - config), f"{config} not found or it is not a file." - with open(config) as f: - cfg = yaml.load(f, Loader=yaml.FullLoader) - print_arguments(cfg) - return cfg +from ..common.config_helper import load_config def extract_strategy_config(config): @@ -101,12 +66,3 @@ def extract_train_config(config): **value) if value is not None else TrainConfig() # return default training config when it is not set return TrainConfig() - - -def save_config(config, config_path): - """ - convert dict config to yaml. 
- """ - f = open(config_path, "w") - yaml.dump(config, f) - f.close() diff --git a/paddleslim/auto_compression/create_compressed_program.py b/paddleslim/auto_compression/create_compressed_program.py index 30276bbf6..8a6c7db2f 100644 --- a/paddleslim/auto_compression/create_compressed_program.py +++ b/paddleslim/auto_compression/create_compressed_program.py @@ -23,7 +23,7 @@ from ..common.recover_program import recover_inference_program, _remove_fetch_node from ..common import get_logger from .strategy_config import ProgramInfo -from .utils import load_inference_model +from ..common.load_model import load_inference_model _logger = get_logger(__name__, level=logging.INFO) __all__ = [ @@ -52,7 +52,8 @@ def _create_optimizer(train_config): optimizer_builder = train_config['optimizer_builder'] assert isinstance( optimizer_builder, dict - ), f"Value of 'optimizer_builder' in train_config should be dict but got {type(optimizer_builder)}" + ), "Value of 'optimizer_builder' in train_config should be dict but got {}".format( + type(optimizer_builder)) if 'grad_clip' in optimizer_builder: g_clip_params = optimizer_builder['grad_clip'] g_clip_type = g_clip_params.pop('type') @@ -444,9 +445,8 @@ def build_prune_program(executor, "####################channel pruning##########################") for param in pruned_program.global_block().all_parameters(): if param.name in original_shapes: - _logger.info( - f"{param.name}, from {original_shapes[param.name]} to {param.shape}" - ) + _logger.info("{}, from {} to {}".format( + param.name, original_shapes[param.name], param.shape)) _logger.info( "####################channel pruning end##########################") train_program_info.program = pruned_program diff --git a/paddleslim/auto_compression/strategy_config.py b/paddleslim/auto_compression/strategy_config.py index 5226a7c84..d8b3e90ce 100644 --- a/paddleslim/auto_compression/strategy_config.py +++ b/paddleslim/auto_compression/strategy_config.py @@ -53,7 +53,8 @@ def __init__(self, name): class Quantization(BaseStrategy): def __init__(self, quantize_op_types=[ - 'conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2' + 'conv2d', 'depthwise_conv2d', 'conv2d_transpose', 'mul', + 'matmul', 'matmul_v2' ], weight_bits=8, activation_bits=8, @@ -65,6 +66,7 @@ def __init__(self, window_size=10000, moving_rate=0.9, for_tensorrt=False, + onnx_format=False, is_full_quantize=False): """ Quantization Config. @@ -80,6 +82,7 @@ def __init__(self, window_size(int): Window size for 'range_abs_max' quantization. Default: 10000. moving_rate(float): The decay coefficient of moving average. Default: 0.9. for_tensorrt(bool): If True, 'quantize_op_types' will be TENSORRT_OP_TYPES. Default: False. + onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is False. is_full_quantize(bool): If True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES. Default: False. 
""" super(Quantization, self).__init__("Quantization") @@ -95,6 +98,7 @@ def __init__(self, self.window_size = window_size self.moving_rate = moving_rate self.for_tensorrt = for_tensorrt + self.onnx_format = onnx_format self.is_full_quantize = is_full_quantize diff --git a/paddleslim/auto_compression/utils/__init__.py b/paddleslim/auto_compression/utils/__init__.py index aa4f3ec07..e3c3a49d7 100644 --- a/paddleslim/auto_compression/utils/__init__.py +++ b/paddleslim/auto_compression/utils/__init__.py @@ -14,11 +14,5 @@ from __future__ import absolute_import from .predict import predict_compressed_model -from .dataloader import * -from . import dataloader -from .load_model import * -from . import load_model __all__ = ["predict_compressed_model"] -__all__ += dataloader.__all__ -__all__ += load_model.__all__ diff --git a/paddleslim/auto_compression/utils/fake_ptq.py b/paddleslim/auto_compression/utils/fake_ptq.py index fbecc224f..e86dd8486 100644 --- a/paddleslim/auto_compression/utils/fake_ptq.py +++ b/paddleslim/auto_compression/utils/fake_ptq.py @@ -12,7 +12,7 @@ TRANSFORM_PASS_OP_TYPES = QuantizationTransformPass._supported_quantizable_op_type QUANT_DEQUANT_PASS_OP_TYPES = AddQuantDequantPass._supported_quantizable_op_type -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def post_quant_fake(executor, diff --git a/paddleslim/auto_compression/utils/load_model.py b/paddleslim/auto_compression/utils/load_model.py deleted file mode 100644 index bb61ab562..000000000 --- a/paddleslim/auto_compression/utils/load_model.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import paddle - -__all__ = ['load_inference_model'] - - -def load_inference_model(path_prefix, - executor, - model_filename=None, - params_filename=None): - if model_filename is not None and params_filename is not None: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, - executor=executor, - model_filename=model_filename, - params_filename=params_filename)) - else: - model_name = '.'.join(model_filename.split('.') - [:-1]) if model_filename is not None else 'model' - if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')): - model_path_prefix = os.path.join(path_prefix, model_name) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=model_path_prefix, executor=executor)) - else: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, executor=executor)) - - return [inference_program, feed_target_names, fetch_targets] diff --git a/paddleslim/auto_compression/utils/predict.py b/paddleslim/auto_compression/utils/predict.py index 8fd852b32..5b8c6adb1 100644 --- a/paddleslim/auto_compression/utils/predict.py +++ b/paddleslim/auto_compression/utils/predict.py @@ -4,7 +4,7 @@ from ...analysis import TableLatencyPredictor from .prune_model import get_sparse_model, get_prune_model from .fake_ptq import post_quant_fake -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def with_variable_shape(model_dir, model_filename=None, params_filename=None): @@ -53,7 +53,7 @@ def predict_compressed_model(executor, latency_dict(dict): The latency latency of the model under various compression strategies. """ local_rank = paddle.distributed.get_rank() - quant_model_path = f'quant_model_rank_{local_rank}_tmp' + quant_model_path = 'quant_model_rank_{}_tmp'.format(local_rank) prune_model_path = f'prune_model_rank_{local_rank}_tmp' sparse_model_path = f'sparse_model_rank_{local_rank}_tmp' @@ -62,7 +62,12 @@ def predict_compressed_model(executor, model_file = os.path.join(model_dir, model_filename) param_file = os.path.join(model_dir, params_filename) - predictor = TableLatencyPredictor(hardware) + try: + predictor = TableLatencyPredictor(hardware) + except NotImplementedError: + raise NotImplementedError( + "Latency predictor cannot used on the platform: {}. That means you can not use latency predictor to select compress strategy automatically, you can set deploy_hardware to None or set compress strategy in the yaml". + format(platform.system())) latency = predictor.predict( model_file=model_file, param_file=param_file, data_type='fp32') latency_dict.update({'origin_fp32': latency}) diff --git a/paddleslim/auto_compression/utils/prune_model.py b/paddleslim/auto_compression/utils/prune_model.py index 426a1859c..c0da14ca9 100644 --- a/paddleslim/auto_compression/utils/prune_model.py +++ b/paddleslim/auto_compression/utils/prune_model.py @@ -5,7 +5,7 @@ import paddle.static as static from ...prune import Pruner from ...core import GraphWrapper -from .load_model import load_inference_model +from ...common.load_model import load_inference_model __all__ = ["get_sparse_model", "get_prune_model"] @@ -19,9 +19,10 @@ def get_sparse_model(executor, places, model_file, param_file, ratio, ratio(float): The ratio to prune the model. save_path(str): The save path of pruned model. """ - assert os.path.exists(model_file), f'{model_file} does not exist.' 
+ assert os.path.exists(model_file), '{} does not exist.'.format(model_file) assert os.path.exists( - param_file) or param_file is None, f'{param_file} does not exist.' + param_file) or param_file is None, '{} does not exist.'.format( + param_file) paddle.enable_static() SKIP = ['image', 'feed', 'pool2d_0.tmp_0'] diff --git a/paddleslim/common/__init__.py b/paddleslim/common/__init__.py index c3e40415b..8b1ffc02c 100644 --- a/paddleslim/common/__init__.py +++ b/paddleslim/common/__init__.py @@ -25,11 +25,16 @@ from . import wrapper_function from . import recover_program from . import patterns +from .load_model import load_inference_model, get_model_dir, load_onnx_model, export_onnx +from .dataloader import wrap_dataloader, get_feed_vars +from .config_helper import load_config, save_config __all__ = [ 'EvolutionaryController', 'SAController', 'get_logger', 'ControllerServer', 'ControllerClient', 'lock', 'unlock', 'cached_reader', 'AvgrageMeter', - 'Server', 'Client', 'RLBaseController', 'VarCollector' + 'Server', 'Client', 'RLBaseController', 'VarCollector', 'load_onnx_model', + 'load_inference_model', 'get_model_dir', 'wrap_dataloader', 'get_feed_vars', + 'load_config', 'save_config' ] __all__ += wrapper_function.__all__ diff --git a/paddleslim/common/config_helper.py b/paddleslim/common/config_helper.py new file mode 100644 index 000000000..486fa9b44 --- /dev/null +++ b/paddleslim/common/config_helper.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import yaml +import os + +__all__ = ['load_config', 'save_config'] + + +def print_arguments(args, level=0): + if level == 0: + print('----------- Running Arguments -----------') + for arg, value in sorted(args.items()): + if isinstance(value, dict): + print('\t' * level, '%s:' % arg) + print_arguments(value, level + 1) + else: + print('\t' * level, '%s: %s' % (arg, value)) + if level == 0: + print('------------------------------------------') + + +def load_config(config): + """Load configurations from yaml file into dict. + Fields validation is skipped for loading some custom information. + Args: + config(str): The path of configuration file. + Returns: + dict: A dict storing configuration information. + """ + if config is None: + return None + assert isinstance( + config, + str), f"config should be str but got type(config)={type(config)}" + assert os.path.exists(config) and os.path.isfile( + config), f"{config} not found or it is not a file." + with open(config) as f: + cfg = yaml.load(f, Loader=yaml.FullLoader) + print_arguments(cfg) + return cfg + + +def save_config(config, config_path): + """ + convert dict config to yaml. 
+ """ + f = open(config_path, "w") + yaml.dump(config, f) + f.close() diff --git a/paddleslim/auto_compression/utils/dataloader.py b/paddleslim/common/dataloader.py similarity index 95% rename from paddleslim/auto_compression/utils/dataloader.py rename to paddleslim/common/dataloader.py index f0f36716c..31e375de2 100644 --- a/paddleslim/auto_compression/utils/dataloader.py +++ b/paddleslim/common/dataloader.py @@ -3,6 +3,7 @@ import numpy as np import paddle from collections.abc import Iterable +from .load_model import load_inference_model __all__ = ["wrap_dataloader", "get_feed_vars"] @@ -13,7 +14,7 @@ def get_feed_vars(model_dir, model_filename, params_filename): paddle.enable_static() exe = paddle.static.Executor(paddle.CPUPlace()) [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( + load_inference_model( model_dir, exe, model_filename=model_filename, diff --git a/paddleslim/common/load_model.py b/paddleslim/common/load_model.py new file mode 100644 index 000000000..d6339b962 --- /dev/null +++ b/paddleslim/common/load_model.py @@ -0,0 +1,226 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import logging +import os +import shutil +import sys +import pkg_resources as pkg +import paddle + +from . import get_logger +_logger = get_logger(__name__, level=logging.INFO) + +__all__ = [ + 'load_inference_model', 'get_model_dir', 'load_onnx_model', 'export_onnx' +] + + +def load_inference_model(path_prefix, + executor, + model_filename=None, + params_filename=None): + # Load onnx model to Inference model. + if path_prefix.endswith('.onnx'): + inference_program, feed_target_names, fetch_targets = load_onnx_model( + path_prefix) + return [inference_program, feed_target_names, fetch_targets] + # Load Inference model. 
+ # TODO: clean code + if model_filename is not None and model_filename.endswith('.pdmodel'): + model_name = '.'.join(model_filename.split('.')[:-1]) + assert os.path.exists( + os.path.join(path_prefix, model_name + '.pdmodel') + ), 'Please check {}, or fix model_filename parameter.'.format( + os.path.join(path_prefix, model_name + '.pdmodel')) + assert os.path.exists( + os.path.join(path_prefix, model_name + '.pdiparams') + ), 'Please check {}, or fix params_filename parameter.'.format( + os.path.join(path_prefix, model_name + '.pdiparams')) + model_path_prefix = os.path.join(path_prefix, model_name) + [inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model( + path_prefix=model_path_prefix, executor=executor)) + elif model_filename is not None and params_filename is not None: + [inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model( + path_prefix=path_prefix, + executor=executor, + model_filename=model_filename, + params_filename=params_filename)) + else: + model_name = '.'.join(model_filename.split('.') + [:-1]) if model_filename is not None else 'model' + if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')): + model_path_prefix = os.path.join(path_prefix, model_name) + [inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model( + path_prefix=model_path_prefix, executor=executor)) + else: + [inference_program, feed_target_names, fetch_targets] = ( + paddle.static.load_inference_model( + path_prefix=path_prefix, executor=executor)) + + return [inference_program, feed_target_names, fetch_targets] + + +def get_model_dir(model_dir, model_filename, params_filename): + if model_dir.endswith('.onnx'): + updated_model_dir = model_dir.rstrip().rstrip('.onnx') + '_infer' + else: + updated_model_dir = model_dir.rstrip('/') + + if model_filename == None: + updated_model_filename = 'model.pdmodel' + else: + updated_model_filename = model_filename + + if params_filename == None: + updated_params_filename = 'model.pdiparams' + else: + updated_params_filename = params_filename + + if params_filename is None and model_filename is not None: + raise NotImplementedError( + "NOT SUPPORT parameters saved in separate files. Please convert it to single binary file first." + ) + return updated_model_dir, updated_model_filename, updated_params_filename + + +def load_onnx_model(model_path, disable_feedback=False): + assert model_path.endswith( + '.onnx' + ), '{} does not end with .onnx suffix and cannot be loaded.'.format( + model_path) + inference_model_path = model_path.rstrip().rstrip('.onnx') + '_infer' + exe = paddle.static.Executor(paddle.CPUPlace()) + if os.path.exists(os.path.join( + inference_model_path, 'model.pdmodel')) and os.path.exists( + os.path.join(inference_model_path, 'model.pdiparams')): + val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model( + os.path.join(inference_model_path, 'model'), exe) + _logger.info('Loaded model from: {}'.format(inference_model_path)) + return val_program, feed_target_names, fetch_targets + else: + # onnx to paddle inference model. 
+ assert os.path.exists( + model_path), 'Not found `{}`, please check model path.'.format( + model_path) + try: + pkg.require('x2paddle') + except: + from pip._internal import main + main(['install', 'x2paddle']) + # check onnx installation and version + try: + pkg.require('onnx') + import onnx + version = onnx.version.version + v0, v1, v2 = version.split('.') + version_sum = int(v0) * 100 + int(v1) * 10 + int(v2) + if version_sum < 160: + _logger.error( + "onnx>=1.6.0 is required, please use \"pip install onnx\".") + except: + from pip._internal import main + main(['install', 'onnx==1.12.0']) + + from x2paddle.decoder.onnx_decoder import ONNXDecoder + from x2paddle.op_mapper.onnx2paddle.onnx_op_mapper import ONNXOpMapper + from x2paddle.optimizer.optimizer import GraphOptimizer + from x2paddle.utils import ConverterCheck + time_info = int(time.time()) + if not disable_feedback: + ConverterCheck( + task="ONNX", time_info=time_info, convert_state="Start").start() + # support distributed convert model + model_idx = paddle.distributed.get_rank( + ) if paddle.distributed.get_world_size() > 1 else 0 + try: + _logger.info("Now translating model from onnx to paddle.") + model = ONNXDecoder(model_path) + mapper = ONNXOpMapper(model) + mapper.paddle_graph.build() + graph_opt = GraphOptimizer(source_frame="onnx") + graph_opt.optimize(mapper.paddle_graph) + _logger.info("Model optimized.") + onnx2paddle_out_dir = os.path.join( + inference_model_path, 'onnx2paddle_{}'.format(model_idx)) + mapper.paddle_graph.gen_model(onnx2paddle_out_dir) + _logger.info("Successfully exported Paddle static graph model!") + if not disable_feedback: + ConverterCheck( + task="ONNX", time_info=time_info, + convert_state="Success").start() + except Exception as e: + _logger.warning(e) + _logger.error( + "x2paddle threw an exception, you can ask for help at: https://github.com/PaddlePaddle/X2Paddle/issues" + ) + sys.exit(1) + + if paddle.distributed.get_rank() == 0: + shutil.move( + os.path.join(onnx2paddle_out_dir, 'inference_model', + 'model.pdmodel'), + os.path.join(inference_model_path, 'model.pdmodel')) + shutil.move( + os.path.join(onnx2paddle_out_dir, 'inference_model', + 'model.pdiparams'), + os.path.join(inference_model_path, 'model.pdiparams')) + load_model_path = inference_model_path + else: + load_model_path = os.path.join(onnx2paddle_out_dir, + 'inference_model') + + paddle.enable_static() + val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model( + os.path.join(load_model_path, 'model'), exe) + _logger.info('Loaded model from: {}'.format(load_model_path)) + # Clean up the file storage directory + shutil.rmtree( + os.path.join(inference_model_path, 'onnx2paddle_{}'.format( + model_idx))) + return val_program, feed_target_names, fetch_targets + + +def export_onnx(model_dir, + model_filename=None, + params_filename=None, + save_file_path='output.onnx', + opset_version=13, + deploy_backend='tensorrt'): + if not model_filename: + model_filename = 'model.pdmodel' + if not params_filename: + params_filename = 'model.pdiparams' + try: + os.system(' python -m pip install -U paddle2onnx==1.0.0rc4') + except: + from pip._internal import main + main(['install', 'paddle2onnx==1.0.0rc4']) + import paddle2onnx + paddle2onnx.command.c_paddle_to_onnx( + model_file=os.path.join(model_dir, model_filename), + params_file=os.path.join(model_dir, params_filename), + save_file=save_file_path, + opset_version=opset_version, + enable_onnx_checker=True, + deploy_backend=deploy_backend, + 
scale_file=os.path.join(model_dir, 'calibration_table.txt'), + calibration_file=os.path.join( + save_file_path.rstrip(os.path.split(save_file_path)[-1]), + 'calibration.cache')) + _logger.info('Convert model to ONNX: {}'.format(save_file_path)) diff --git a/paddleslim/dygraph/prune/pruning_plan.py b/paddleslim/dygraph/prune/pruning_plan.py index d9cd8e4a2..cd669ffd5 100644 --- a/paddleslim/dygraph/prune/pruning_plan.py +++ b/paddleslim/dygraph/prune/pruning_plan.py @@ -220,7 +220,7 @@ def imperative_apply(self, model, opt=None): t_value = param.value().get_tensor() value = np.array(t_value).astype("float32") groups = _mask._op.attr('groups') - if dims == 1 and groups is not None and groups > 1 and len( + if groups is not None and groups > 1 and len( value.shape) == 4: filter_size = value.shape[1] except_num = np.sum(bool_mask) @@ -230,7 +230,6 @@ def imperative_apply(self, model, opt=None): sub_layer._groups = new_groups _logger.info("change groups from {} to {} for {}.". format(groups, new_groups, param.name)) - continue # The name of buffer can not contains "." backup_name = param.name.replace(".", "_") + "_backup" diff --git a/paddleslim/dygraph/quant/qat.py b/paddleslim/dygraph/quant/qat.py index 280fe94a3..34b1ae7ba 100644 --- a/paddleslim/dygraph/quant/qat.py +++ b/paddleslim/dygraph/quant/qat.py @@ -243,8 +243,8 @@ def quantize(self, model, inplace=True): """ assert isinstance(model, paddle.nn.Layer), \ "The model must be the instance of paddle.nn.Layer." - - self._model = copy.deepcopy(model) + if self.weight_preprocess is not None or self.act_preprocess is not None: + self._model = copy.deepcopy(model) if inplace: quantize_model = self.imperative_qat.quantize(model) diff --git a/paddleslim/prune/prune_worker.py b/paddleslim/prune/prune_worker.py index 25703c66d..c9c69f622 100644 --- a/paddleslim/prune/prune_worker.py +++ b/paddleslim/prune/prune_worker.py @@ -522,7 +522,6 @@ def _prune(self, var, pruned_axis, transforms): channel_axis = 1 if data_format == "NHWC": channel_axis = 3 - if var == _in_var: assert pruned_axis == channel_axis, "The Input of conv2d can only be pruned at channel axis, but got {}".format( pruned_axis) @@ -533,7 +532,6 @@ def _prune(self, var, pruned_axis, transforms): "repeat": repeat }]) # kernel_number * groups will be pruned by reducing groups - self.append_pruned_vars(_filter, 1, transforms) self._visit_and_search(_filter, 0, transforms + [{ "repeat": repeat }]) @@ -546,14 +544,13 @@ def _prune(self, var, pruned_axis, transforms): }]) elif var == _filter: assert pruned_axis == 0, "The filter of depthwise conv2d can only be pruned at axis 0." - self.append_pruned_vars(_filter, 1, transforms) + self.append_pruned_vars(_filter, 0, transforms) self._visit_and_search(_in_var, channel_axis, transforms) self._visit_and_search(_out, channel_axis, transforms) elif var == _out: assert pruned_axis == channel_axis, "The Input of conv2d can only be pruned at channel axis, but got {}".format( pruned_axis) self.append_pruned_vars(_filter, 0, transforms) - self.append_pruned_vars(_filter, 1, transforms) self._visit_and_search(_filter, 0, transforms) # It will not pruning number of kernels in depthwise conv2d, # so it is not neccesary to search succeed operators. 
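# ----------------------------------------------------------------------------
# Usage sketch (not part of the patch) for the new paddleslim.common.load_model
# helpers introduced above. File names below ('yolov5s.onnx', 'output.onnx')
# are placeholders; the signatures are taken from the code added in this diff.
import paddle
from paddleslim.common import load_inference_model, export_onnx

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# A path ending in '.onnx' is converted with X2Paddle on first use and cached
# as a Paddle inference model under '<name>_infer/'.
program, feed_names, fetch_targets = load_inference_model(
    'yolov5s.onnx', executor=exe)

# The cached inference model can be exported back to ONNX with paddle2onnx.
# export_onnx also reads 'calibration_table.txt' from model_dir, which is only
# written for models quantized with onnx_format=True (see the quanter.py
# changes further down); for an FP32 model this call is purely illustrative.
export_onnx(
    'yolov5s_infer',
    model_filename='model.pdmodel',
    params_filename='model.pdiparams',
    save_file_path='output.onnx',
    opset_version=13,
    deploy_backend='tensorrt')
# ----------------------------------------------------------------------------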
diff --git a/paddleslim/prune/pruner.py b/paddleslim/prune/pruner.py index 1926c8cbb..4c58c2e1d 100644 --- a/paddleslim/prune/pruner.py +++ b/paddleslim/prune/pruner.py @@ -117,7 +117,7 @@ def prune(self, _groups = 1 if not lazy: # update groups of conv2d - if pruned_axis == 1: + if pruned_axis == 1 or pruned_axis == 0: for op in param.outputs(): if op.type() in [ "conv2d", "conv2d_grad", "depthwise_conv2d", @@ -132,7 +132,7 @@ def prune(self, f"change groups of {op.type()}({param.name()}) from {op.attr('groups')} to {new_groups};" ) op.set_attr("groups", new_groups) - if _groups == 1: + origin_shape = copy.deepcopy(param.shape()) if param_shape_backup is not None: param_shape_backup[param.name()] = origin_shape diff --git a/paddleslim/quant/__init__.py b/paddleslim/quant/__init__.py index 8b472e72d..69e542962 100644 --- a/paddleslim/quant/__init__.py +++ b/paddleslim/quant/__init__.py @@ -35,7 +35,7 @@ except Exception as e: _logger.warning(e) _logger.warning( - f"If you want to use training-aware and post-training quantization, " - "please use Paddle >= {min_paddle_version} or develop version") + "If you want to use training-aware and post-training quantization, " + "please use Paddle >= {} or develop version".format(min_paddle_version)) from .quant_embedding import quant_embedding diff --git a/paddleslim/quant/analysis.py b/paddleslim/quant/analysis.py new file mode 100644 index 000000000..d81e80f8e --- /dev/null +++ b/paddleslim/quant/analysis.py @@ -0,0 +1,312 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import pickle +import copy +import logging +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import numpy as np + +import paddle +from paddle.fluid import core +from paddle.fluid import framework +from paddle.fluid.framework import IrGraph +from paddle.fluid.executor import global_scope +from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization +from paddle.fluid.contrib.slim.quantization.utils import _get_op_input_var_names, load_variable_data +from .quanter import quant_post +from ..core import GraphWrapper +from ..common import get_logger +from ..common import get_feed_vars, wrap_dataloader, load_inference_model, get_model_dir + +_logger = get_logger(__name__, level=logging.INFO) + +__all__ = ["AnalysisQuant"] + + +class AnalysisQuant(object): + def __init__(self, + model_dir, + model_filename=None, + params_filename=None, + eval_function=None, + data_loader=None, + save_dir='analysis_results', + checkpoint_name='analysis_checkpoint.pkl', + num_histogram_plots=10, + ptq_config=None): + """ + AnalysisQuant provides to analysis the sensitivity of each op in the model. 
+ + Args: + model_dir(str): the path of fp32 model that will be quantized, it can also be '.onnx' + model_filename(str, optional): the model file name of the fp32 model + params_filename(str, optional): the parameter file name of the fp32 model + eval_function(function): eval function, define by yourself to return the metric of the inference program, can be used to judge the metric of quantized model. (TODO: optional) + data_loader(Python Generator, Paddle.io.DataLoader, optional): the + Generator or Dataloader provides calibrate data, and it could + return a batch every time + save_dir(str, optional): the output dir that stores the analyzed information + checkpoint_name(str, optional): the name of checkpoint file that saves analyzed information and avoids break off while ananlyzing + ptq_config(dict, optional): the args that can initialize PostTrainingQuantization + + """ + if model_filename is None: + model_filename = 'model.pdmodel' + if params_filename is None: + params_filename = 'model.pdiparams' + self.model_dir = model_dir + self.model_filename = model_filename + self.params_filename = params_filename + self.histogram_bins = 1000 + self.save_dir = save_dir + self.eval_function = eval_function + self.quant_layer_names = [] + self.checkpoint_name = os.path.join(save_dir, checkpoint_name) + self.quant_layer_metrics = {} + self.num_histogram_plots = num_histogram_plots + self.ptq_config = ptq_config + self.batch_nums = ptq_config[ + 'batch_nums'] if 'batch_nums' in ptq_config else 10 + + if not os.path.exists(self.save_dir): + os.mkdir(self.save_dir) + + devices = paddle.device.get_device().split(':')[0] + self.places = paddle.device._convert_to_place(devices) + executor = paddle.static.Executor(self.places) + + # load model + [program, self.feed_list, self.fetch_list]= load_inference_model( \ + self.model_dir, \ + executor=executor, \ + model_filename=self.model_filename, \ + params_filename=self.params_filename) + + # create data_loader + self.data_loader = wrap_dataloader(data_loader, self.feed_list) + + # evaluate before quant + # TODO: self.eval_function can be None + if self.eval_function is not None: + self.base_metric = self.eval_function( + executor, program, self.feed_list, self.fetch_list) + _logger.info('before quantized, the accuracy of the model is: {}'. 
+ format(self.base_metric)) + + # quant and evaluate after quant (skip_list = None) + post_training_quantization = PostTrainingQuantization( + executor=executor, + data_loader=self.data_loader, + model_dir=self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + skip_tensor_list=None, + algo='avg', #fastest + **self.ptq_config) + program = post_training_quantization.quantize() + self.quant_metric = self.eval_function(executor, program, + self.feed_list, self.fetch_list) + _logger.info('after quantized, the accuracy of the model is: {}'.format( + self.quant_metric)) + + # get quantized weight and act var name + self.quantized_weight_var_name = post_training_quantization._quantized_weight_var_name + self.quantized_act_var_name = post_training_quantization._quantized_act_var_name + executor.close() + + # load tobe_analyized_layer from checkpoint + self.load_checkpoint() + self.tobe_analyized_layer = self.quantized_weight_var_name - set( + list(self.quant_layer_metrics.keys())) + self.tobe_analyized_layer = sorted(list(self.tobe_analyized_layer)) + + def analysis(self): + self.compute_quant_sensitivity() + self.sensitivity_ranklist = sorted( + self.quant_layer_metrics, + key=self.quant_layer_metrics.get, + reverse=False) + + _logger.info('Finished computing the sensitivity of the model.') + for name in self.sensitivity_ranklist: + _logger.info("quant layer name: {}, eval metric: {}".format( + name, self.quant_layer_metrics[name])) + + analysis_file = os.path.join(self.save_dir, "analysis.txt") + with open(analysis_file, "w") as analysis_ret_f: + for name in self.sensitivity_ranklist: + analysis_ret_f.write( + "quant layer name: {}, eval metric: {}\n".format( + name, self.quant_layer_metrics[name])) + _logger.info('Analysis file is saved in {}'.format(analysis_file)) + self.calculate_histogram() + + def save_checkpoint(self): + if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir) + with open(self.checkpoint_name, 'wb') as f: + pickle.dump(self.quant_layer_metrics, f) + _logger.info('save checkpoint to {}'.format(self.checkpoint_name)) + + def load_checkpoint(self): + if not os.path.exists(self.checkpoint_name): + return False + with open(self.checkpoint_name, 'rb') as f: + self.quant_layer_metrics = pickle.load(f) + _logger.info('load checkpoint from {}'.format(self.checkpoint_name)) + return True + + def compute_quant_sensitivity(self): + ''' + For each layer, quantize the weight op and evaluate the quantized model. + ''' + for i, layer_name in enumerate(self.tobe_analyized_layer): + _logger.info('checking {}/{} quant model: quant layer {}'.format( + i + 1, len(self.tobe_analyized_layer), layer_name)) + skip_list = copy.copy(list(self.quantized_weight_var_name)) + skip_list.remove(layer_name) + + executor = paddle.static.Executor(self.places) + post_training_quantization = PostTrainingQuantization( + executor=executor, + data_loader=self.data_loader, + model_dir=self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + skip_tensor_list=skip_list, + algo='avg', #fastest + **self.ptq_config) + program = post_training_quantization.quantize() + + _logger.info('Evaluating...') + quant_metric = self.eval_function(executor, program, self.feed_list, + self.fetch_list) + executor.close() + _logger.info( + "quant layer name: {}, eval metric: {}, the loss caused by this layer: {}". 
+ format(layer_name, quant_metric, self.base_metric - + quant_metric)) + self.quant_layer_metrics[layer_name] = quant_metric + self.save_checkpoint() + + def get_act_name_by_weight(self, program, weight_names, + persistable_var_names): + act_ops_names = [] + for op_name in weight_names: + for block_id in range(len(program.blocks)): + for op in program.blocks[block_id].ops: + var_name_list = _get_op_input_var_names(op) + if op_name in var_name_list: + for var_name in var_name_list: + if var_name not in persistable_var_names: + act_ops_names.append(var_name) + return act_ops_names + + def get_hist_ops_name(self, graph, program): + if self.num_histogram_plots <= 0: + return [] + + best_weight_ops = self.sensitivity_ranklist[::-1][:self. + num_histogram_plots] + worst_weight_ops = self.sensitivity_ranklist[:self.num_histogram_plots] + + persistable_var_names = [] + for var in program.list_vars(): + if var.persistable: + persistable_var_names.append(var.name) + + best_act_ops = self.get_act_name_by_weight(program, best_weight_ops, + persistable_var_names) + worst_act_ops = self.get_act_name_by_weight(program, worst_weight_ops, + persistable_var_names) + return [best_weight_ops, best_act_ops, worst_weight_ops, worst_act_ops] + + def collect_ops_histogram(self, scope, ops): + hist = {} + for var_name in ops: + var_tensor = load_variable_data(scope, var_name) + var_tensor = np.array(var_tensor) + min_v = float(np.min(var_tensor)) + max_v = float(np.max(var_tensor)) + var_tensor = var_tensor.flatten() + _, hist_edges = np.histogram( + var_tensor.copy(), + bins=self.histogram_bins, + range=(min_v, max_v)) + hist[var_name] = [var_tensor, hist_edges] + return hist + + def calculate_histogram(self): + ''' + Sample histograms for the weight and corresponding act tensors + ''' + devices = paddle.device.get_device().split(':')[0] + places = paddle.device._convert_to_place(devices) + executor = paddle.static.Executor(places) + + [program, feed_list, fetch_list]= load_inference_model( \ + self.model_dir, \ + executor=executor, \ + model_filename=self.model_filename, \ + params_filename=self.params_filename) + + scope = global_scope() + + graph = IrGraph(core.Graph(program.desc), for_test=False) + ops_tobe_draw_hist = self.get_hist_ops_name(graph, program) + if not ops_tobe_draw_hist: + return + + for var in program.list_vars(): + if var.name in self.quantized_act_var_name: + var.persistable = True + + # sample before collect histogram + batch_id = 0 + for data in self.data_loader(): + executor.run(program=program, + feed=data, + fetch_list=fetch_list, + return_numpy=False, + scope=scope) + batch_id += 1 + if batch_id >= self.batch_nums: + break + + pdf_names = [ + 'best_weight_hist_result.pdf', + 'best_act_hist_result.pdf', + 'worst_weight_hist_result.pdf', + 'worst_act_hist_result.pdf', + ] + for ops, save_pdf_name in zip(ops_tobe_draw_hist, pdf_names): + hist_data = self.collect_ops_histogram(scope, ops) + self.draw_pdf(hist_data, save_pdf_name) + + def draw_pdf(self, hist_data, save_pdf_name): + pdf_path = os.path.join(self.save_dir, save_pdf_name) + with PdfPages(pdf_path) as pdf: + for name in hist_data: + plt.hist(hist_data[name][0], bins=hist_data[name][1]) + plt.xlabel(name) + plt.ylabel("frequency") + plt.title("Hist of variable {}".format(name)) + plt.show() + pdf.savefig() + plt.close() + _logger.info('Histogram plot is saved in {}'.format(pdf_path)) diff --git a/paddleslim/quant/post_quant_hpo.py b/paddleslim/quant/post_quant_hpo.py index e92742d09..455e723d8 100755 --- 
a/paddleslim/quant/post_quant_hpo.py +++ b/paddleslim/quant/post_quant_hpo.py @@ -17,6 +17,7 @@ import sys import math import time +from time import gmtime, strftime import numpy as np import shutil import paddle @@ -28,19 +29,12 @@ import glob from scipy.stats import wasserstein_distance -# smac -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformFloatHyperparameter, UniformIntegerHyperparameter -from smac.configspace import ConfigurationSpace -from smac.facade.smac_hpo_facade import SMAC4HPO -from smac.scenario.scenario import Scenario - from paddleslim.common import get_logger from paddleslim.quant import quant_post _logger = get_logger(__name__, level=logging.INFO) -SMAC_TMP_FILE_PATTERN = "smac3-output*" +SMAC_TMP_FILE_PATTERN = "smac3-output_" def remove(path): @@ -241,8 +235,9 @@ def eval_quant_model(): continue try: - out_float = standardization(out_float) - out_quant = standardization(out_quant) + if len(out_float) > 3: + out_float = standardization(out_float) + out_quant = standardization(out_quant) except: continue out_float_list.append(out_float) @@ -415,6 +410,18 @@ def quant_post_hpo( None """ + try: + os.system(' python -m pip install -U smac') + except: + from pip._internal import main + main(['install', 'smac']) + # smac + from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformFloatHyperparameter, UniformIntegerHyperparameter + from smac.configspace import ConfigurationSpace + from smac.facade.smac_hpo_facade import SMAC4HPO + from smac.scenario.scenario import Scenario + global g_quant_config g_quant_config = QuantConfig( executor, place, model_dir, quantize_model_path, algo, hist_percent, @@ -496,6 +503,9 @@ def quant_post_hpo( cs.add_hyperparameters(hyper_params) + s_datetime = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + smac_output_dir = SMAC_TMP_FILE_PATTERN + s_datetime + scenario = Scenario({ "run_obj": "quality", # we optimize quality (alternative runtime) "runcount-limit": @@ -503,7 +513,9 @@ def quant_post_hpo( "cs": cs, # configuration space "deterministic": "True", "limit_resources": "False", - "memory_limit": 4096 # adapt this to reasonable value for your hardware + "memory_limit": + 4096, # adapt this to reasonable value for your hardware + "output_dir": smac_output_dir # output_dir }) # To optimize, we pass the function to the SMAC-object smac = SMAC4HPO( @@ -523,5 +535,5 @@ def quant_post_hpo( inc_value = smac.get_tae_runner().run(incumbent, 1)[1] _logger.info("Optimized Value: %.8f" % inc_value) shutil.rmtree(g_quant_model_cache_path) - remove(SMAC_TMP_FILE_PATTERN) + remove(smac_output_dir) _logger.info("Quantization completed.") diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py index 9e07c03c6..9f8ed323e 100755 --- a/paddleslim/quant/quanter.py +++ b/paddleslim/quant/quanter.py @@ -16,9 +16,14 @@ import copy import json import logging +import collections +import numpy as np import paddle +from paddle.fluid import core +from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import WeightQuantization from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass @@ -27,18 +32,17 @@ from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass from 
paddle.fluid.contrib.slim.quantization import OutScaleForInferencePass +from ..common import get_logger +_logger = get_logger(__name__, level=logging.INFO) + try: from paddle.fluid.contrib.slim.quantization import QuantizationTransformPassV2 from paddle.fluid.contrib.slim.quantization import QuantWeightPass from paddle.fluid.contrib.slim.quantization import AddQuantDequantPassV2 except: - pass -from paddle.fluid import core -from paddle.fluid.contrib.slim.quantization import WeightQuantization -from paddle.fluid.layer_helper import LayerHelper - -from ..common import get_logger -_logger = get_logger(__name__, level=logging.INFO) + _logger.warning( + "Some functions fail to import, please update PaddlePaddle version to 2.3+" + ) WEIGHT_QUANTIZATION_TYPES = [ 'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max' @@ -91,27 +95,54 @@ # if True, 'quantize_op_types' will be TENSORRT_OP_TYPES 'for_tensorrt': False, # if True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES - 'is_full_quantize': False + 'is_full_quantize': False, + # if True, use onnx format to quant. + 'onnx_format': False, } -# TODO: Hard-code, remove it when Paddle 2.3.1 -class OutScaleForTrainingPassV2(OutScaleForTrainingPass): - def __init__(self, scope=None, place=None, moving_rate=0.9): - OutScaleForTrainingPass.__init__( - self, scope=scope, place=place, moving_rate=moving_rate) - - def _scale_name(self, var_name): +class OutScaleForInferencePassV2(object): + def __init__(self, scope=None): """ - Return the scale name for the var named `var_name`. + This pass is used for setting output scales of some operators. + These output scales may be used by tensorRT or some other inference engines. + + Args: + scope(fluid.Scope): The scope is used to initialize these new parameters. """ - return "%s@scale" % (var_name) + self._scope = scope + self._teller_set = utils._out_scale_op_list + def apply(self, graph): + """ + Get output scales from the scope and set these scales in op_descs + of operators in the teller_set. -# TODO: Hard-code, remove it when Paddle 2.3.1 -class OutScaleForInferencePassV2(OutScaleForInferencePass): - def __init__(self, scope=None): - OutScaleForInferencePass.__init__(self, scope=scope) + Args: + graph(IrGraph): the target graph. + """ + assert isinstance(graph, + IrGraph), 'graph must be the instance of IrGraph.' + collect_dict = collections.OrderedDict() + op_nodes = graph.all_op_nodes() + for op_node in op_nodes: + if op_node.name() in self._teller_set: + var_names = utils._get_op_output_var_names(op_node) + for var_name in var_names: + in_node = graph._find_node_by_name(op_node.outputs, + var_name) + if in_node.dtype() not in \ + [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]: + continue + + collect_dict[var_name] = {} + scale_name = self._scale_name(var_name) + scale_var = self._scope.find_var(scale_name) + assert scale_var is not None, \ + "Can not find {} variable in the scope".format(scale_name) + scale_value = np.array(scale_var.get_tensor())[0] + collect_dict[var_name]['scale'] = float(scale_value) + return graph, collect_dict def _scale_name(self, var_name): """ @@ -222,7 +253,6 @@ def quant_aware(program, act_preprocess_func=None, optimizer_func=None, executor=None, - onnx_format=False, return_program=False, draw_graph=False): """Add quantization and dequantization operators to "program" @@ -236,7 +266,9 @@ def quant_aware(program, Default: None. 
scope(paddle.static.Scope): Scope records the mapping between variable names and variables, similar to brackets in programming languages. Usually users can use - `paddle.static.global_scope `_. When ``None`` will use `paddle.static.global_scope() `_ . Default: ``None``. + `paddle.static.global_scope `_. + When ``None`` will use `paddle.static.global_scope() `_ . + Default: ``None``. for_test(bool): If the 'program' parameter is a test program, this parameter should be set to ``True``. Otherwise, set to ``False``.Default: False weight_quantize_func(function): Function that defines how to quantize weight. Using this @@ -291,7 +323,8 @@ def quant_aware(program, elif op_type in QUANT_DEQUANT_PASS_OP_TYPES: quant_dequant_ops.append(op_type) if len(transform_pass_ops) > 0: - trannsform_func = 'QuantizationTransformPassV2' if onnx_format else 'QuantizationTransformPass' + trannsform_func = 'QuantizationTransformPassV2' if config[ + 'onnx_format'] else 'QuantizationTransformPass' transform_pass = eval(trannsform_func)( scope=scope, place=place, @@ -313,7 +346,8 @@ def quant_aware(program, transform_pass.apply(main_graph) if len(quant_dequant_ops) > 0: - qdq_func = 'AddQuantDequantPassV2' if onnx_format else 'AddQuantDequantPass' + qdq_func = 'AddQuantDequantPassV2' if config[ + 'onnx_format'] else 'AddQuantDequantPass' quant_dequant_pass = eval(qdq_func)( scope=scope, place=place, @@ -323,7 +357,7 @@ def quant_aware(program, quantizable_op_type=quant_dequant_ops) quant_dequant_pass.apply(main_graph) - out_scale_training_pass = OutScaleForTrainingPassV2( + out_scale_training_pass = OutScaleForTrainingPass( scope=scope, place=place, moving_rate=config['moving_rate']) out_scale_training_pass.apply(main_graph) @@ -356,8 +390,8 @@ def quant_post_static( data_loader=None, model_filename=None, params_filename=None, - save_model_filename='__model__', - save_params_filename='__params__', + save_model_filename='model.pdmodel', + save_params_filename='model.pdiparams', batch_size=1, batch_nums=None, scope=None, @@ -405,9 +439,9 @@ def quant_post_static( When all parameters are saved in a single file, set it as filename. If parameters are saved in separate files, set it as 'None'. Default : 'None'. - save_model_filename(str): The name of model file to save the quantized inference program. Default: '__model__'. + save_model_filename(str): The name of model file to save the quantized inference program. Default: 'model.pdmodel'. save_params_filename(str): The name of file to save all related parameters. - If it is set None, parameters will be saved in separate files. Default: '__params__'. + If it is set None, parameters will be saved in separate files. Default: 'model.pdiparams'. batch_size(int, optional): The batch size of DataLoader, default is 1. batch_nums(int, optional): If batch_nums is not None, the number of calibrate data is 'batch_size*batch_nums'. If batch_nums is None, use all data @@ -508,6 +542,22 @@ def quant_post_static( quantize_model_path, model_filename=save_model_filename, params_filename=save_params_filename) + if onnx_format: + try: + collect_dict = post_training_quantization._calibration_scales + save_quant_table_path = os.path.join(quantize_model_path, + 'calibration_table.txt') + with open(save_quant_table_path, 'w') as txt_file: + for tensor_name in collect_dict.keys(): + write_line = '{} {}'.format( + tensor_name, collect_dict[tensor_name]['scale']) + '\n' + txt_file.write(write_line) + _logger.info("Quantization clip ranges of tensors is save in: {}". 
+ format(save_quant_table_path)) + except: + _logger.warning( + "Unable to generate `calibration_table.txt`, please update PaddlePaddle >= 2.3.3" + ) # We have changed the quant_post to quant_post_static. @@ -521,7 +571,7 @@ def convert(program, config=None, scope=None, save_int8=False, - onnx_format=False): + save_clip_ranges_path='./'): """ convert quantized and well-trained ``program`` to final quantized ``program``that can be used to save ``inference model``. @@ -543,6 +593,7 @@ def convert(program, save_int8: Whether to return ``program`` which model parameters' dtype is ``int8``. This parameter can only be used to get model size. Default: ``False``. + save_clip_ranges_path: If config.onnx_format=True, quantization clip ranges will be saved locally. Returns: Tuple : freezed program which can be used for inference. @@ -560,11 +611,22 @@ def convert(program, _logger.info("convert config {}".format(config)) test_graph = IrGraph(core.Graph(program.desc), for_test=True) - if onnx_format: + if config['onnx_format']: quant_weight_pass = QuantWeightPass(scope, place) quant_weight_pass.apply(test_graph) - else: out_scale_infer_pass = OutScaleForInferencePassV2(scope=scope) + _, collect_dict = out_scale_infer_pass.apply(test_graph) + save_quant_table_path = os.path.join(save_clip_ranges_path, + 'calibration_table.txt') + with open(save_quant_table_path, 'w') as txt_file: + for tensor_name in collect_dict.keys(): + write_line = '{} {}'.format( + tensor_name, collect_dict[tensor_name]['scale']) + '\n' + txt_file.write(write_line) + _logger.info("Quantization clip ranges of tensors is save in: {}". + format(save_quant_table_path)) + else: + out_scale_infer_pass = OutScaleForInferencePass(scope=scope) out_scale_infer_pass.apply(test_graph) # Freeze the graph after training by adjusting the quantize # operators' order for the inference. 
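# ----------------------------------------------------------------------------
# Usage sketch (not part of the patch) for the AnalysisQuant tool added in
# paddleslim/quant/analysis.py above. The model directory, the feed name
# 'inputs', and the dummy data/metric below are assumptions for illustration;
# a real run should supply a calibration dataloader and an evaluation metric.
import numpy as np
import paddle
from paddleslim.quant.analysis import AnalysisQuant

paddle.enable_static()

def calib_loader():
    # Yields feed dicts of random data standing in for calibration images.
    for _ in range(10):
        yield {'inputs': np.random.rand(1, 3, 224, 224).astype('float32')}

def eval_fn(exe, program, feed_list, fetch_list):
    # Placeholder: run `program` on a validation set and return a scalar
    # metric (e.g. top-1 accuracy); the analysis ranks layers by its drop.
    return 0.0

analyzer = AnalysisQuant(
    model_dir='MobileNetV1_infer',
    model_filename='inference.pdmodel',
    params_filename='inference.pdiparams',
    eval_function=eval_fn,
    data_loader=calib_loader,
    save_dir='analysis_results',
    num_histogram_plots=10,
    ptq_config={
        'quantizable_op_type': ['conv2d', 'depthwise_conv2d'],
        'batch_nums': 10,
    })
# Quantizes the model one weight layer at a time, ranks layers by metric loss,
# and writes analysis.txt plus weight/activation histogram PDFs under save_dir.
analyzer.analysis()
# ----------------------------------------------------------------------------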
diff --git a/requirements.txt b/requirements.txt index 2f9bde468..d558b2216 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,6 @@ -#paddlepaddle == 1.6.0rc0 tqdm pyzmq matplotlib pillow pyyaml scikit-learn -smac -paddleslim-opt-tools diff --git a/setup.py b/setup.py index 0fa97fef9..11a8e9bca 100644 --- a/setup.py +++ b/setup.py @@ -17,11 +17,22 @@ from __future__ import print_function import platform +import subprocess from setuptools import find_packages from setuptools import setup -slim_version = "2.3.0" +if 'develop' in subprocess.getoutput('git branch'): + slim_version = '0.0.0_dev' +else: + tag_list = subprocess.getoutput('git tag').split('\n') + if 'rc' in tag_list[-1]: + if tag_list[-1].split('-')[0] == tag_list[-2]: + slim_version = tag_list[-2] + else: + slim_version = tag_list[-1] + else: + slim_version = tag_list[-1] with open('./requirements.txt') as f: setup_requires = f.read().splitlines() diff --git a/tests/act/test_act_api.py b/tests/act/test_act_api.py index ec73d46e3..2b2a8b6c3 100644 --- a/tests/act/test_act_api.py +++ b/tests/act/test_act_api.py @@ -8,7 +8,8 @@ import numpy as np from paddle.io import Dataset from paddleslim.auto_compression import AutoCompression -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config +from paddleslim.common import load_inference_model, export_onnx class RandomEvalDataset(Dataset): @@ -120,5 +121,36 @@ def test_compress(self): ac.compress() +class TestLoadONNXModel(ACTBase): + def __init__(self, *args, **kwargs): + super(TestLoadONNXModel, self).__init__(*args, **kwargs) + os.system( + 'wget https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx') + self.model_dir = 'yolov5s.onnx' + + def test_compress(self): + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + _, _, _ = load_inference_model( + self.model_dir, + executor=exe, + model_filename='model.pdmodel', + params_filename='model.paiparams') + # reload model + _, _, _ = load_inference_model( + self.model_dir, + executor=exe, + model_filename='model.pdmodel', + params_filename='model.paiparams') + # convert onnx + export_onnx( + self.model_dir, + model_filename='model.pdmodel', + params_filename='model.paiparams', + save_file_path='output.onnx', + opset_version=13, + deploy_backend='tensorrt') + + if __name__ == '__main__': unittest.main() diff --git a/tests/act/test_demo.py b/tests/act/test_demo.py new file mode 100644 index 000000000..635d8a016 --- /dev/null +++ b/tests/act/test_demo.py @@ -0,0 +1,69 @@ +import os +import sys +import unittest +sys.path.append("../../") +import paddle +from PIL import Image +from paddle.vision.datasets import DatasetFolder +from paddle.vision.transforms import transforms +from paddleslim.auto_compression import AutoCompression +paddle.enable_static() + + +class ImageNetDataset(DatasetFolder): + def __init__(self, path, image_size=224): + super(ImageNetDataset, self).__init__(path) + normalize = transforms.Normalize( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375]) + self.transform = transforms.Compose([ + transforms.Resize(256), transforms.CenterCrop(image_size), + transforms.Transpose(), normalize + ]) + + def __getitem__(self, idx): + img_path, _ = self.samples[idx] + return self.transform(Image.open(img_path).convert('RGB')) + + def __len__(self): + return len(self.samples) + + +class ACTDemo(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(ACTDemo, self).__init__(*args, **kwargs) + os.system( + 'wget 
https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar' + ) + os.system('tar -xf MobileNetV1_infer.tar') + os.system( + 'wget https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz' + ) + os.system('tar -xf ILSVRC2012_data_demo.tar.gz') + + def test_demo(self): + train_dataset = ImageNetDataset( + "./ILSVRC2012_data_demo/ILSVRC2012/train/") + image = paddle.static.data( + name='inputs', shape=[None] + [3, 224, 224], dtype='float32') + train_loader = paddle.io.DataLoader( + train_dataset, feed_list=[image], batch_size=32, return_list=False) + + ac = AutoCompression( + model_dir="./MobileNetV1_infer", + model_filename="inference.pdmodel", + params_filename="inference.pdiparams", + save_dir="MobileNetV1_quant", + config={ + 'Quantization': {}, + "HyperParameterOptimization": { + 'ptq_algo': ['avg'], + 'max_quant_count': 3 + } + }, + train_dataloader=train_loader, + eval_dataloader=train_loader) + ac.compress() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_prune_walker.py b/tests/test_prune_walker.py index e0375c8b6..f2f2d2fc1 100644 --- a/tests/test_prune_walker.py +++ b/tests/test_prune_walker.py @@ -473,9 +473,9 @@ def define_layer(self, input): def set_cases(self): weight_var = self.graph.var('conv1.w_0') - self.cases.append((self.in_var, 1, {'conv1.w_0': [0, 1]})) - self.cases.append((self.out_var, 1, {'conv1.w_0': [0, 1]})) - self.cases.append((weight_var, 0, {'conv1.w_0': [1]})) + self.cases.append((self.in_var, 1, {'conv1.w_0': [0]})) + self.cases.append((self.out_var, 1, {'conv1.w_0': [0]})) + self.cases.append((weight_var, 0, {'conv1.w_0': [0]})) def test_prune(self): self.check_in_out() diff --git a/tests/test_quant_post.py b/tests/test_quant_post.py index 39c3a2fe0..31eed36e2 100644 --- a/tests/test_quant_post.py +++ b/tests/test_quant_post.py @@ -132,8 +132,8 @@ def test(program, outputs=[avg_cost, acc_top1, acc_top5]): quant_post_prog, feed_target_names, fetch_targets = paddle.fluid.io.load_inference_model( dirname='./test_quant_post_inference', executor=exe, - model_filename='__model__', - params_filename='__params__') + model_filename='model.pdmodel', + params_filename='model.pdiparams') top1_2, top5_2 = test(quant_post_prog, fetch_targets) print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1)) print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
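# ----------------------------------------------------------------------------
# Usage sketch (not part of the patch) for the load_config/save_config helpers
# added in paddleslim/common/config_helper.py above. The YAML path is a
# placeholder; the config keys mirror the ACT demo in tests/act/test_demo.py.
from paddleslim.common import load_config, save_config

act_config = {
    'Quantization': {},
    'HyperParameterOptimization': {
        'ptq_algo': ['avg'],
        'max_quant_count': 3,
    },
}
# save_config dumps the dict to YAML; load_config reads it back (printing the
# parsed arguments) without validating individual fields.
save_config(act_config, 'act_config.yaml')
restored = load_config('act_config.yaml')
assert restored == act_config
# ----------------------------------------------------------------------------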