diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index 8ff2c10cc50..270a245bb65 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -45,7 +45,7 @@ parameters: - ssd_mobilenet_v1_ckpt # - ssd_resnet50_v1_ckpt - inception_v1 - - resnet50_fashion + # - resnet50_fashion - darknet19 - densenet-121 - resnet-101 @@ -156,7 +156,8 @@ stages: cd ${OUT_SCRIPT_PATH} mkdir generated mkdir last_generated - python -u collect_log_all.py --logs_dir $(OUT_SCRIPT_PATH) --output_dir generated + pip install requests + python -u collect_log_all.py --logs_dir $(OUT_SCRIPT_PATH) --output_dir generated --build_id=$(Build.BuildId) displayName: "Collect all logs" - task: DownloadPipelineArtifact@2 continueOnError: true diff --git a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh index b15da8c91b3..938c5ecdc6c 100644 --- a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh +++ b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh @@ -10,13 +10,13 @@ pip install -r /neural-compressor/requirements.txt pip install torch==1.12.0 python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto \ ---ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ +--ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ > $log_dir/pylint.json exit_code=$? 
$BOLD_YELLOW && echo " ----------------- Current pylint cmd start --------------------------" && $RESET -echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" +echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" $BOLD_YELLOW && echo " ----------------- Current pylint cmd end --------------------------" && $RESET $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" && $RESET diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt similarity index 96% rename from .azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt rename to .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 1a6ac05ed08..24535777e79 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -55,6 +55,7 @@ amazonlinux Amodei AmpConf AMX +amx analytics Analytics Anastasiia @@ -149,6 +150,7 @@ berts bertsquad BertTokenizer bfloat +blockwise BFP BGR Bianchi @@ -326,6 +328,7 @@ convolutional Convolutional ConvPerStage ConvReLU +cooldown copt coreml CoreML @@ -546,6 +549,7 @@ ensp entrypoint enum env +environ eq erf Erf @@ -696,6 +700,7 @@ Goyal gpg GPG gpt +GPTJ gpu gpus GPUs @@ -738,6 +743,7 @@ horovodrun hostfile Hounsfield howpublished +hyp HqEgzS href html @@ -787,6 +793,7 
@@ IML impl ImportError IMS +ibean inceptionresnetv InceptionResNetV inceptionv @@ -831,6 +838,7 @@ ipc ipex IPEX ipynb +ipynbrun ipython ir irv @@ -843,6 +851,7 @@ IssueQueryThreads iter IteratorGetNext iters +intrinsics Jäger jemalloc Jens @@ -1173,6 +1182,7 @@ ngatang NGPUS ngram NHWC +ni NIC nifti niftis @@ -1234,8 +1244,11 @@ nvidia NVIDIA NVIDIA's nvme +nw Nx +NxM nyu +oc ok ol Omer @@ -1245,6 +1258,7 @@ oneapi oneAPI onednn oneDNN +oneshot onlinedocs onnx ONNX @@ -1783,6 +1797,7 @@ TestSettings tf TF TFBertForSequenceClassification +tfhub tflite tfp tfrecord @@ -1878,6 +1893,7 @@ UI UID uint uk +ultralytics un uncomment uncompress @@ -1888,6 +1904,7 @@ unidecode uniq unittest unref +unscale unsqueeze unstack upenn @@ -2114,6 +2131,7 @@ tensorrt hardwares BenchmarkConf PruningConf +Pruning's DistillationConf grey ModelZoo @@ -2379,3 +2397,66 @@ grappler amsgrad qoperator apis +CPz +PostTrainingQuantConfig +dgpu +Nsh +UmK +fe +vmware +keepachangelog +vscode +IntelNeuralCompressor +SettingsPython +VSCode +argparse +autoEnabling +clickAuto +clickEnable +clickSetting +connectSSH +enableHistory +historyDetail +itemName +leftIcon +outPut +settingPath +topRight +visualstudio +amodio +dbaeumer +dropdown +eslint +registerCommand +tsl +viewlet +PythonLauncher +BigDL +BigDLNanoSupport +Nano +bigdl +inferenceoptimizer +nano +SageMaker +bb +beba +ccdb +ceba +deeb +ebbce +efe +npmjs +AWSSageMakerSupport +sagemaker +xpu +dgpu +BenchmarkConfig +QuantizationAwareTrainingConfig +Startup +doesn +startup +Ajanthan +WeightPruningConfig +Namhoon +Thalaiyasingam +Torr diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/pyspelling_conf.yaml b/.azure-pipelines/scripts/codeScan/pyspelling/pyspelling_conf.yaml index 209e60ee4dd..3cf19530020 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/pyspelling_conf.yaml +++ b/.azure-pipelines/scripts/codeScan/pyspelling/pyspelling_conf.yaml @@ -4,8 +4,8 @@ matrix: d: en_US.ISO8859-15 dictionary: wordlists: - - 
${DICT_DIR}/lpot_dict.txt - output: ${DICT_DIR}/lpot_dict.dic + - ${DICT_DIR}/inc_dict.txt + output: ${DICT_DIR}/inc_dict.dic sources: - ${REPO_DIR}/docs/source/*.md - ${REPO_DIR}/*.md diff --git a/.azure-pipelines/scripts/models/collect_log_all.py b/.azure-pipelines/scripts/models/collect_log_all.py index fb9db0d6721..85d47cf89e9 100644 --- a/.azure-pipelines/scripts/models/collect_log_all.py +++ b/.azure-pipelines/scripts/models/collect_log_all.py @@ -1,9 +1,11 @@ import argparse import os +import requests parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--logs_dir", type=str, default=".") parser.add_argument("--output_dir", type=str, default=".") +parser.add_argument("--build_id", type=str, default="0") args = parser.parse_args() print(args) @@ -12,6 +14,7 @@ def main(): file_dir = args.logs_dir summary_content = ['OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n'] tuning_info_content = ['OS;Platform;Framework;Version;Model;Strategy;Tune_time\n'] + url_dict = parse_download_url() # get full path of all files for root, dirs, files in os.walk(file_dir): for name in files: @@ -19,13 +22,13 @@ print(file_name) if '_summary.log' in name: for line in open(file_name, "r"): - # print(line) if 'linux' in line: + line = line.replace("<url>", parse_summary_log(line, url_dict)) summary_content.append(line) if '_tuning_info.log' in name: for line in open(file_name, "r"): - # print(line) if 'linux' in line: + line = line.replace("<url>", parse_tuning_log(line, url_dict)) tuning_info_content.append(line) f = open(args.output_dir + '/summary.log', "a") for summary in summary_content: @@ -35,5 +38,39 @@ f2.writelines(str(tuning_info)) +def parse_tuning_log(line, url_dict): + """Parsing {Framework}-{Model}-tune.log to get tuning result""" + result = line.split(";") + OS, Platform, Framework, Version, Model, Strategy, Tune_time, Tuning_trials, URL, __ = result + file_name = f"{Framework}-{Model}-tune.log" + download_url
= url_dict.get(f"{Framework}_{Model}") + download_url = f"{download_url}{file_name}" + return download_url + + +def parse_summary_log(line, url_dict): + """Parse {Framework}-{Model}-tune.log to get benchmarking accuracy result""" + result = line.split(";") + OS, Platform, Framework, Version, Precision, Model, Mode, Type, BS, Value, Url = result + file_name = f"{Framework}-{Model}-tune.log" + download_url = url_dict.get(f"{Framework}_{Model}") + download_url = f"{download_url}{file_name}" + return download_url + + +def parse_download_url(): + """Get azure artifact information""" + azure_artifact_api_url = f'https://dev.azure.com/lpot-inc/neural-compressor/_apis/build/builds/{args.build_id}/artifacts?api-version=5.1' + azure_artifacts_data = dict(requests.get(azure_artifact_api_url).json().items()) + artifact_count = azure_artifacts_data.get("count") + artifact_value = azure_artifacts_data.get("value") + url_dict = {} + for item in artifact_value: + artifact_download_url = item.get("resource").get("downloadUrl") + artifact_download_url = f"{artifact_download_url[:-3]}file&subPath=%2F" + url_dict[item.get("name")] = artifact_download_url + return url_dict + + if __name__ == '__main__': main() diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index b26adf52bd6..c73623fa1d0 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ b/.azure-pipelines/scripts/models/collect_log_model.py @@ -133,9 +133,9 @@ def collect_log(): parse_tuning_line(line, tmp) print(tmp) - results.append('{};{};{};{};FP32;{};Inference;Accuracy;1;{};{}\n'.format(OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['fp32_acc'], URL)) - results.append('{};{};{};{};INT8;{};Inference;Accuracy;1;{};{}\n'.format(OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['int8_acc'], URL)) - tuning_infos.append(';'.join([OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['strategy'], 
str(tmp['tune_time']), str(tmp['tuning_trials']), URL, f"{round(tmp['max_mem_size'] / tmp['total_mem_size'] * 100, 4)}%"])+'\n') + results.append('{};{};{};{};FP32;{};Inference;Accuracy;1;{};{}\n'.format(OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['fp32_acc'], "<url>")) + results.append('{};{};{};{};INT8;{};Inference;Accuracy;1;{};{}\n'.format(OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['int8_acc'], "<url>")) + tuning_infos.append(';'.join([OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['strategy'], str(tmp['tune_time']), str(tmp['tuning_trials']), "<url>", f"{round(tmp['max_mem_size'] / tmp['total_mem_size'] * 100, 4)}%"])+'\n') # get model benchmark results for precision in ['int8', 'fp32']: throughput = 0.0 diff --git a/.azure-pipelines/scripts/models/generate_report.sh b/.azure-pipelines/scripts/models/generate_report.sh index 9271008d2e8..714c718d2f9 100644 --- a/.azure-pipelines/scripts/models/generate_report.sh +++ b/.azure-pipelines/scripts/models/generate_report.sh @@ -198,7 +198,7 @@ function generate_html_core { printf("%.2f", target); }else if(target < 1) { printf("%.2f", target); - job_status = "fail" + perf_status = "fail" }else{ printf("%.2f", target); } @@ -233,11 +233,11 @@ function generate_html_core { printf("%.2f %", status_png, target*100); } else { target = new_result / previous_result; - if(target <= 1.104 && target >= 0.895) { + if(target <= 1.054 && target >= 0.945) { status_png = "background-color:#90EE90"; } else { status_png = "background-color:#FFD2D2"; - job_status = "fail" + perf_status = "fail" } printf("%.2f", status_png, target); } @@ -265,7 +265,7 @@ function generate_html_core { status_png = "background-color:#90EE90"; } else { status_png = "background-color:#FFD2D2"; - job_status = "fail" + ratio_status = "fail" } printf("%.2f", status_png, target); } else { @@ -273,7 +273,7 @@ function generate_html_core { printf(""); } else { if (new_result == nan) { - job_status = "fail" + ratio_status = "fail"
status_png = "background-color:#FFD2D2"; printf("", status_png); } else { @@ -285,6 +285,8 @@ function generate_html_core { BEGIN { job_status = "pass" + perf_status = "pass" + ratio_status = "pass" // issue list jira_mobilenet = "https://jira01.devtools.intel.com/browse/PADDLEQ-384"; jira_resnext = "https://jira01.devtools.intel.com/browse/PADDLEQ-387"; @@ -378,8 +380,11 @@ function generate_html_core { printf("\n"); + status = (perf_status == "fail" && ratio_status == "fail") ? "fail" : "pass" + status = (job_status == "fail") ? "fail" : status + } END{ - printf("\n%s", job_status); + printf("\n%s", status); } ' >> ${output_dir}/report.html job_state=$(tail -1 ${WORKSPACE}/report.html) diff --git a/.azure-pipelines/scripts/ut/collect_log.sh b/.azure-pipelines/scripts/ut/collect_log.sh index 58900c280d4..2eb80ac1633 100644 --- a/.azure-pipelines/scripts/ut/collect_log.sh +++ b/.azure-pipelines/scripts/ut/collect_log.sh @@ -1,10 +1,13 @@ +source /neural-compressor/.azure-pipelines/scripts/change_color.sh + pip install coverage export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file coverage_log="/neural-compressor/log_dir/coverage_log" coverage_log_base="/neural-compressor/log_dir/coverage_log_base" coverage_compare="/neural-compressor/log_dir/coverate_compare.html" cd /neural-compressor/log_dir -echo "collect coverage for PR branch" + +$BOLD_YELLOW && echo "collect coverage for PR branch" && $RESET mkdir -p coverage_PR cp ut-coverage-adaptor/.coverage.adaptor ./coverage_PR/ cp ut-coverage-tfnewapi/.coverage.tfnewapi ./coverage_PR/ @@ -18,7 +21,8 @@ coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log} coverage html -d log_dir/coverage_PR/htmlcov --rcfile=${COVERAGE_RCFILE} coverage xml -o log_dir/coverage_PR/coverage.xml --rcfile=${COVERAGE_RCFILE} ls -l log_dir/coverage_PR/htmlcov -echo "collect coverage for baseline" + +$BOLD_YELLOW && echo "collect coverage for baseline" && $RESET coverage erase cd 
/neural-compressor/log_dir mkdir -p coverage_base @@ -34,28 +38,28 @@ coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_base} coverage html -d log_dir/coverage_base/htmlcov --rcfile=${COVERAGE_RCFILE} coverage xml -o log_dir/coverage_base/coverage.xml --rcfile=${COVERAGE_RCFILE} ls -l log_dir/coverage_base/htmlcov -echo "compare coverage" + +$BOLD_YELLOW && echo "compare coverage" && $RESET coverage_PR_total=$(cat ${coverage_log} | grep TOTAL | awk '{print $NF}' | sed "s|%||g") coverage_base_total=$(cat ${coverage_log_base} | grep TOTAL | awk '{print $NF}' | sed "s|%||g") -echo "clear upload path" + +$BOLD_YELLOW && echo "clear upload path" && $RESET rm -fr log_dir/coverage_PR/.coverage* rm -fr log_dir/coverage_base/.coverage* rm -fr log_dir/ut-coverage-* if [[ ${coverage_PR_total} -lt ${coverage_base_total} ]]; then - decreate=$(($coverage_PR_total - $coverage_base_total)) - rate=$(awk 'BEGIN{printf "%.2f%\n",'$decreate/100'}') - echo "Unit Test failed with covereage decrese ${rate}%" - echo "compare coverage to give detail info" + decrease=$(($coverage_PR_total - $coverage_base_total)) + rate=$(awk 'BEGIN{printf "%.2f%\n",'$decrease/100'}') + $BOLD_RED && echo "Unit Test failed with coverage decrease ${rate}%" && $RESET + $BOLD_RED && echo "compare coverage to give detail info" && $RESET bash -x /neural-compressor/.azure-pipelines/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "FAILED" exit 1 else - echo "Unit Test success with coverage ${coverage_PR_total}%" - echo "compare coverage to give detail info" + $BOLD_GREEN && echo "Unit Test success with coverage ${coverage_PR_total}%" && $RESET + $BOLD_GREEN && echo "compare coverage to give detail info" && $RESET bash -x /neural-compressor/.azure-pipelines/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "SUCCESS" #sed "1i\Unit Test success with coverage ${coverage_PR_total}\n" ${coverage_log} fi - #rm -r 
${coverage_log} #rm -r ${coverage_log_base} - diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh index 6a9fd879fad..0aaedcf169b 100644 --- a/.azure-pipelines/scripts/ut/env_setup.sh +++ b/.azure-pipelines/scripts/ut/env_setup.sh @@ -20,7 +20,7 @@ echo "mxnet version is $mxnet_version" if [[ "${tensorflow_version}" == *"-official" ]]; then pip install tensorflow==${tensorflow_version%-official} elif [[ "${tensorflow_version}" == "spr-base" ]]; then - pip install /tf_dataset/tf_binary/221125/tensorflow*.whl + pip install /tf_dataset/tf_binary/221212/tensorflow*.whl if [[ $? -ne 0 ]]; then exit 1 fi @@ -28,7 +28,10 @@ elif [[ "${tensorflow_version}" != "" ]]; then pip install intel-tensorflow==${tensorflow_version} fi -if [[ "${itex_version}" != "" ]]; then +if [[ "${itex_version}" == "nightly" ]]; then + pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow-1.1.0-cp38-cp38-linux_x86_64.whl + pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow_lib-1.1.0.0-cp38-cp38-linux_x86_64.whl +elif [[ "${itex_version}" != "" ]]; then pip install --upgrade intel-extension-for-tensorflow[cpu]==${itex_version} fi diff --git a/.azure-pipelines/scripts/ut/run_basic_adaptor.sh b/.azure-pipelines/scripts/ut/run_basic_adaptor.sh index d9a9fd2d990..d5510bbd177 100644 --- a/.azure-pipelines/scripts/ut/run_basic_adaptor.sh +++ b/.azure-pipelines/scripts/ut/run_basic_adaptor.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic adaptor" @@ -23,11 +22,13 @@ ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." 
cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.adaptor -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh b/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh index 2b687e633d3..ebd861efeb2 100644 --- a/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh +++ b/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic adaptor tfnewapi" @@ -19,11 +18,13 @@ ut_log_name=${LOG_DIR}/ut_tf_newapi.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.tfnewapi -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_ipex.sh b/.azure-pipelines/scripts/ut/run_basic_ipex.sh index 9e22bc01be3..edc2b5d3aeb 100644 --- a/.azure-pipelines/scripts/ut/run_basic_ipex.sh +++ b/.azure-pipelines/scripts/ut/run_basic_ipex.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic ipex" @@ -20,11 +19,13 @@ ut_log_name=${LOG_DIR}/ut_ipex.log echo "cat run.sh..." 
cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.ipex -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " diff --git a/.azure-pipelines/scripts/ut/run_basic_itex.sh b/.azure-pipelines/scripts/ut/run_basic_itex.sh index da9b9923ce9..c937992b7be 100644 --- a/.azure-pipelines/scripts/ut/run_basic_itex.sh +++ b/.azure-pipelines/scripts/ut/run_basic_itex.sh @@ -1,10 +1,9 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic itex" echo "specify fwk version..." -export itex_version='1.0.0' +export itex_version='nightly' export tensorflow_version='2.10.0-official' echo "set up UT env..." @@ -19,11 +18,13 @@ ut_log_name=${LOG_DIR}/ut_itex.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.itex -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! 
" \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_others.sh b/.azure-pipelines/scripts/ut/run_basic_others.sh index 9789802a75a..60ae3f09bf6 100644 --- a/.azure-pipelines/scripts/ut/run_basic_others.sh +++ b/.azure-pipelines/scripts/ut/run_basic_others.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic others" @@ -23,17 +22,23 @@ sed -i '/ neural_coder\//d' run.sh sed -i '/ ipex\//d' run.sh sed -i '/ itex\//d' run.sh +echo "copy model for dynas..." +mkdir -p .torch/ofa_nets || true +cp -r /tf_dataset/ut-localfile/ofa_mbv3_d234_e346_k357_w1.2 .torch/ofa_nets || true + LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.others -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_ncoder.sh b/.azure-pipelines/scripts/ut/run_ncoder.sh index aef05d13e3f..bb3e3212494 100644 --- a/.azure-pipelines/scripts/ut/run_ncoder.sh +++ b/.azure-pipelines/scripts/ut/run_ncoder.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run coder" @@ -15,9 +14,12 @@ ut_log_name=${LOG_DIR}/ut_neural_coder.log echo "cat run.sh..." 
cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} +echo "------UT end -------" if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_ux.sh b/.azure-pipelines/scripts/ut/run_ux.sh index ceb1c7fcefd..e7041cbacce 100644 --- a/.azure-pipelines/scripts/ut/run_ux.sh +++ b/.azure-pipelines/scripts/ut/run_ux.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run ux" @@ -21,9 +20,12 @@ ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} +echo "------UT end -------" if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! 
" \ No newline at end of file diff --git a/.azure-pipelines/ut-basic.yml b/.azure-pipelines/ut-basic.yml index bdeb6dd6d6f..b57b97b8b62 100644 --- a/.azure-pipelines/ut-basic.yml +++ b/.azure-pipelines/ut-basic.yml @@ -11,6 +11,7 @@ pr: - neural_compressor - test - setup.py + - .azure-pipelines/scripts/ut exclude: - neural_compressor/ux - test/ux @@ -194,6 +195,7 @@ stages: displayName: 'collect logs' - task: PublishPipelineArtifact@1 + condition: succeededOrFailed() inputs: targetPath: $(UPLOAD_PATH) artifact: $(ARTIFACT_NAME) @@ -205,4 +207,4 @@ stages: targetType: "inline" script: | docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true" - displayName: "Docker clean up" \ No newline at end of file + displayName: "Docker clean up" diff --git a/.azure-pipelines/ut-ncoder.yml b/.azure-pipelines/ut-ncoder.yml index 16ee8cdb6ba..9663aeae21d 100644 --- a/.azure-pipelines/ut-ncoder.yml +++ b/.azure-pipelines/ut-ncoder.yml @@ -11,6 +11,7 @@ pr: - neural_coder - test/neural_coder - setup.py + - .azure-pipelines/scripts/ut pool: ICX-16C @@ -29,4 +30,3 @@ stages: utScriptFileName: 'run_ncoder' uploadPath: $(UPLOAD_PATH) utArtifact: 'ut-ncoder' - diff --git a/.azure-pipelines/ut-ux.yml b/.azure-pipelines/ut-ux.yml index 2b79fb4b9b4..25b5e5a94c0 100644 --- a/.azure-pipelines/ut-ux.yml +++ b/.azure-pipelines/ut-ux.yml @@ -11,6 +11,7 @@ pr: - neural_compressor/ux - test/ux - setup.py + - .azure-pipelines/scripts/ut pool: ICX-16C diff --git a/.gitignore b/.gitignore index d039b651c1f..509d3f1d1a9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .idea /venv/ */__pycache__ +.ipynb_checkpoints/ *.snapshot *.csv *.pb @@ -17,4 +18,4 @@ build/ _build lpot_workspace/ .torch/ -node_modules \ No newline at end of file +node_modules diff --git a/README.md b/README.md index 0fa14c497e6..dee2976da37 100644 --- a/README.md +++ b/README.md @@ -226,13 +226,15 @@ Intel® Neural Compressor validated 420+ [examples](./examples) for 
quantization ## Selected Publications/Events +* [Intel together with Tencent deepens the cooperation to build a cloud foundation for digital and intelligent industry](https://mp.weixin.qq.com/s/CPz9-5Nsh-5N9Q8-UmK--w) (Dec 2022) +* [Running Fast Transformers on CPUs: Intel Approach Achieves Significant Speed Ups and SOTA Performance](https://medium.com/syncedreview/running-fast-transformers-on-cpus-intel-approach-achieves-significant-speed-ups-and-sota-448521704c5e) (Nov 2022) +* [Intel Neural Compressor for TF Virtual Appliance packaged by Bitnami](https://marketplace.cloud.vmware.com/services/details/e9c3d891-ca51-4f07-a5aa-3fe6394f15ae) (Nov 2022) * [Neural Compressor: an open-source Python library for network compression](https://cloud.tencent.com/developer/article/2165895) (Nov 2022) * [Running Fast Transformers on CPUs: Intel Approach Achieves Significant Speed Ups and SOTA Performance](https://medium.com/syncedreview/running-fast-transformers-on-cpus-intel-approach-achieves-significant-speed-ups-and-sota-448521704c5e) (Nov 2022) * [Personalized Stable Diffusion with Few-Shot Fine-Tuning](https://medium.com/intel-analytics-software/personalized-stable-diffusion-with-few-shot-fine-tuning-on-a-single-cpu-f01a3316b13) (Nov 2022) +* [Fast DistilBERT on CPUs](https://arxiv.org/abs/2211.07715) (Oct 2022) * [Meet the Innovation of Intel AI Software: Intel® Extension for TensorFlow*](https://www.intel.com/content/www/us/en/developer/articles/technical/innovation-of-ai-software-extension-tensorflow.html) (Oct 2022) * [PyTorch* Inference Acceleration with Intel® Neural Compressor](https://www.intel.com/content/www/us/en/developer/articles/technical/pytorch-inference-with-intel-neural-compressor.html#gs.gnq0cj) (Oct 2022) -* Neural Coder, a new plug-in for Intel Neural Compressor was covered by [Twitter](https://twitter.com/IntelDevTools/status/1583629213697212416), 
[LinkedIn](https://www.linkedin.com/posts/intel-software_oneapi-ai-deeplearning-activity-6989377309917007872-Dbzg?utm_source=share&utm_medium=member_desktop), and [Intel Developer Zone](https://mp.weixin.qq.com/s/LL-4eD-R0YagFgODM23oQA) from Intel, and [Twitter](https://twitter.com/IntelDevTools/status/1583629213697212416/retweets) and [LinkedIn](https://www.linkedin.com/feed/update/urn:li:share:6990377841435574272/) from Hugging Face. (Oct 2022) -* Intel Neural Compressor successfully landed on [GCP](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [AWS](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel) marketplace. (Oct 2022) > View our [full publication list](./docs/source/publication_list.md). diff --git a/docs/source/NAS.md b/docs/source/NAS.md index 98eac4d8217..8ad4a43554a 100644 --- a/docs/source/NAS.md +++ b/docs/source/NAS.md @@ -81,7 +81,7 @@ class NASBase(object): def search(self, res_save_path=None): # NAS search process. - ... + ... def estimate(self, model): # pragma: no cover # Estimate performance of the model. Depends on specific NAS algorithm. @@ -175,3 +175,5 @@ Following examples are supported in Intel® Neural Compressor: - DyNAS MobileNetV3 supernet Example: - [DyNAS MobileNetV3 supernet Example](../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset. +- DyNAS Transformer LT supernet Example: + - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. 
diff --git a/docs/source/_static/imgs/pruning/Pruning_patterns.JPG b/docs/source/_static/imgs/pruning/Pruning_patterns.JPG new file mode 100644 index 00000000000..38c061489c8 Binary files /dev/null and b/docs/source/_static/imgs/pruning/Pruning_patterns.JPG differ diff --git a/docs/source/_static/imgs/pruning/Pruning_patterns.PNG b/docs/source/_static/imgs/pruning/Pruning_patterns.PNG new file mode 100644 index 00000000000..0bb10d43906 Binary files /dev/null and b/docs/source/_static/imgs/pruning/Pruning_patterns.PNG differ diff --git a/docs/source/_static/imgs/pruning/Pruning_schedule.JPG b/docs/source/_static/imgs/pruning/Pruning_schedule.JPG new file mode 100644 index 00000000000..9e5063381a1 Binary files /dev/null and b/docs/source/_static/imgs/pruning/Pruning_schedule.JPG differ diff --git a/docs/source/_static/imgs/pruning/Regularization.JPG b/docs/source/_static/imgs/pruning/Regularization.JPG new file mode 100644 index 00000000000..94de6c74816 Binary files /dev/null and b/docs/source/_static/imgs/pruning/Regularization.JPG differ diff --git a/docs/source/_static/imgs/pruning/pruning.PNG b/docs/source/_static/imgs/pruning/pruning.PNG new file mode 100644 index 00000000000..0c6c53295ab Binary files /dev/null and b/docs/source/_static/imgs/pruning/pruning.PNG differ diff --git a/docs/source/_static/imgs/pruning/pruning_criteria.PNG b/docs/source/_static/imgs/pruning/pruning_criteria.PNG new file mode 100644 index 00000000000..a91fcbabb5f Binary files /dev/null and b/docs/source/_static/imgs/pruning/pruning_criteria.PNG differ diff --git a/docs/source/_static/imgs/pruning/pruning_patterns.png b/docs/source/_static/imgs/pruning/pruning_patterns.png index 872c6cf8b35..d453622ed5a 100644 Binary files a/docs/source/_static/imgs/pruning/pruning_patterns.png and b/docs/source/_static/imgs/pruning/pruning_patterns.png differ diff --git a/docs/source/_static/imgs/pruning/pruning_schedule.PNG b/docs/source/_static/imgs/pruning/pruning_schedule.PNG new file mode 100644 
index 00000000000..abd07603d5d Binary files /dev/null and b/docs/source/_static/imgs/pruning/pruning_schedule.PNG differ diff --git a/docs/source/_static/imgs/pruning/regularization.PNG b/docs/source/_static/imgs/pruning/regularization.PNG new file mode 100644 index 00000000000..2feb6ae276e Binary files /dev/null and b/docs/source/_static/imgs/pruning/regularization.PNG differ diff --git a/docs/source/dataloader.md b/docs/source/dataloader.md index e89a79f4a5f..fab91d72368 100644 --- a/docs/source/dataloader.md +++ b/docs/source/dataloader.md @@ -100,7 +100,7 @@ calib_data = mx.io.ImageRecordIter(path_imgrec=dataset, ctx=args.ctx, **combine_mean_std) -from neural_compressor import Quantization, common +from neural_compressor.experimental import Quantization, common quantizer = Quantization('conf.yaml') quantizer.model = fp32_model quantizer.calib_dataloader = calib_data diff --git a/docs/source/dataset.md b/docs/source/dataset.md index b92bb828b9f..8d51cbdf723 100644 --- a/docs/source/dataset.md +++ b/docs/source/dataset.md @@ -96,7 +96,7 @@ class Dataset(object): After defining the dataset class, pass it to the quantizer: ```python -from neural_compressor import Quantization, common +from neural_compressor.experimental import Quantization, common quantizer = Quantization(yaml_file) quantizer.calib_dataloader = common.DataLoader(dataset) # user can pass more optional args to dataloader such as batch_size and collate_fn quantizer.model = graph diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 89e6567737e..fe951fc98e4 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -32,7 +32,7 @@ Neural network pruning (briefly known as pruning or sparsity) is one of the most Pruning patterns defines the rules of pruned weights' arrangements in space. 
- Sparsity Pattern + Sparsity Pattern diff --git a/docs/source/pruning_details.md b/docs/source/pruning_details.md new file mode 100644 index 00000000000..e55cd4fdca3 --- /dev/null +++ b/docs/source/pruning_details.md @@ -0,0 +1,316 @@ +Pruning details + +============ + + + + + + +1. [Introduction](#introduction) + + + + + + +>>>[Neural Network Pruning](#neural-network-pruning) + + + + + + +>>>[Pruning Patterns](#pruning-patterns) + + + + + + +>>>[Pruning Criteria](#pruning-criteria) + + + + + + +>>>[Pruning Schedule](#pruning-schedule) + + + + + + +>>>[Pruning Type](#pruning-type) + + + + + + +>>>[Regularization](#regularization) + + + + + + + + +2. [Pruning examples](#examples) + + + + + + +3. [Reference](#reference) + + + + + + +## Introduction + + + + + + +### Neural Network Pruning + +Neural network pruning is a promising model compression technique that removes the least important parameters in the network and achieves compact architectures with minimal accuracy drop and maximal inference acceleration. As state-of-the-art model sizes have grown at an unprecedented speed, pruning has become increasingly crucial for reducing the computational and memory footprint that huge neural networks require. + + + + + + + +### Pruning Patterns + + + + + +- Unstructured Pruning + + + + + +Unstructured pruning means pruning the least salient connections in the model. The nonzero patterns are irregular and could be anywhere in the matrix. + + + + + +- Structured Pruning + + + + + +Structured pruning means pruning parameters in groups and deleting entire blocks, filters, or channels according to some pruning criterions. In general, structured pruning leads to lower accuracy due to restrictive structure compared to unstructured pruning but it can significantly accelerate the model execution as it fits better with hardware designs. + + + + + + + + + +### Pruning Criteria + + + + + + +Pruning criteria determines how should the weights of a neural network be scored and pruned. 
The magnitude and gradient are widely used to score the weights. + + + + + +- Magnitude + + + + + + The algorithm prunes the weight by the lowest absolute value at each layer with given sparsity target. + + + + + +- Gradient + + + + + The algorithm prunes the weight by the lowest gradient value at each layer with given sparsity target. + + + + +- SNIP + + + + + + The algorithm prunes the dense model at its initialization, by analyzing the weights' effect to the loss function when they are masked. Please refer to the original [paper](https://arxiv.org/abs/1810.02340) for details + + + + + +- SNIP with momentum + + + + + + The algorithm improves original SNIP algorithms and introduces weights' score maps which updates in a momentum way.\ + + In the following formula, $n$ is the pruning step and $W$ and $G$ are model's weights and gradients respectively. + + $$Score_{n} = 1.0 \times Score_{n-1} + 0.9 \times |W_{n} \times G_{n}|$$ + + + + + + +### Pruning Schedule + + + + + +Pruning schedule defines the way the model reach the target sparsity (the ratio of pruned weights). + + + + + +- One-shot Pruning + + + + + + One-shot pruning means the model is pruned to its target sparsity with one single step. This pruning method often works at model's initialization step. It can easily cause accuracy drop, but save much training time. + + + + + + +- Iterative Pruning + + + + + + Iterative pruning means the model is gradually pruned to its target sparsity during a training process. The pruning process contains several pruning steps, and each step raises model's sparsity to a higher value. In the final pruning step, the model reaches target sparsity and the pruning process ends. + + + + + + + +### Pruning Type + + + + + + +- Pattern_lock Pruning + + + + + +Pattern_lock pruning type uses masks of a fixed pattern during the pruning process. 
+ + + + + +- Progressive Pruning + + + + + +Progressive pruning aims at smoothing the structured pruning by automatically interpolating a group of interval masks during the pruning process. In this method, a sequence of masks are generated to enable a more flexible pruning process and those masks would gradually change into ones to fit the target pruning structure. +Progressive pruning is used mainly for channel-wise pruning and currently only supports NxM pruning pattern. + + + + + +### Regularization + + + + + +Regularization is a technique that discourages learning a more complex model and therefore performs variable-selection. + + + + + +- Group Lasso + + + + + + The Group-lasso algorithm is used to prune entire rows, columns or blocks of parameters that result in a smaller dense network. + + + + + + + +## Pruning Examples + + + + +We validate the pruning technique on typical models across various domains (including CV and NLP). + + + + +## Reference + + + + +[1] Namhoon Lee, Thalaiyasingam Ajanthan, and Philip Torr. SNIP: SINGLE-SHOT NETWORK + +PRUNING BASED ON CONNECTION SENSITIVITY. In International Conference on + +Learning Representations, 2019. 
+ + + + + + + + + + diff --git a/docs/source/publication_list.md b/docs/source/publication_list.md index cabe32daa7a..de88c8359df 100644 --- a/docs/source/publication_list.md +++ b/docs/source/publication_list.md @@ -1,9 +1,13 @@ -Full Publications/Events (45) +Full Publications/Events (49) ========== -## 2022 (27) +## 2022 (31) +* [Running Fast Transformers on CPUs: Intel Approach Achieves Significant Speed Ups and SOTA Performance](https://medium.com/syncedreview/running-fast-transformers-on-cpus-intel-approach-achieves-significant-speed-ups-and-sota-448521704c5e) (Nov 2022) +* [Intel together with Tencent deepens the cooperation to build a cloud foundation for digital and intelligent industry](https://mp.weixin.qq.com/s/CPz9-5Nsh-5N9Q8-UmK--w) (Dec 2022) +* [Intel Neural Compressor for TF Virtual Appliance packaged by Bitnami](https://marketplace.cloud.vmware.com/services/details/e9c3d891-ca51-4f07-a5aa-3fe6394f15ae) (Nov 2022) * [Neural Compressor: an open-source Python library for network compression](https://cloud.tencent.com/developer/article/2165895) (Nov 2022) * [Running Fast Transformers on CPUs: Intel Approach Achieves Significant Speed Ups and SOTA Performance](https://medium.com/syncedreview/running-fast-transformers-on-cpus-intel-approach-achieves-significant-speed-ups-and-sota-448521704c5e) (Nov 2022) * [Personalized Stable Diffusion with Few-Shot Fine-Tuning](https://medium.com/intel-analytics-software/personalized-stable-diffusion-with-few-shot-fine-tuning-on-a-single-cpu-f01a3316b13) (Nov 2022) +* [Fast DistilBERT on CPUs](https://arxiv.org/abs/2211.07715) (Oct 2022) * [Meet the Innovation of Intel AI Software: Intel® Extension for TensorFlow*](https://www.intel.com/content/www/us/en/developer/articles/technical/innovation-of-ai-software-extension-tensorflow.html) (Oct 2022) * [PyTorch* Inference Acceleration with Intel® Neural 
Compressor](https://www.intel.com/content/www/us/en/developer/articles/technical/pytorch-inference-with-intel-neural-compressor.html#gs.gnq0cj) (Oct 2022) * Neural Coder, a new plug-in for Intel Neural Compressor was covered by [Twitter](https://twitter.com/IntelDevTools/status/1583629213697212416), [LinkedIn](https://www.linkedin.com/posts/intel-software_oneapi-ai-deeplearning-activity-6989377309917007872-Dbzg?utm_source=share&utm_medium=member_desktop), and [Intel Developer Zone](https://mp.weixin.qq.com/s/LL-4eD-R0YagFgODM23oQA) from Intel, and [Twitter](https://twitter.com/IntelDevTools/status/1583629213697212416/retweets) and [LinkedIn](https://www.linkedin.com/feed/update/urn:li:share:6990377841435574272/) from Hugging Face. (Oct 2022) diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index 6e11941559a..f922b77feed 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -200,6 +200,39 @@ tuning: random_seed: 9527 ``` +### MSE_v2 + +#### Design + +`MSE_v2` is a two-stage fallback strategy for few-shot mixed quantization, +which is composed of three key components. First, a multi-batch order +combination based on per-layer fallback MSE values helps evaluate layer +sensitivity with few-shot. Second, a sensitivity gradient is proposed to +better evaluate the sensitivity, together with the beam search to solve +the local optimum problem. Third, a quantize-again procedure is introduced +to remove redundancy in fallback layers to protect performance. MSE_v2 performs +better especially in models with a long full-dataset evaluation time and a +large number of tuning counts. + +#### Usage +`MSE_v2` is similar to `MSE` in usage. To use the `MSE_v2` tuning strategy, +the specific strategy name of `mse_v2` must be included. Also, the option +`confidence_batches` can be included optionally to specify the count of batches +in sensitivity calculation process. 
+ + +```yaml +tuning: + strategy: + name: mse_v2 + confidence_batches: 2 + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 +``` + ### TPE #### Design diff --git a/docs/source/validated_model_list.md b/docs/source/validated_model_list.md index 7a8e50fbfbe..2e143a25d7e 100644 --- a/docs/source/validated_model_list.md +++ b/docs/source/validated_model_list.md @@ -1864,18 +1864,18 @@ Performance varies by use, configuration and other factors. See [platform config ## Validated Knowledge Distillation Examples -| Example Name | Dataset | Student
(Metrics) | Teacher
(Metrics) | Student With Distillation
(Metrics Improvement) | -|---------------------|-----------|--------------------------------------|------------------------------------|-----------------------------------------------------| -| MobileNet example | CIFAR-10 | MobileNetV2-0.35
(0.7965 ACC) | WideResNet40-2
(0.9522 ACC) | 0.8178 ACC
(0.0213 ACC) | -| CNN example | CIFAR-100 | CNN-2
(0.5494 ACC) | CNN-10
(0.7153 ACC) | 0.5540 ACC
(0.0046 ACC) | -| VGG example | CIFAR-100 | VGG-8-BN
(0.7022 ACC) | VGG-13-BN
(0.7415 ACC) | 0.7025 ACC
(0.0003 ACC) | -| ResNet example | ImageNet | ResNet18
(0.6739 ACC) | ResNet50
(0.7399 ACC) | 0.6845 ACC
(0.0106 ACC) | -| BlendCnn example | MRPC | BlendCnn
(0.7034 ACC) | BERT-Base
(0.8382 ACC) | 0.7034 ACC
(0 ACC) | -| BiLSTM example | SST-2 | BiLSTM
(0.8314 ACC) | RoBERTa-Base
(0.9403 ACC) | 0.9048 ACC
(0.0734 ACC) | -|DistilBERT example | SQuAD | DistilBERT
(0.7323/0.8256 EM/F1) | BERT-Base
(0.8084/0.8814 EM/F1) | 0.7442/0.8371 EM/F1
(0.0119/0.0115 EM/F1) | -|TinyBERT example | MNLI | TinyBERT
(0.8018/0.8044 m/mm) | BERT-Base
(0.8363/0.8411 m/mm) | 0.8025/0.8074 m/mm
(0.0007/0.0030 m/mm) | -|BERT-3 example | QQP | BERT-3
(0.8626/0.8213 EM/F1) | BERT-Base
(0.9091/0.8782 EM/F1) | 0.8684/0.8259 EM/F1
(0.0058/0.0046 EM/F1) | -|DistilRoBERTa example| COLA | DistilRoBERTa
(0.6057 ACC) | RoBERTa-Large
(0.6455 ACC) | 0.6187 ACC
(0.0130 ACC) | +| Example Name | Dataset | Student
(Metrics) | Teacher
(Metrics) | Student With Distillation
(Metrics Improvement) | Student With Distributed Distillation
(Metrics Improvement) | +|---------------------|-----------|--------------------------------------|------------------------------------|-----------------------------------------------------|-----------------------------------------------------| +| MobileNet example | CIFAR-10 | MobileNetV2-0.35
(0.7965 ACC) | WideResNet40-2
(0.9522 ACC) | 0.8178 ACC
(0.0213 ACC) | 0.8235 ACC
(0.027 ACC) | +| CNN example | CIFAR-100 | CNN-2
(0.5494 ACC) | CNN-10
(0.7153 ACC) | 0.5540 ACC
(0.0046 ACC) | 0.5523 ACC
(0.0029 ACC) | +| VGG example | CIFAR-100 | VGG-8-BN
(0.7022 ACC) | VGG-13-BN
(0.7415 ACC) | 0.7025 ACC
(0.0003 ACC) | WIP | +| ResNet example | ImageNet | ResNet18
(0.6739 ACC) | ResNet50
(0.7399 ACC) | 0.6845 ACC
(0.0106 ACC) | WIP | +| BlendCnn example | MRPC | BlendCnn
(0.7034 ACC) | BERT-Base
(0.8382 ACC) | 0.7034 ACC
(0 ACC) | WIP | +| BiLSTM example | SST-2 | BiLSTM
(0.8314 ACC) | RoBERTa-Base
(0.9403 ACC) | 0.9048 ACC
(0.0734 ACC) | WIP | +|DistilBERT example | SQuAD | DistilBERT
(0.7323/0.8256 EM/F1) | BERT-Base
(0.8084/0.8814 EM/F1) | 0.7442/0.8371 EM/F1
(0.0119/0.0115 EM/F1) | WIP | +|TinyBERT example | MNLI | TinyBERT
(0.8018/0.8044 m/mm) | BERT-Base
(0.8363/0.8411 m/mm) | 0.8025/0.8074 m/mm
(0.0007/0.0030 m/mm) | WIP | +|BERT-3 example | QQP | BERT-3
(0.8626/0.8213 EM/F1) | BERT-Base
(0.9091/0.8782 EM/F1) | 0.8684/0.8259 EM/F1
(0.0058/0.0046 EM/F1) | WIP | +|DistilRoBERTa example| COLA | DistilRoBERTa
(0.6057 ACC) | RoBERTa-Large
(0.6455 ACC) | 0.6187 ACC
(0.0130 ACC) | WIP | ## Validated ONNX QDQ INT8 models on multiple hardware through ONNX Runtime diff --git a/examples/.config/model_params_onnxrt_win.json b/examples/.config/model_params_onnxrt_win.json index 096db79ef63..c310f10d40f 100644 --- a/examples/.config/model_params_onnxrt_win.json +++ b/examples/.config/model_params_onnxrt_win.json @@ -7,16 +7,16 @@ "yaml": "resnet50_v1_5.yaml", "strategy": "basic", "batch_size": 100, - "new_benchmark": false + "new_benchmark": true }, "bert_base_MRPC_static": { - "model_src_dir": "language_translation/bert/quantization/ptq", + "model_src_dir": "nlp/bert/quantization/ptq", "dataset_location": "glue_data/MRPC", "input_model": "models/onnx/bert.onnx", "yaml": "bert_static.yaml", "strategy": "basic", "batch_size": 8, - "new_benchmark": false + "new_benchmark": true }, "mobilenet_v2": { "model_src_dir": "image_recognition/mobilenet_v2/quantization/ptq", @@ -25,7 +25,7 @@ "yaml": "mobilenet_v2.yaml", "strategy": "basic", "batch_size": 100, - "new_benchmark": false + "new_benchmark": true } } } diff --git a/examples/.config/model_params_pytorch.json b/examples/.config/model_params_pytorch.json index 848c1e9f0c6..c33dae62f59 100644 --- a/examples/.config/model_params_pytorch.json +++ b/examples/.config/model_params_pytorch.json @@ -9,6 +9,24 @@ "batch_size": 100, "new_benchmark": false }, + "efficientnet_b0_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "hawq_v2", + "batch_size": 100, + "new_benchmark": false + }, + "efficientnet_b3_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "hawq_v2", + "batch_size": 100, + "new_benchmark": false + }, "resnet18_fx": { "model_src_dir": 
"image_recognition/torchvision_models/quantization/ptq/cpu/fx/", "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", @@ -54,15 +72,6 @@ "batch_size": 100, "new_benchmark": false }, - "rnnt_ipex": { - "model_src_dir": "speech_recognition/rnnt/quantization/ptq_static/ipex", - "dataset_location": "/tf_dataset/pytorch/rnnt/convert_dataset/", - "input_model": "/tf_dataset/pytorch/rnnt/rnnt.pt", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 100, - "new_benchmark": false - }, "distilbert_base_ipex":{ "model_src_dir": "nlp/huggingface_models/question-answering/quantization/ptq_static/ipex", "dataset_location": "", @@ -216,6 +225,33 @@ "batch_size": 100, "new_benchmark": false }, + "efficientnet_b0_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "mse_v2", + "batch_size": 100, + "new_benchmark": false + }, + "efficientnet_b3_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "mse_v2", + "batch_size": 100, + "new_benchmark": false + }, + "efficientnet_b7_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "mse_v2", + "batch_size": 100, + "new_benchmark": false + }, "bert_base_MRPC": { "model_src_dir": "nlp/huggingface_models/text-classification/quantization/ptq_static/fx", "dataset_location": "", @@ -310,28 +346,22 @@ "model_src_dir": "recommendation/dlrm/quantization/ptq/eager", "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 16384, - 
"new_benchmark": false + "main_script": "dlrm_s_pytorch_tune.py", + "batch_size": 16384 }, "dlrm_fx": { "model_src_dir": "recommendation/dlrm/quantization/ptq/fx", "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 16384, - "new_benchmark": false + "main_script": "dlrm_s_pytorch_tune.py", + "batch_size": 16384 }, "dlrm_ipex": { "model_src_dir": "recommendation/dlrm/quantization/ptq/ipex", "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", - "yaml": "conf_ipex.yaml", - "strategy": "basic", - "batch_size": 16384, - "new_benchmark": false + "main_script" "dlrm_s_pytorch.py", + "batch_size": 16384 }, "blendcnn": { "model_src_dir": "nlp/blendcnn/quantization/ptq/eager", @@ -400,28 +430,36 @@ "model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/eager", "dataset_location": "/tf_dataset/pytorch/rnnt/convert_dataset/", "input_model": "/tf_dataset/pytorch/rnnt/rnnt.pt", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 100, - "new_benchmark": false + "main_script": "run_tune.py", + "batch_size": 100 }, "wav2vec2":{ "model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager", "dataset_location": "/tf_dataset2/datasets/speech_data/LibriSpeech/test-clean", "input_model": "wav2vec2", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": false + "main_script": "run_asr.py", + "batch_size": 1 + }, + "wav2vec2_fx":{ + "model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_static/fx", + "dataset_location": "/tf_dataset2/datasets/speech_data/LibriSpeech/test-clean", + "input_model": "wav2vec2", + "main_script": "run_asr.py", + "batch_size": 1 }, "hubert":{ "model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager", 
"dataset_location": "/tf_dataset2/datasets/speech_data/LibriSpeech/test-clean", "input_model": "hubert", - "yaml": "conf.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": false + "main_script": "run_asr.py", + "batch_size": 1 + }, + "hubert_fx":{ + "model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_static/fx", + "dataset_location": "/tf_dataset2/datasets/speech_data/LibriSpeech/test-clean", + "input_model": "hubert", + "main_script": "run_asr.py", + "batch_size": 1 }, "distilbert_base_MRPC": { "model_src_dir": "nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager", @@ -531,6 +569,15 @@ "batch_size": 64, "new_benchmark": false }, + "gpt_j_wikitext":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/ptq_static/fx", + "dataset_location": "", + "input_model": "/tf_dataset2/models/pytorch/gpt-j-6B", + "yaml": "conf.yaml", + "strategy": "basic", + "batch_size": 8, + "new_benchmark": false + }, "xlm-roberta-base_MRPC": { "model_src_dir": "nlp/huggingface_models/text-classification/quantization/ptq_static/eager", "dataset_location": "", diff --git a/examples/.config/model_params_tensorflow.json b/examples/.config/model_params_tensorflow.json index fb70465dc90..d2d24762cc0 100644 --- a/examples/.config/model_params_tensorflow.json +++ b/examples/.config/model_params_tensorflow.json @@ -40,10 +40,8 @@ "model_src_dir": "image_recognition/keras_models/resnet101/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/resnet101_keras/saved_model/", - "yaml": "resnet101.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "resnet_v1_50_slim": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq/slim", @@ -121,10 +119,8 @@ "model_src_dir": "image_recognition/keras_models/inception_v3/quantization/ptq", "dataset_location": 
"/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model/", - "yaml": "inception_v3.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "inception_v3_slim": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq/slim", @@ -319,10 +315,8 @@ "model_src_dir": "image_recognition/keras_models/resnetv2_101/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/resnetv2_101_keras/saved_model", - "yaml": "resnetv2_101.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "resnetv2_152": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq", @@ -364,10 +358,8 @@ "model_src_dir": "image_recognition/keras_models/mobilenet_v2/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/mobilenet_v2_keras/saved_model/", - "yaml": "mobilenet_v2.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "mobilenetv3": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq", @@ -1944,6 +1936,13 @@ "batch_size": 1, "new_benchmark": false }, + "mnist_keras": { + "model_src_dir": "image_recognition/keras_models/mnist/quantization/qat", + "dataset_location": "", + "input_model": "/tf_dataset2/models/tensorflow/mnist_keras/saved_model/", + "main_script": "main.py", + "batch_size": 32 + }, "resnet50_fashion": { "model_src_dir": "image_recognition/keras_models/resnet50_fashion/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/mnist/FashionMNIST", @@ -1962,6 +1961,13 @@ "batch_size": 1, "new_benchmark": true }, + "resnet50_keras_qat": { + "model_src_dir": "image_recognition/keras_models/resnet50/quantization/qat", + "dataset_location": 
"/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/resnet50_keras/resnet50", + "main_script": "main.py", + "batch_size": 32 + }, "resnet50_keras_h5": { "model_src_dir": "image_recognition/keras_models/resnet50/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", diff --git a/examples/README.md b/examples/README.md index 3edba4b2212..d511050a988 100644 --- a/examples/README.md +++ b/examples/README.md @@ -18,7 +18,8 @@ Intel® Neural Compressor validated examples with multiple compression technique * [BERT Mini SST2 performance boost with INC](/examples/notebook/bert_mini_distillation): train a BERT-Mini model on SST-2 dataset through distillation, and leverage quantization to accelerate the inference while maintaining the accuracy using Intel® Neural Compressor. * [Performance of FP32 Vs. INT8 ResNet50 Model](/examples/notebook/perf_fp32_int8_tf): compare existed FP32 & INT8 ResNet50 model directly. * [Intel® Neural Compressor Sample for PyTorch*](/examples/notebook/pytorch/alexnet_fashion_mnist): an End-To-End pipeline to build up a CNN model by PyTorch to recognize fashion image and speed up AI model by Intel® Neural Compressor. -* [Intel® Neural Compressor Sample for TensorFlow*](/examples/notebook/tensorflow/alexnet_mnist): an End-To-End pipeline to build up a CNN model by TensorFlow to recognize handwriting number and speed up AI model by Intel® Neural Compressor: +* [Intel® Neural Compressor Sample for TensorFlow*](/examples/notebook/tensorflow/alexnet_mnist): an End-To-End pipeline to build up a CNN model by TensorFlow to recognize handwriting number and speed up AI model by Intel® Neural Compressor. 
+* [Accelerate VGG19 Inference on Intel® Gen4 Xeon® Sapphire Rapids](/examples/notebook/tensorflow/vgg19_ibean): an End-To-End pipeline to train VGG19 model by transfer learning based on pre-trained model from [TensorFlow Hub](https://tfhub.dev); quantize it by Intel® Neural Compressor on Intel® Gen4 Xeon® Sapphire Rapids. # TensorFlow Examples ## Quantization @@ -54,13 +55,13 @@ Intel® Neural Compressor validated examples with multiple compression technique MobileNet V1 Image Recognition Post-Training Static Quantization - pb / SavedModel + pb MobileNet V2 Image Recognition Post-Training Static Quantization - pb / SavedModel / keras + pb / keras MobileNet V3 @@ -236,6 +237,12 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization pb + + Transformer LT MLPerf + Natural Language Processing + Post-Training Static Quantization + pb + SSD ResNet50 V1 Object Detection @@ -463,25 +470,25 @@ Intel® Neural Compressor validated examples with multiple compression technique DLRM Recommendation Post-Training Static Quantization - eager / ipex / fx + eager / ipex / fx RNN-T Speech Recognition - Post-Training Dynamic / Static Quantization - eager + Post-Training Dynamic Quantization + eager Wav2Vec2 Speech Recognition - Post-Training Dynamic Quantization - eager + Post-Training Dynamic /Static Quantization + eager / fx HuBERT Speech Recognition - Post-Training Dynamic Quantization - eager + Post-Training Dynamic /Static Quantization + eager / fx BlendCNN @@ -519,6 +526,12 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Dynamic Quantization eager + + GPTJ + Natural Language Processing + Post-Training Static Quantization + fx + diff --git a/examples/keras/mnist/README.md b/examples/keras/mnist/README.md new file mode 100644 index 00000000000..163b7c96a70 --- /dev/null +++ b/examples/keras/mnist/README.md @@ -0,0 +1,41 @@ +Step-by-Step +============ + +This document list 
steps of reproducing Keras mnist model tuning results via Neural Compressor. +This example can run on Intel CPUs. + +# Prerequisite + +### 1. Installation +Recommend python 3.6 or higher version. + +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### 2. Install Tensorflow +```shell +pip install tensorflow +``` +> Note: Supported Tensorflow version > 2.10.0. + +### 3. Installation Dependency packages +```shell +cd examples/keras/mnist/ +pip install -r requirements.txt +``` + +#### Quantizing the model on Intel CPU(Experimental) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +# Run + + ```shell + cd examples/keras/mnist/ + python mnist.py + ``` diff --git a/examples/keras/mnist/mnist.py b/examples/keras/mnist/mnist.py new file mode 100644 index 00000000000..8e0fbf411e5 --- /dev/null +++ b/examples/keras/mnist/mnist.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import tensorflow as tf +import numpy as np +from tensorflow import keras +from tensorflow.keras import layers +import time + +num_classes = 10 + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + +class Dataset(): + def __init__(self, ): + _, _ , self.inputs, self.labels = build_dataset() + + def __getitem__(self, idx): + return self.inputs[idx], self.labels[idx] + + def __len__(self): + assert len(self.inputs) == len(self.labels), 'inputs should have equal len with labels' + return len(self.inputs) + +def build_model(x_train, y_train, x_test, y_test): + if os.path.exists('fp32_model'): + model = keras.models.load_model('fp32_model') + return model + # Model / data parameters + input_shape = (28, 28, 1) + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + batch_size = 128 + epochs = 1 + + model.compile(loss="categorical_crossentropy", optimizer="adam", + metrics=["accuracy"], run_eagerly=True) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + model.summary() + if not os.path.exists('fp32_model'): + model.save('fp32_model') + return model 
+ +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + return score[1] + +def main(): + x_train, y_train, x_test, y_test = build_dataset() + model = build_model(x_train, y_train, x_test, y_test) + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + quantized_model = fit(model, + conf=config, + calib_dataloader=common.DataLoader(Dataset(), batch_size=10), + eval_func=eval_func) + +if __name__ == '__main__': + main() + diff --git a/examples/keras/mnist/requirements.txt b/examples/keras/mnist/requirements.txt new file mode 100644 index 00000000000..cee1363064f --- /dev/null +++ b/examples/keras/mnist/requirements.txt @@ -0,0 +1,3 @@ +tensorflow +neural-compressor +intel-extension-for-tensorflow[cpu] \ No newline at end of file diff --git a/examples/mxnet/image_recognition/cnn_models/quantization/ptq/imagenet_inference.py b/examples/mxnet/image_recognition/cnn_models/quantization/ptq/imagenet_inference.py index 9bcecc734ab..63b5d95dde6 100644 --- a/examples/mxnet/image_recognition/cnn_models/quantization/ptq/imagenet_inference.py +++ b/examples/mxnet/image_recognition/cnn_models/quantization/ptq/imagenet_inference.py @@ -26,7 +26,7 @@ from mxnet.gluon.data import DataLoader from mxnet.gluon.data.vision import transforms -from neural_compressor.adaptor.mxnet_utils.util import check_mx_version, get_backend_name +from neural_compressor.adaptor.mxnet_utils.util import check_mx_version, get_framework_name if check_mx_version('2.0.0') or not check_mx_version('1.7.0'): # version >= 2.0.0 or == 1.6.0 from mxnet.contrib.quantization import quantize_net @@ -82,7 +82,7 @@ def quantize(net, ctx, dataloader, 
batch_size, num_calib_batches, save_path, cal data = next(iter(dataloader))[0].as_in_context(ctx) if check_mx_version('1.7.0'): - qnet.optimize_for(data, backend=get_backend_name(ctx), static_alloc=True, static_shape=True) + qnet.optimize_for(data, backend=get_framework_name(ctx), static_alloc=True, static_shape=True) qnet.export(save_path, 0) logger.info('Saved quantized model to: {}'.format(save_path)) diff --git a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb index 4fdbc291284..cbbd678b4bc 100644 --- a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb +++ b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb @@ -13,7 +13,7 @@ "\n", "#### Super-Networks\n", "\n", - "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. 
These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", "\n", "#### Methodology\n", "\n", @@ -38,7 +38,25 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + "!pip -q install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" ] }, { @@ -84,12 +102,16 @@ "metadata": {}, "source": [ "### Define Architecture\n", - "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. 
In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", "\n", "Super-network options (choose 1): \n", "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", - "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. " + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." ] }, { @@ -113,7 +135,7 @@ "* `['acc', 'lat']` \n", "\n", "Description:\n", - "* `'acc'` - ImageNet Top-1 Accuracy (%)\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", "* `'macs'` - Multiply-and-accumulates as measured from FVCore. 
\n", "* `'lat'` - Latency (inference time) measurement (ms)" ] @@ -137,7 +159,8 @@ "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", - "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php" + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." ] }, { @@ -272,8 +295,10 @@ "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", - "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. 
\n", - "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." ] }, { @@ -300,7 +325,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" } }, "nbformat": 4, diff --git a/examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb b/examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb new file mode 100644 index 00000000000..1e7ffcd71b5 --- /dev/null +++ b/examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This tutorial demonstrates how to perform a multi-objective neural architecture search (NAS) on a MobileNetV3 one-shot weight-sharing super-network [1] using the Intel® Neural Compressor Dynamic NAS (DyNAS) search approach. 
\n", + "\n", + "#### Background\n", + "Neural architecture search, the study of automating the discovery of optimal deep neural network architectures for tasks in domains such as computer vision and natural language processing, has seen rapid growth in the machine learning research community. While there have been many recent advancements in NAS, there is still a significant focus on reducing the computational cost incurred when validating discovered architectures by making search more efficient. Evolutionary algorithms, specifically genetic algorithms, have a history of usage in NAS and continue to gain popularity as a highly efficient way to explore the architecture objective space. In this tutorial, we show how evolutionary algorithms [2] can be paired with lightly trained objective predictors in an iterative cycle to accelerate multi-objective architectural exploration. Specifically, we use a bi-level optimization approach [3] denoted as `dynas`. This technique is ~4x more sample efficient than typical one-shot predictor-based NAS approaches. \n", + "\n", + "#### Super-Networks\n", + "\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. 
This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", + "\n", + "#### Methodology\n", + "\n", + "The flow of the DyNAS approach (`approach='dynas'`) is shown in the following figure. Currently, three pre-trained super-network options for the image classification task are provided. In the first phase of the search, a small population (`config.dynas.population`) of sub-networks are randomly sampled and evaluated (validation measurement) to provide the initial training set for the inner predictor loop. After the predictors are trained, a multi-objective evolutionary search (`search_algorithm`) is performed in the predictor objective space. After an extensive search is performed, the best performing sub-network configurations are selected to be the next iteration's validation population. The cycle continues until the search concludes when the user defined evaluation count (`config.dynas.num_evals`) is met. \n", + " \n", + "
\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "For released version of Neural Compressor:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from neural_compressor.conf.config import NASConfig\n", + "from neural_compressor.experimental.nas import NAS\n", + "from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configure NAS Algorithm\n", + "\n", + "The `NASConfig` class allows us to define the appropriate paramenters for determining how the neural architecture search is performed. 
Currently, the following multi-objective evolutionary algorithms are supported by the `dynas` approach: \n", + "* `'nsga2'`\n", + "* `'age'`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "config = NASConfig(approach='dynas', search_algorithm='nsga2')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Architecture\n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", + "\n", + "Super-network options (choose 1): \n", + "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.supernet = 'transformer_lt_wmt_en_de'\n", + "config.seed = 42" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Select performance metrics\n", + "\n", + "Performance metric options are as follows. Currently, the `dynas` approach supports the use exactly 2 objectives.\n", + "* `['acc', 'macs'] `\n", + "* `['acc', 'lat']` \n", + "\n", + "Description:\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", + "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", + "* `'lat'` - Latency (inference time) measurement (ms)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.metrics = ['acc', 'macs']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Search parameters\n", + "\n", + "* `config.dynas.population` - Size of the population for evolutionary/genetic algorithm (50 recommended)\n", + "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", + "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", + "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.population = 50\n", + "config.dynas.num_evals = 250\n", + "config.dynas.results_csv_path = 'results_transformerlt_macs.csv'\n", + "config.dynas.batch_size = 64\n", + "config.dynas.dataset_path = '/datasets/hat_dataset/data/binary/wmt16_en_de' # example\n", + "config.dynas.supernet_ckpt_path ='/datasets/hat_dataset/HAT_wmt14ende_super_space0.pt' # example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perform Search\n", + "\n", + "After the DyNAS configuration parameters are set, the search process can be started. Depending on how many evaluations `config.dynas.num_evals` were defined, the search time can vary from hours to days. \n", + "The search process will populate the `config.dynas.results_csv_path` file and will also return a list of the final iteration's best sub-network population recommondation. \n", + "\n", + "Note: example search results are provided for the plotting section if you wish to skip this step for now. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent = NAS(config)\n", + "results = agent.search()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Search Results in the Multi-Objective Space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcMAAAFOCAYAAAD6qHbYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAACjpElEQVR4nOydd3gc1fWw37NFq94s925jg00zYNOLgdAh9JoQIAmE8Ev7QnolvRcSOqGDaQYbAwZccO+9d1myem/bd2fO98eMZclWtSVbtud9nnm0O7edmV3N2XvvKaKqODg4ODg4HM+4jrQADg4ODg4ORxpHGTo4ODg4HPc4ytDBwcHB4bjHUYYODg4ODsc9jjJ0cHBwcDjucZShg4ODg8Nxj6MMHY4oIvKyiPz+SMvRXYiIisgJR1oOBweHtnGUoUO7iEieiHyhg3XnisjXu2DMr4rIShGpFpEdIvJ7EUlup839tvL50X7nC0Vk4qHK1B2IyFUiMl9EGkSkQkTmicgXj7RcDg7HG44ydOhxiMh/gYnAl1Q1G7gAaAA+FZGkdppXAz8SkbTulRJExHOI7W8D3gVeBQYBfYFfATccunSHJJeIyBF9NoiI+0iO73D84ShDh05hz74WisjfRaRGRHaLyDV22R+Ai4AnRMQvIk/Y508SkZn2LG+biNzRRv9XAgL8ElgjIjXAJ8D7wL+Bn7Uj4hZgCfD9Vvp3ichPRGSXiFSJyDsikm2XTRSRwv3qN86KReQxEZksIq+LSD1wv4icLSJLRKRWREpE5AkRSWhHRkREgH8Cv1PV/6lqnaqaqjpPVR9sIusvRCRfRMpF5FURybDLhtmz4AdEpMD+LB4WkQkist6W54km490vIots+epEZKuIXN6kfK6I/EFEFgFBYERbn5uIXCsim+0ZbZGI/MA+nyMiH9njV4vIgr2KVUTG2OPUisimpjNge7n8aRGZLiIB4NL27qGDQ5eiqs7hHG0eQB7wBfv1/UAMeBBwA98EigGxy+cCX2/SNgUoAB4APMAZQCUw1i5/Gfh9k/rPA1nAMGCufW4S8Kj9+t025LwfWAiMA2qAbPt8ITDRfv1dYCnWTMwHPAu8aZdNBArbuPbH7Gu/CeuHZBJwFnCufW3DsJTx95q0V+CEFmQ9yS4b3sb1fBXYCYwAUrF+ELxmlw2z2z8DJAJXAmFgKtAHGAiUA5c0uTdx4P8BXuBOoK7JPZoL7AFOtq8lo53PrQS4yH6dBZxpv/6TLZPXPi7C+nHjta/lZ0ACcBnWbP/EJt+DOqxVABeQeKS/985xfB3OzNDhYMhX1edV1QBeAfpjLfG1xPVAnqq+pKpxVV0DvAfc3kp9r6rW2K8vFJEwMAR43T4XaE84VV0LzAR+3ELxw8DPVbVQVSNYCu62Tix5LlHVqWrN4kKqukpVl9rXloelXC/pQD+97L8lbdT5EvBPVc1VVT/wU+Cu/WT9naqGVXUG1r15U1XLVbUIWIClxPZSDvxbVWOq+jawDbiuSfnLqrpJVePA1bT9ucWAsSKSrqo1qrq6yfn+wFB7nAWqqlg/GFKBP6tqVFU/Bz4C7m4y/
gequsi+t+H2bqCDQ1fiKEOHg6F07wtVDdovU1upOxQ4x14aqxWRWqyHfL9W6sdEJMt+vRAYA2RizU7Amml2hF8B3xSR/ZX0UGBKE1m2AAatK/P9KWj6RkRG28uCpfbS6R+BnA70U2X/7d9GnQFAfpP3+Vj3oamsZU1eh1p43/RzKbIVU9P+BjR53/Ta2vvcbgWuBfJto5/z7PN/w5oBzhCRXBH5SZNrKVBVc7/xB7YyvoPDYcVRhg5dzf5pUAqAeaqa2eRIVdVvttJ+MpZCiQDbVHU38HPgW7bBybYOCaG6FWtZ8ectyHPNfvIk2jOpANBosWobcfRu5/qeBrYCo1Q1HWsZUDog4jZbllvbqFOMpZT2MgRrqbOs5ertMtDeq2zaX3GT902vrc3PTVVXqOqNWEuyU4F37PMNqvqoqo4Avgh8396bLAYG72eYMwQoamV8B4fDiqMMHbqaMqw9rr18BIwWkXtFxGsfE0RkTEuNVfUzIAr8HfiX/fBcCtQD3wL+0AlZfoO155XZ5NwzwB9EZCiAiPQWkRvtsu1AoohcJyJe4BdY+4ptkWbL5heRk7D2UNvFnqF9H/ilbQSTbhvMXCgiz9nV3gT+n4gMF5FUrB8Jb9vLmAdDH+A79mdwO9ase3ordVv93EQkQUS+JCIZqhqzr98EEJHrReQEW+nWYc26TWAZlmHOj+y+JmJZzb51kNfi4NClOMrQoat5HGsPrkZE/qOqDVjGHXdhzQ5Kgb/QhpJR1e8Cs4A3sFwlFmItj16rqqGOCmLPKl+j+dLq48A0rGW8BixFe45dvw54BPgf1owlgGV80xY/AO7BMgZ5Hni7E/JNxjJk+SrWvSkDfg98YFd50ZZ/PrAby0Dm2x3tvwWWAaOwDGH+ANymqlUtVezA53YvkGcvDT+MtYSK3f8swI9l1fuUqs5R1SiW8rvGHv8p4Cv2DN7B4Yiz1wLQwcHhGEZE7sey8r3wSMvi4NATcWaGDg4ODg7HPY4ydHBwcHA47nGWSR0cHBwcjnucmaGDg4ODw3GPowwdjltE5BkR+eWRlqMj2C4gW6X9QOU9BhF5T+y4tQ4OPR1HGTp0KWIFtg7ZAZxrRWSxHUC6x33XVPVhVf1dZ9vZ11guIilNzn1dRObuV0/sKCybW+jjZBGZYQezrhWRVSJybRvD/gQrXFrIbj9XRML2fa632/9ERNrzi9w7fofTXTWpe2cL/fxMrGDtfrttU9eSv2C5ijg49Hh63APK4ZjgBlVNw4qe8mesGKEvHFmRuhw3VtDvtrgYy9F9hIhM2K/sQ6z4qf3sOt/Bcl4/AFvB3ce++Kx7+ZZ9n/sDj2L5BE7fL8pMW3Q03dV9dt2v7CfXfVj+hl9Q1VRgPDB7b7mqLgfSRWR8B+VxcDhiOMrQodtQKy3RNCzH8vtE5BQ7ikmZNMlXJyK3iMg6+/VjYqVVetWe9Wxq+jCVfemXGsRKIXRzk7K9aYr+Zc+2ckXkfPt8gT2bu69J/ZdF5PdN3t8oImvtmdYuEbm6jcv7G/ADEclso859WA700+3Xe8fJAYYDz9tBq6N2gOqFrfRzDlCrqi0GAFDVgKrOxQp/dh5wnYj0E5GgiOwNCI6InClWAmGvfarNdFd2m6FYgccfAq4SkaYxZScAn6nqLluOUlV9br8u5tI8GLiDQ4/EUYYO3Y49QyjESvmzAitI9ZVNqtyLleB2L1/ECtOViRUt5okmZbuw0gJlYIVbe11Emga7PgdYj5UVYpLdzwTgBODLWLkWDwgqLiJn2zL80B73Yqz0Ta2xEutB/4OWCkUkGbgNK4rOG1jZJvbmOazCCmb9uojcJAcGE9+fU+lATFZV3WPLdZGqltryNc0deS/wlh1CbS+/BL4ndk7HFvgKsFJV38NSnl9qUrYU+IqI/FBExkvLCXm3AKe3J7uDw5HGUYYOh4tiYO8D9xUsxYT9EL4KS3HtZaGqTrdTRL1Gk4epqr6rqsV2mp+3gR3A2U3a7rbTD
hlYodEGA79V1Yid5iiKpRj352vAi6o60+67qAOhwn4FfFtE9g/mDXALVrDxGcDHWPn8rrOvQbGS1+YB/wBKRGS+iIxqZZxMrHBvHaG1++zGSpf0WtPK7aS7AksZ7v1sJtFkqVRVX8cKD3cVMA8oF5H9+2mgeWxYB4ceiaMMHQ4XA7H2ncDa+7rBNkC5A1igqk3z+pU2eR3ECp7tARCRr9hLmXvTCp1C85RJ+6cwQlXbSmu0l8FYs84Oo6obsQJa/6SF4vuAd+xcgGGsXID3NWlbqKrfUtWRWHurAZrPjptSgxUQvCM0vc8fYOUcHA5cAdTZs/T9aTHdlYhcgLWcuzeY9iTgVBEZ1+Q63lDVL2ApvIeB34nIVU26SQNqOyi7g8MRw1GGDt2ObTwyECvgNna6pCVYs6d72W+20kY/Q7GCYX8L6KWqmcBGOpYyqT0KgJEH0e7XwIM0ycsnIoOwMrl/Waw8h6VYS6bX2vuFzVDVAuBJLMXeEuuB0e0JIiKDgbOwkvpiK+F3sGaHrd7nNtJd3Yd1b9fa17Csyfn9+4ip6ru2rE2vYwywrj3ZHRyONI4ydOg2xEpLdD3WzOJ1Vd3QpPhV4EdY+2Hvd7DLFKycdxV2/w/QugLpLC8AD4jI5WKlUhooVkqmNlHVnVjLsd9pcvperHRQJwLj7GM01r7p3SKSJSK/ESvVkctWkF/F2oNrieVApogMbKlQRJJF5BKsmeBymqdlehW4H2sftq0fHc3SXYlIItas/aEm1zAOa1n0HhHx2IZJ14lImn0d1wAns09pgmV880kb4zo49AgcZejQHXwoVnqkAqzZxj+xHrRNmYKddV5Vgx3pVFU3Y+2xLcFaDj0VWNQVAtvLhw8A/8LKwzeP5ol12+K3NE8TdR9W6qLSpgdWLsX7sPYth2GlOqrHmt1GsJRWS7JFgZex9/+a8IR9n8uAf2MtxV7dNJu8qi7Cyie4WlXzW7uAFtJd3YS1pPzqftfwIuABrrZl/xmwB2sp9K/AN/daxdorAv5WlmYdHHoUTmxShyOGiOwCvqGqs460LD0d20hnAXBGZ3I62m0/Byap6v+6RbjWx30PeEFVW0sg7ODQY3CUocMRQURuxYpQMrrpTMaha7FnZzOBwXbCXgcHhxbwHGkBHI4/xApbNha411GE3YeIvIK13PldRxE6OLSNMzN0cHBwcDjucQxoHBwcHByOexxl6ODQBBG5QER22FkYbjrS8nSU/eOsOjRHrEwjXzjScjj0XBxleBQhIheKlRKpTqzUP4vkwGwIPRqxUgG1FA5tb/lEEWkxIPVh4rfAE6qaqqpTj6AcB00PuIeO8nE46nAMaI4SRCQdK/TXN7GiiiRgBayOHEYZBGuf+YgavYiIR1Xj3dT9UGDTwTTsZrkQEbcdc9XBwaGLcWaGRw+jAVT1TVU1VDWkqjNUdb1YaY8ac92JyDB7BrY3nudcEfmTiCy30xN90DRLgYica884a0VknTRJ7mq3/YOILMKKEzrC7vthezmxVkSetBXl3jZfFZEtIlIjIp/ZYdQQkfl2lXX2MmSzZLF2rNJPgAF2uV9EBtjXN1lEXheReuB+ETlbRJbY45eIyBOyLysEbcloR36ZZ8+wK8VOSGv7PY7AChrgFxGfPf40eya+U0QebDJGS3LNFZHf2/fTLyIfikgvEXnDvvcrRGRYkz5OEpGZdv/bROSOJmUvi8jTIjJdRAJYwb3bpLV72ErdRLESMefY738uInH7hxci8jsR+XcTWZ4SkU/sPheJlSbq3/bnvFVEzrDrvgYMaXIff9TS+E3kaO/79zt7vAaxEiIfENKuhT7vFZF8EakSkZ/vV+aSfanAqsRKGdZa1g6H4wVVdY6j4ADSsVL/vAJcA2Q1KXsMK9zZ3vfDsMKWeez3c4EirNBlKViRSl63ywba/V6L9ePoCvt97yZt92CF2fJgZV9QrFlqJtZDrwIr8gnAjVjpicbY9X8BLG4imwIn7HdttcCF9
uuJQOF+5Y8BMSw3AReQhBWD81x7jGFYqYK+t984rcn4JlZkHBeQuHdsuywPK1nt3vfzgafseuPsfi5rQ6659vWPxEoztRkrNNsXbFlfBV6y26dgRel5wC47A6gExtrlL2NFw7lgr6xtfD9eBn7f2j1so9184Fb79QysYOXXNCm7uUn/lfZ9TwQ+B3ZjZbFwY2W0n9PafWxj/I58/3Zh/Rjce3//3E6fYwE/VhouH1YEpPheebCSMi8FBtnlzwJvHun/cec4soczMzxKUNV64EKsh/zzQIU9Y2kvF95eXlPVjaoawMphd4dYaX2+DExXK2WSqaozsXLiXduk7cuqukmtDAx7c+H9WVVr1cqhNwdLUYCVueBPqrpFrSXDPwLj9s4OW7m2TG09se1elqjqVFvGkKquUtWltkx5WA+0S/Zr05qMMazl0AGqGm5tbLECX18A/Niutxb4H80zvjeTyz73kqruUtU6rFnaLlWdZd+Pd7GUHsD1QJ5aKafiqroG64fK7U36/0CtxL+mWoG3u5p5wCVirSKcBvzHfp+IlQdyfpO6U+z7HsYKpxdW1Vd1X7qsM+g8Hfn+vaSq2+37+w77PsfWuA34SFXnq2oE6/vedGn/YeDnamUOiWD9qLnNvgcOxymOMjyKsBXM/ao6CGuWNwArJmVHKGjyOh9rhpeDpRRut5eo9qZFuhDo30rbveyfZmlvWqShwONN+qrGynzQYpDpTtBMBhEZLSIfiZUVoh5L6e6/fNaajD+yZVouIptE5KutjDkAqNbmDuv5NL+Wlu7N/imjWkshNRQ4Z797/yWgaTb5lvrvSuZhzSTPBDZgRau5BGvWvVNVq5rU7eh1dYaOfP9a+xxbYwBN7pv9A7DpdQwFpjQZbwtgAB39YelwDOL8EjpKUdWtIvIy8A1gNZDcpLhfC00GN3k9BGt2VIn10HhNVR9soU3jcJ0QrQD4g6q+0Yk2HRlr//NPA2uAu1W1QUS+hzUjaH8AK+D0g2BZ6AKzRGS+WhkomlIMZItIWhOFOARrybk9eTtCATBPVa9oS9yD6LczbRZjZde42ZZls4gMwZqZzTuIsTsrQ0e+f52lBGuZHrCyegC99hvzq2oFMXdwAJyZ4VGDbWjxqFi58vYu4d2NtfexFrhYRIaISAbw0xa6+LKIjLUfDL8FJtvLW3sT7V4lIm7bqGLi3nEOgmeAn4rIybacGSLSdNmvDMtIpTXKgF72dbRFGlbWBL9YqZa+2VEBReT2JtdXg/XgPsBCVq08g4uBP9n35TTga1j3rCv4CBhtG3t47WOCiIxpt2XbdPQeolbGkFXA/7FP+S3GWko8FGXY3ue8l67+/gFMBq4XyxUpAev73vRZ9wzwB9ln2NVbRG48hPEcjgEcZXj00ACcAyyzLQuXYqX+edTeZ3kbK7HqKqyH7P68hmUEUYplAPEdaHzg34iViqcC61fzDznI74aqTsEKwP2WvXy5EcvgZy+PAa/YS1R3ANgWhxfZ7bdiGbjk2nVatIQEfgDcg3Vfnse6/o4yAes++oFpWLE7c1upezeWgU4x1j7Zr7WLsmzYs80rgbvs/kux7p3vEPvt6D3cyzysZfPlTd6n0Xy/sLP8CfiFPf4P2pC1S79/dp+bsJT7JKxZYg1WLsm9PI71uc8QKwXWUqz/LYfjGCc26XGAWIGxX9fDnMLHwcHB4WjBmRk6ODg4OBwytk/oPPvv+51s2541+f717xcRl/36JyJyqAZ6jjJ0cDhasC1f/S0cX2qn3SettPvZYZL7S62Mf1CRfrqrT4cu4XJVnaiqt3TzOPdj6y9V/bOqFrVdvX2cZVIHBwcHh0PG3o75gu1Piz1b+4eq3mX7NM9W1Yki8g6WG0sEuE1V60VkoapeuPfv3v7s+j8FrsaydXgYa397BrAOy+/3cqygDxXAG1gBStaq6ndE5H7gBqyADQDXaStKz5kZOjg4ODh0FbPtZdKn7NlallgBHC5in0HW/ap6CVYAh
Ttb66gJj9v1vwT8QFWXY1nQX66qrzWp9xDwtqpeDCSLyF6jqEJVvRbLJeq01gZx/AwdHBwcHLqKy7V5sPrPsKzJLwOet2eIfxORU7FmcFNa6kRkX6xj4F57K8Ckbf/VkcB0+/VKYG92nI323yKs8IwtctQpw5ycHB02bFiX9BWPx/F4evYtcGTsGhwZuwZHxq6hp8m4atWqSlXt3Q1dT8Zys+mvVlKBs4AUVb1YrKD3+xu+iIj4sAJB7OURrFB/I7HcqMAKGuLGijm7l11YsXM3AeOxllBPorkCbapkm9FzPo0OMmzYMFauXNklfVVWVpKT024A/COKI2PX4MjYNTgydg09TUYRye+irmaLiAKGql6uqntEZDhWgHWAbcAJIvIplk/p/oYvLwMLsWL67mU51hJrU7/Xj4GpItLUXex5YJKtZNer6lI7IEeHOOqUoYODg4NDz0NVJ7Zy/vwmr/1YsWf3r3Oh/fd59s3+9pYdEKpPVf/NvrjM7zUpuna/ei83ef1YW/I7BjQODg4ODsc9jjJ0cHBwcDjucZShg4ODg0OXICKX264V80Vkioj0ar9Vm/3dJCLZXSVfWzh7hg4ODg4Oh4yI9AZ+BVxvp1UbDSQcYrc3YblGVB9iP+3izAwdHBwcHLqCa7FyUzYAqOp2VS0RkdNFZJGILBWRL0NjHFPP3tf235dF5BkRWSgivxYrr+bVwBsi8sPuFt5Rhg4Oh4mS3DJCgfCRFsPBobvoj5Uya39+hxU95iLg2yLibaOPz2zL0mtVdQ/wKfAlVf1bl0u7H44ydHA4DMSicf74pf8w+Z8tpZp0cDgmKAFayp2Zpap5qhoDdgN9aN0Rfm+0mFD3iNg6jjJ0cDgMLJyyjJqyOj6ftJCa8rojLY6DQ3cwHfiyiKQBiMgJItIfqBWRYfaMcARQDtQB/UUkB+jXpI/9w63tjTTT7TjK0MGhm4lF40z+x0eIWwgHI3z07MwjLZKDQ5ejqhVYS6Ifich84O9AFMuoZhJWZJkn7Rnic8CHwGNY2SZa4zPgKRF5uBtFBxxrUgeHbsM0TQq3lZCWnULvITmk56QBEAlGmtWrqWxg/dKdnHzu4CMhZrcy7fMN1PtDfPmLZx9pURwOA6o6C5i13+kq4Pz96k1nX1Dtvefub/J6ov13MlZ8027nsChDERkMvIqVw0qB51T1cRF5m30BWTOBWlUddzhkcnDobtbN28x/Hvkff/z4pzw2+dFmZdFIHI/XjcslTH1pAZ9/sJpfPHdPh+NV7syvYNvuMq6beEp3iN4lBIIR3py+CsMwuPbik8nOTDnSIjk4tMrhWiaNA4+q6ljgXOD/RGSsqt6pquNsBfge8P5hksfBoVsxTZO3/zqNQH2Q9//zSbMyVeVPv53Kh1NXUVlax8JP1+Nyu1j4yfoO9a2qPPfWIl58dwmVNf7uEL9L+Hj+JmKxOKapvD9r3ZEWx+EoRkTyRGSDiKwVkZX2uWwRmSkiO+y/WfZ5EZH/iMhOEVkvImd2ZIzDogxVtURVV9uvG4AtNEndYeeuugN483DI4+DQ3ayfv4WCbUWkZaex5MOVlOSWNZZt3ljEjm2lfPj+Kqa8vAB/fQg1TTYsz6WssH3f4o3bi8ktqEAV3p+xthuv4tD4ZP5myxpC4POl24nG4u01cXBoi0vtydN4+/1PgNmqOgqYbb8HK3/iKPt4CHi6I50f9j1DERmGlZtqWZPTFwFlqrqjlTYPYV0UgwYNorKysktkqavr+VZ9joxdQ3fKWFNWi8vtJsPeEwQoLytn7CV2blGBPXmFeNMto7iPpy0hMyGKOwFIhi99/1KrnitOKBqgstJsc7zpn68iJcHEMJS1G3aSf95wUpJ9ByW7qYpLWk3xdgCduY8/f/BiolEDAI/HTX1dbWfFOyiO9+/jccSNwET79StYaaJ+bJ9/VVUVWCoimSLSX1Vb8oFs5LAqQxFJxVoO/Z6q1jcpups2ZoWq+hyW9RHjx4/XrswD1
pNyirWGI2PX0B0yqir/+uqLJKX4+Pmk77A3QfeVd17KFXdMRBVcrn3KpqYmwNZ1ZZTMWI87I4WM3n148DvXAx3PcXfJeaewZG0JsbjJZReOpn//viR4O/+vXOb388gH03jyizfQLy2t/QY2Hb2PR/Ircbx+H49hFJhh50p81tYJfZsouFIsmxSwVh0LmrQttM+1qQwPm2uF7WPyHvCGqr7f5LwHuAV4+3DJ4uDQWUL+MH99+AXqq5vv0W1YuJWiHSXsXJvHjtW7MeIGW1bsAmDS1BU89ercxrq1tUGyslK47vzh9OmTTq8kD1++YzydZU9hDR63i5RkH2s3F+JxH5wb1utr1pJXU8ura9YeVPvOEonEOly3pLSOvz/+KYbR9izZoeeSI/00XbLbPURko4isbHI81EJ3F6rqmVhLoP8nIhc3LbRngfv7KHaKw6IM7T3BF4AtqvrP/Yq/AGxV1cLDIYuDw8Hw+eRlrJy1kemv7Eu2raq8/bdpRMMxIqEob/9tGounr+EvDz7PlnV5fDR7AwuW76KkvI4du8r47o/eZPeuMj56bibRcIxAXfCAiDSvv7eUH//wTUxTG8fYn227y0j0eUnwuAmFY1TXBTp9PWV+Px9v206f1FQ+3b6D0oaGTvfRGQqKa3j4p5OoqOrYOO9OWcHCxTtYtjIXsBTpY//4kMrqnmsw5NCcKFHO9VzZ7gGEVXV8k+O5/ftS1SL7bzkwBTgbKLOd+rH/ltvVi4CmfkqD7HNtcriWSS8A7gU2iMha+9zPbF+Tu3AMZxx6MCF/mA+fn0NGTiqz3lzMtfddTHp2KqZh0ndIDtGoicstZPZO5+1/f0IkHOPJX0/G6J8BwLsfraKuIkBtXZBpn67j9kdvwIhZe2lZ/TKZtWY7A3tlkOIymPTaYoz6CCuW72L8hBH85jdTuOuucxk7ttHejF9++9oW5ewMi/LyiZkGpt/ASIAFefncfmrbbhrRWJyN24qZeBDLe+98uJKKygben76Gb9y770d9NG6wp7qWE/rsy/RTUlrLshW7SE1LZNI7Szln/AhmLdzKynX5vP/xah669+KWhnDoaYiAHPp8S0RSAJedCSMFuBL4LTANuA/4s/33A7vJNOBbIvIWcA5Q195+IRw+a9KFqiqqetpeVwpbEaKq96vqM4dDDgeHg2HFzA3464KgEA5GWfDBSgDcHjcP//M+wqlpxDIyOf3yU8nPLSdoGuxeuRujPoQqLFq2i42bi8jOTmHpyt2cef14bnj4Sm54+EpOv/I0nvhgEf/9YBGffL6ReEMUdQnPPzuHZctz2bSlmNffWNziDPFQuOnksTxz5Q30ynPz9BXXc+spJ7fbZs6ibbw9bSU786yAIarKh7M3EAhF22xXUFzD8rX55GSnMXfJ9mazww/Xb+U773xEbXBfAPPPZm4kGjVwCZSV1bNy9W7e/XAVmelJzFm8rdXZYa0/RG3DYQ9p6dAKAohL2j06QF9goYisA5YDH6vqp1hK8AoR2YG1wvhnu/50IBfYCTwPPNKRQZwINA4O7XDW5afw6xF9Gt/3G7pvZrRoxkYC/jCCsGLpDuL9UlDgzLNHcte3r2HEKYOZPXcz0z5eiwIpyT5y8yro19eaNU5dsgnDVArKaghvLgVVQCgqqObpZ2dRb0TIz69k06YiTjllUJddk0uEqbPW4w9GmDprA6c82L/N+tFYnLenrcLngbemruAX37uWTdtLeP7tRURjcW69+oxW267dVIBpmPiDYUSEDVuLuOyCkwjH4ry6bDWBSJR3V2/gwQsnAHDDteM45+wRje2Ly+to8IdJ9HmJRg1mL9jCnTdOOGCcf74+B1OV791+zkHeFYcupwtmhqqaC5zewvkq4PIWzivwf50dx1GGDg7tkJKexKjThx5w3jBMpryykEgohqqydPUezCtHIiK4Rw3mtHMs14obrzuDG687UFmEIjHeX7iBmGEQj5ukj8jkGzecC0B9dYBJ7y4lapj4X
VFmf765S5VhXnE1KzbtITsjmZWb95BbVMWIga0nJZ+7eDtVNQEG90ti1fp8du4u57Wpy/G63Uz5bB1XX3IyKUkt53G94YrTuOGK0w44//GGbdSGwiR6Pby7agO3n3kqmcmJ9OqVSq9eqY31hg/rzbDB+36A9Mk50PJ1+54KNu4qQYE9pTWOpWZPQATcR0/4a0cZOjh0kphh8N+PFvHVyydw61cvJhqOoQpvLFyLOzURgIr6AHHDxNPGw8DjdvHIDecTty0mk1xxJp5l7dutKizi6R1riQTiePolc98DFzZra5gm/miUjMTEg7qG3MJKMlITUYWEVDe7CyvbVIYZ6UlcNXEsCe44J5/kIa+omk07SkjwuqmpDzJjwWZuvnJcp2TwRyKM6dsbALfbRXUgSGbyvuuJmnESXB4SfV5GDuvdZl+vT19BLG7dxxnLtnHmKaM6JYtD9yAuRxk6OByzzNuQy7Rlm+mVlsx914ynvLgWl1u48pazWm3jD0V4+ePlfOOm8/F6LFcIr8fNFWeOxlRFgKqqqsb6H27YSihugE+o9ofYVlPFOWnJjeVvrFnHzJ07eeWO2zrlNL+Xy84ezWVnj+5w/XPOGM45Zwxv9IV8felqzr5kBKf2tbLvnDi8Tzs9HMh9553Jfee1HCkrbho8vOQN7j/hfC7u27ZiM00lZpgM7ptpvzcxTBP3UfQgPmY5iO/mkcJRhg4OrfDx64uYcOlY+gzMajwXMwxenr2StKQE3l+8kZvOOZn//vYDEpMT+Nk/7mp0uj+gr0Wb+WDBRk4c0ocrzj6xWdkvZ87ilH59uWLAvryovdxJZCclkZyQQDgWI6nJv2pDJMKkdesIxWIsyMvjkuHDu/jK2yYQjfKfjUsR4AeXXkayt63E5QfHvNLt5DZU8vz2BVzQZyTuNvaeXC7hT/93feP7yspKRxH2BETgKPocjh5JHRy6ib0+fQBzpq6irtpPwc4y3nx8Bu8993mzuku37qG0tgERF4FwlFfeXkT+rnJ2bilmx6aWXZn8oQjvzV1HWpKP1z5dSSxuNJZtr6xkfn4eL69aTSi+L3bnDWeM4Sc3TuTiM4bz21uvYHjv7MayKRs3E4rF8bjcPLt0BWYXW5o2pSzUcIAl6+sb1lAfCVMXCfPGhjUd6qcz1rBx0+B/OxaRJAmUh+pZWLazWfnWumIm7V7c4f4cjiBuV/tHD6HnSOLgcATYsHYPv/j+JOJxg8JdZbzwx2l8+MpC3ntuDp4ENyvmbKEkf18s3LFD+vKbu6/kx7dM5LG7ryB3bi4VVQ34G8K8++KCZg99Q02ipsHsFdtpCEQQESpr/SzekNdY538rVlIX81MZbmBhXn7j+WG9s9gdqeWDgm306pVCWtK+2KObystJSfDi87iJGHHK/d3jiF4YqOXeua+zsWafi5YCL61dhaql4F5Ys7JDyvhXH8/i0y07KKquwzDbjiqzq6GC6kiA/PwYtXUmc0u37RtflWd2zOb13YsoDtYc9LUdLDNL1jG9aFWX9vlu/lJKQrVd2mePQEDE1e7RU3CWSR2OW1SVSS8vJHdnGcsW7mDVp+twu13Menc5sUgMcbuIB6N89NpCvvz9a/n3X6fzyPeu5IKxwwAw4gavmwaamgApCaTvl6/viS3zqKht4P/GTiSnSdmJQ60QinXhMEuK9hA3DeKYLN6Tz93nWm4BNaEQ723ZhMclPL9mJf+6cp+j/V+uuapLrt9UZXbxNi4fcGKL+46vbF9ObTTEc1uX8J/zbkFECEQjkBFiQG9FEJK8cVRNkNZDwm0tq2Bhbj7rC0vRoMkjl5/H1ae3vl95YkY/Hh5yBX/Zs4D+Rio/OeXqxrL1tQXk+itwi4tJeUt4dMw1rS5NdzWheJSXd3+OqcolfU8mxXNwxktN2e0v54Vdc9kdqORHY69vv8FRhUDH/Ah7BD1HLTs4HGY2rNlDcWE1aWlJvP7cHJbN2oSqEvSHGTVuKPG0JM686jQuvHYcs
2dsZOWyXUz/YHVj++r6IBVZHrInDMQYkcEFt57R+GAuDzXw0Yr1LPnXZsqCdVxw2ojGIyfDUozpPh8TThdOPjXKmFNDnDF6n4HM25s3UBMOETVMFhXsYWtlRZdf/9LyPH639jOWlO8+oGxrTTmfFW6lly+F9dXFjbPDeSW5GHj46tgz+M/F1/DOFd/A7Wo7NuqLS1bicbkoq26gvCHASwtWEm2yVLw/UcPg5TVrSEvwURMKMzcvr7HsrbwlRI04IsKc0k08svA9FpYeKH938FnJGiJGnJgZ5+Mumh2+tnshCS43iyu2URhsP33XUYdI+0cPwVGGDsctC+ZuwYgbxOMGDf4wl95+Nrd/83LuffQaMgZl40pLYlt+FTmDsvlg8goyslKY8ckG6mqDABSW1tEnO4205ER6Z6dRULpv2W5S7krMJRHwm/z7jRnNxlVVFpfk0xALYbriZCUl0SsphdrYvsgs4/sP5KpRozC9Jl8/8yyyEpM6fF25/hKe2fFRm/t0pirPbluEojy7ddEBS52/WjSLUFDwuT1k+ZLZUVdJfTTM7MKdJLo9PL95MTPLJpPkbtt4psIfYG1RKaapBANRYoZBlT/I55t2tdpmd00NoXiMuGkiInyWu4P/t/BDDNPkvhEX8avTbuYnJ9/AHUMuZF11MU9sWtju0uuhoqpMLVyGqSaKMq1oBXGzdYXeEQoCVSyp2IEiBOJR3s1f2kXS9hAEcLvbP3oIzjKpwzHD3od/R5fNHvr2FXzl65c0tklJ9SEi1NUGeffrL2IClTV+XnxmDn5/mKSkBCLhKAvmbOH6m8/ijLGDePo3dzXrc2NZGYZpsmDddrTEgFShcHkFZRV19O1tRZ1ZVlrADxd8wuOX3MC/z/pmY9umeTpP69uPXy6fSUiiBDRC39RUOsrrebNZV5PLxL6ncVL6kBbrLKvIY2d9BUluL7saKllSvpsL+lpRXzZUlFLqD5Asifz33NsYnGbJ/crWlfhjEaISoSEi7KxtYEv9ek7OGNeqLL1TU3jz/jvZVlLBUzOXoqq4XS6Ka1vP53diTg4z7r2/8f2Pl0xnVsFuPi/axRWDLTcLVeXFLWtIcnspD/mZX5rLpQNO6PA96iwiwk9PvpWwYWXeSHR727Rw7QiZCSn8vzHXsjfZwoCkrLYbHHX0rJlfezjK0OGY4X9zlrF52lr+8c+HcHXApNvrdePxJFJfGyQja9+eXjgU5bQzhzJz7XbCmAwc1otb79wX4quvHYB7fwzT5Pdz5xI3Te7xncG0NGtJ1ZUk5OVW0rd3BqrKU+uXEjUMnlm/jOe/cHOLynvGnh3sabBmSG9uX8eXx5xB76SUA+rtz/b6QrbWF+Bze3kzfy6PnXJvi/0nu718ccipje9TPPuixzyzfrk1A1J4eeMqfnneZQBM6DsYbzjE1JqPcEscryfCrLIP21SGAL1Skjn/hKGcf8KBUXzaY2ddFUtL95DpS+SZTUu5bOBI3C4XeQ3V7KqrxCVC3DSYlr+pW5UhwInpA9uv1AK1oWXEzGp6p1zT7HyaN5Er+5/aSqtjBEcZOjgcXmpDYd547lMS5hcx98o1XHbtPgd4VaW4tLbFEF2LZ23mrWc/52+vfYNEO5xY3/6ZnPnFsUyqyUXVzY7EEF8+sV+7MszPy2u07EyfkM0rtz58QJ1lpQVsra4kxZvAhqpSVpUXM77vgQ/ZPfU1GJh43W5MlPKgv0PK8KPipcTMOInuBLbWF5AfKGNY6oGyn95rEKf3OjC8WzgeozoUIsNnGYfk19c2lo3N6svWksVkp/u5ou9F9Evsg8916EYkbTEldyNR08DjclESbGBlRSHn9B3CsLRs3vnCVzDtWVVTZd6TMDVGXs3fMNRPVtJFeFwdn+F3lNJQNVvq87i0b8sBDI4ojjJ0cDg0guEodQ0h+vdueRa2P6/MX4Z7eSmmR3juj+8x8eozQASXCJu3l/D8Gwv57jfSOWHYvkgp8ZjB5BfmU13ew
NyP1nLGNWOZsnkz35gwgX99vKDR/3D6qq08esNFpCe1/uBXVZ5dsYJAzFpGe3bFCi4fOfIAK01DlYsG7p0hSat7XYbLJC3JS7LHS8SIo23kLY3G4zz28Wy+d9kF3D30Uq7sZyUMFoGByZ2L0Zno8fLm9Xe2WFYXq2Nzw1ZSfankB8u4ZeCdh2TJGYpHSWpHiX197Nl8cdjYxvfD0y1/SxGhd1LXK5b2UKOSuP85POk/RtqwoN1LdXAOMdMyjCn3T2FA+r1dLtOk/BmsqtnOqZkjyPFldnn/B43Qo/wI28NRhg49ktc+WM7qzQU8+cs78Hjaf+hMe30erpCBuoXq3ZXM+HQVr4Z28/R1N/DG+8uIRGJ2toXrGtssm7uVmsoGUtISmfraYlZlBZi2axvnDRlM/37pxLyWAkr0egjF4qS3Y8Ny45gxBKJWOqPWorJcMGAoFwxof7nw8kEnMDw9m50N+ZRHqhiQkt5q3U8372DO9lz6pqfy3UsvoH9S6zFGO0pxsB6fy02vxH2z0QUVi4iaEURc5Af2kBvYzchUa5+xtP51IE6/9Ps71H/YiPLtVc/zjROuYkKv1pc3s3xJZPk6bjzU1fijm6gPr2RA+n0AGMHXMUPvY/rOxZ14Wbvti+pfwtQogouShjfpl3YHLvG1266j7AmUsa52J4IwtXABXx95Q5f1feh0TT7Dw4WjDB16HJU1fmYv3UbcMFm4OpeJZ7cfdDnjsmT82gv3kgjuG9KYHyphW1UFT81YzLZdZQzul8SajQXsyqtoDPq8c1MRScnWzETdwuxVW0nMSeCZFSt49ist7+W1hohw92kHZmY4WMZm9WVURjZz1n9AQIPECQH7XC9UlW0N6xiWNJaXl6wiMymJ6Ru3c/f40xHewy3JZKfe3uHxJu9az6jMHE7vNQBV5YfL3qdvUhr/PPfWxjrjMk8jK5JJnWmSW9JAH591H+NGHWUNLwNKr5Qb8brbNwSZWbKOklANr+yew1nZI3D1wIemqlJQ+28C0S1kJ38BnyRghqaBKx3D/ywu3yXtzg6HZn6HuGktnbskAaFrQ9dNKZpPwAiTIF7mlK/hlkGXkO1r/YfTYUVw/AwdHA6F92asJRyNI8AbH60gvp9P2prC4gOWF/9085fpVZyOJCQypKYPS/wV9E5JYcbmHQwamEV2ZgrDB/eiujbQ2Oa+713J09O+x9PTvseJPxxPIFMwyqOsyS9mc0Xbfn2RWLzN8q5gSdVagoaVA/Cj4rnNygpCu5hc+DzvbJxHddBKaBuJx/lw/VqqGl6ivP4ZDLNjkWkqwwH+u3ERf187D1OVJeW72VJbwILS7eyo23cfBiUP4sS00Sxc6WfagiJCQdun0v82SgxVg3L/W+2OFzaivLNnEZneZEpDNaysbt3N4kjSEFlNKLYTEQ8l9S9jhKaiGgRcqLEHjbbvCpGZdB45KVeQk3IF2cmXdHnElYtyTuPrI27gK8Ov5r7hV+Nz97C906PIz9CZGTr0OMLROCMGWUt9SYkJ+IMRMtOtWdHWsgq+983nuOWKcXz3Ozexo8hyRyhdV059eQCP103+qhJip/bH2FWHDkvjqi+fwdlZ2W3muLtm9Gj6uVN48dXFjDt9EIPSW/91XR0I8uBrU/jLLVdzQp9DX5JsjVllizHVxC0uVtVs5I74NaR6klFVPi//AFMNqhIX8t87Hm58yHpd72IacZA4NYH3yEm7r91xJu2w4osW+GtZUprHvzbNIK5KzIzx382f85/z9u0hFlbVsSavGBHhzcXr+M7V51EZmGoVilLh/wDTcymDU05qdbxd/jIipuVHCLCyaidn9xqFmn7isc14fWcf5B3rWkoaXsHUCEIC1cEZDEj5D97MMY3l4j3lsMixoGwHJ2X0o3figXkcz8w+sYUWPYWjK1C3owwdehzfvXdiq2V/nTQT78pipm8s42sPXMVf3/4cEWFIYRyXx2W5VKhyflUGaz7bwqnnjuLqH42iukl6p
KaoKiLCSTm9+eCjdSQnJJC7o4JoKA6t2Mu8vWI9JbUNvLRoFX+4+cpWZV1VVkRDNMLEwSNardMW3xn1FUJGGAC3uElxW3tnBaFd7AnswOvy0WCW4kkrZUz6GajG2F461VqeUqXK/xa9Ur/S5nJvfTTMlN0bMVWJmQb/27qckJaRnRhDUcrDBc3qz9ywnUAkSoLbzcdrt/Lli87gxD7PY6ol507/Tt4ueJz7h/+SHN+Alobk5IzBvHPhDw84Hw68QTj4Khk5U3C5O58SqqsZmP4gMfNmAAQP3oST2tzvi8c24vaM7dLZX1XEz+/XT+ey/ify4yZh6Y4KjrJlUkcZOhwVVNT4qY6G2DllI764iek3+dVjb1CeYj147r77Iu791hcAy9/vke8/jZqwbfVudq3fQ9bAfYYgexUgwJNvzGfYwF6ccfIgPl+xg0A4SrLPy3uz1/LwbRceIEd1IMi0dVvonZbMirxCdpZXkZWWRHZiUjOlY5gmf1w+j0Asyrn9B5Po6fxeUY6v5b03QTglY0Lje7fs/Tf2MCznuUbF5JKkdvc9E91efnnWFxpnaaleN6VxP1EzAkCGt/nM99xRQxk9fKjdv5Dk9eDzWK4hphosqXqRmBllYcU0bhp0oGtJa5hmLeHQm6jGCAVeJSX9Bx1u212k+jruA2jE8whWP0xSxm/xdsCwpqO8s3slqsrc0u18ecQ5DEw+yhzze9AyaHs4ytChx1NcWcdDf3mHS88YgW9nFRJXQNn03mr0ztMBeHPBWp757m24XMIHy9ZhrKtAVIkGYrz0+Md8/693NPb3+PZpnJw+hLGu4cxdtoNE327OPHkQqVlJ1JaFufCskYwf2zxyS3kwQHZiEmsLSogZZmNszZnbdvJ++Sb+eOGVTOi7z29vTkEuFaEAKEzbtZU7Tuw65+rBySMZnDzygPMigs/buVlogtvNZQP3t+Y8sO+9jBnYp9Xl5u0Na6iJluFzJbGtYRWVkeJWZ4f7Ewm8jZp1IElEQ++RlPKVHjE77CgR/wuoBgn7n8TTAcOajlAdCTClYC2GmoRiUd7IXcaPjrLZoTrK0MGhY6zctIcxI/qRktT6xv9TUxZRt2gXywrruPCOC9iwZBeC0P+MgZx0wckApCQmUBHyY6DsrqgldFY/XAKmCQMv3Pdw3xMoZ1HFZtbW5HLK5ioEiETjTP18A6V+P/EkWJVfzE/v+0Jjm4gR58FPp3D3mNO486RTmTh6n8L595pFVIWDPLNuGeOvGNg4E3tuwwpCsRgiwkubVnPzqLF42wlofbQTNcIMsfcKBSFkBNppsQ9xZ5GQeLXd1oNqrFtk7A6MeB6x8AxE0jDjBcQj87pkdugS4Y6h4zHVmrUPSc1up0UPQwC3owwdHNqlrKqePz73GbdfeQZ3Xze+WVk0EuO1373HZQ9dxrxFW/DtqqQ6v5rc04WUXumoqUiDwZcvPI3pL83ljv93LffMeZVAPMqvRl+D3mv9EwrCFRP2GXO8mT/fSswbjDJ39XYS8ILCR/M2EnHHcYmL0toG1ucWc9bowQBM37WN3LpKnl23nBtPGEOS7UNYEQowdddmeiUlsbWmslk0ma+fOh6/7XPoc3tw0fZDYVrRy5zqupAcOuckf7gx1GCXP4+RqcMOKDst60JOyzpwabkjJCbfAcl3tF+xB6JmDe6E08BWWqbZetzVzpCZkMzXRx/c/ewZ9Cxr0fZwlKHDEWPyZ2sxTOWDORu4fuIpSNzEXxuk39AcFry/nM9enkthJIJrcxkgEDdJynTz0Hcsx+Kk5AQ+fnEOHzw7G8+J6awPFQMQTIvw/266uNlYlZWV1EUDrKnJJVInuBJMht7hoqoimWHpWXhNL7P35JKAi5gohr3FGDHiPLFmCWGNUhaq54Odm7lrjLU0u73GCqtmqkmqN4GNlWWNyvDKoe37Ru5lU90KFlV+Qp2rjuH9v9usrCFWR8AI0C+xY8uNXYE/NBuvuz++hLEHlG2p387Uw
k/5xdjvk+PrPkvaowlPwhmkZj9/pMXomRxFyvDosXt1OGrJy6vktVcXNjtXXtXAzCVb8bhcNATCfDRvI5P+8TF//84rhIMR3n98OimZyez6ZB0Z5QGSE70k+7zEcis489yRnHnuSAYMzGT2m0tISvbx0t+mYpiKofCXdTNbTF+UkZDCv8c9hHdLP06qPI0bh11KsfpZUlfI3eNP5yfjLqRPiY8XvngTZ/W3lFqp30+9EcTrdoHLZFnZvmz0FwwYyic3389nt3yVz255gPtPPrjYkB8VvwpAQXAX1ZHyZmUfFL3GK7v/haHd79cIYJp+qup+S2Xd7w+4h3EzzpKqFUTNKJ+Wfn5Y5DkeUDUbjZ6OKcQKZtHe0VNwZoYO3c4bry9i3do9nH/+KEaeYGV5N0zl0nNGE48auDwuvFGTZTM2YBrKm3/9kOqyWpJSE4n5w3z521cz4apxACSl7fN3mDVpMYH6IEkpicTyGsjMTyE+OomaWB3BeJQU74Fm8Gu2lREIxVm/q5Qd3hpbgRq8vX0dZUvqqKkLsnlrKSddbBtveOIMGBDG6/IQMWNk92q+l1UZ9rOroZxzeh+c+8SW+lVUR62Zr4nJtOKXuH/4jwEoC5ewpX4FpkZYV7OMM7MvOKgxOkN98D1MjRKN5xGKLiXZd15j2bLq1fjjQXwJPpZWreLqfpc5s8MuoLrhKSKxrQzo9cSRFqXrOYpmho4ydOhWduwoZeuWYnw+D2+/vYyf/fyLAPTvnc43b7+An9/zFPf+4FqWfrKWurogcdNkzaIdfPeJr+Gyg/wOGzuIPkMO3Esbf8Up9B1iPYxLQtUMGj+A1JwUElxeklsIAB2LG7wydxWGaRA2TKIFJiPGWEYJJUX15JdXEXCHmPT5Gq45ZwxJPi8Dk7L447i7G8NkZyU0zxzx4s6FzCvbxhsXPUhmQjKdJdmdxqg0a9nV60pmSPLwxrIZpW8RNSO4UD4rfZPTs85p4kbRtaiaFFZ/j1h0OWgEMKhpeKqZMjTVYHDyQCLJMVwiBI1Qt8jS1ZgaJ27WkeDueYrbMKqpC7yNaoxwdCOJCYfHkf+w4ShDh+OdJ345mbMvHcuitflEowa+RA8b1hdQUFDF4MHWQ2nBR2spyi3n7SdmMmBoNuEkD6ZhktY/g1MuPInUjJaVy4ZVu0lM8jFq7ECGnrQv/dGM+Zsp3FHB7de0HMEkEo8zblh/QnZmiUHZGTx4hZWn8IkpC9kYKyCuJnWRIDuLKjl1RH88LjcnZw5usb+iYA1zS7dhYjI5byVfH33xAXVKwyVkejNJdLccbHpoymi+PuLngLWv2dRtwWNsYXBCA+DCJ0VE4vUke7vHojAQWYI/vACvuz85aV9GRHC7MpvVuaj3eYyRUW1G8umJFDVMpsQ/jfH9X8YlPStcWa3/DVTjCC6q659iQM5TR1qkLkMRx7XC4fhm95Ziln++mZ0bC/nuP+5h4kQrhJW4hD59rDBn0UiMKc/PIT0rhZK8SgZcMAKzfzYajOG+YFirijAWi/Ps3z8lMSmBv/7vgcYkvsFQlBcnL6QmXM0F5w5lQFbfA9qmJvr4yS2XttjvhIv68HmvKD6XFwgzckj7SmdS7jIC8QgJLg/v7VnN7cPGk9Fkdhg1o7yw+ylOzziT6wfc3G5/0Hxf8Oo+5xEzCgEQ8ZHYTXEnVU0q6p/CJWkYZh0J3jEk+87oUFvDDOF2tZ5VQtXs8nicnSFuBiion4Rh+qkIzKFv6lVt1jeMSoLBt0lNfeSQ0lN1lGBkEWCgQCS2FcNswO06MOza/qhGkC7MftEtHGWuFY4BjcNBU1fRwL+/8/IBhhaTn5+Dx+umviZA8fZS3AMTGTA6h3HjhuLzWW4JuZuKCDaECYeixOMGc6auwqgNY0YN1i7ZRZ2/5SW4hbM2428IU1Vez6rFOxvPz5i3mbpQPYZh8syHH3f6WtbV5uMRN4aaiAi7/eXtthmT2Z/bh57FjYNP58bB4w7IOLiqe
hnBuJ/l1Yupj7Vtbq8aIVz3O+KRZY3nstIeoE/mL+mT+Ut6Z/wIVzckhgUIxzYTie8CEZQ4NYF3OtQubtaxtex2ApH1LZbH4nmUVNyOaTZ0pbidosT/IYYZQCSBvLoXMNvxXwwEXiXgf45odPlhkW9wn7cYOWA5IwcsZ3j/zzukCM3oOiJVd6BH8L52FBVp9+gpHJaZoYgMBl4F+gIKPKeqj9tl3wb+DzCAj1X1R4dDJodD55mfvcXcyUsZfc5grv3S5QBUltayZXUeIoIRN/j03WWsrE7g9L79+PPl+36Vn3TmMJ6d87PG93/8xWQWzdtKYqKXQTHLcnQvYSNGXTREjjeF915dRI2/HhTeeWkhEy4cDcDkGSuJmTFcuFi2uIzq26vJTmp5dmeaJoH6MGmZ+2ZxD51wBQ+dcEWnrv/6QdZeXzAeZEXNcjK81gzJ1DhxNZld/ikgRM0I8ys+b3N2GAtNxzSKififwJ1w9mGZlewl0TuG4b3faEwg7HF1bCm2ouFtokYxJfXPcELvA5f36hqeJRrfQn3gbTJSvwrQJbPEzsw2A7GdeN0ZgCDiJRIvJ8k7sMW6hlFJKPg+Ikn4G54k4TB/Dh1BVYn5n0HjhcRD7+FNuf9Ii9Q2Pev2tcnhWiaNA4+q6moRSQNWichMLOV4I3C6qkZE5OiJv3ScU1tRz8IPViIifPbaQq6++1K2byul/4BM/vzGIxiG5YA8q3g3C2YvZuX7uewcdxYn9Nq33+RN8DB/3lbOOHMoJcEgsWQPcRecetZQakMhQpE4Q1KzeC13MQvLdvDcOfdx0W0nMS2/BBAuGLkvYv+NX+nN8jIrFZDbC6XRglaV4YKP1zHlpQX89c1HSPAd+r/AoqoFfFo6ncFJQ8j0xlhV8Q/O7P0b+ib2J25aM5G2MtWrRogGngdXL8x4HkZ0GR7fuZ2SQdVAjQJcnmGdaheK5ZLoGYLP23oItpaIm3VUBt7G4+5DMLaJQHQDKQn7Qs7F4nkEw3Nxu/rQEHgV06xANUavzF91apz9KQ7tYE7ZG9w15Oe4Xe3Hez2p1y/YWPEDEtwjGZ39zTbrBgNvodqASAqx2AZisTUkJBycu0x3obF1mPHN4O6NEXgDT9KtSAdmk0eMHvZjoi0OizJU1RKgxH7dICJbgIHAg8CfVTVil7W/NuXQI3juF28Tj8YRl9BQ4+fjl+Yxdf4OzrtgFF9/yNqXC8djTFqwEc/sYlx7Avxn8iz+8427GvsoKKjiqSdnceU1p7Glvp5Y70RQKElR/rd5KQv+t47//v5ePixYS9Q0WFy5k/LTCkkdCmBS06u0sa/bx93I7dzYTEZV44AYkbFonPf+N4/qigYWTF/H5TefhapSH55LemLn880F4wHmVczFIx4+Lf2Y01JKqY/mUx1eyoMjvtWhPozYZtSsB00FjREPz+q0MoyHZxDz/52k7LcRd8cMXGJGFVvLv8GgjO/QO7VzGdIDkXWoxlANA0J9eFEzZRgIfQrEUBVUw9QH3sYlSaSnPoDX07JBUnuoKgsrJlMa2sXW+mWcnNl+dJaGyGbyGpbhN5cxJP0OEj2tW5QmJl2Oxzus8b3bPfSg5OxOjMgiROOgfhAXZmw9bl/3u9wcFLaf4dHCYd8zFJFhwBnAMmA0cJGILBOReSIyoc3GDoedaCzOP56cQV198z28k88dxbnXjuOcq09n7LmjKCirJxyKsWDuViorrL2MmGlyZiyd1Mo4yck+gjPym+0vvjd5BSLCp5+sw4gZqIC6YNGaXcycvoHYrhCP/W8KYcMy5f/fzvkUBsvwujx4XV5y/YVtyL2bvPKbMYyaZucXf7aB+pogyck+prwwj2gkTiCygj3VP6UhsqjT92dx1SL8cT+qsN2/lVz/dhI92Wyve5eYGexQH56EM0jtM5vkrMdJ7TsPX/rP2m/UBNUY8cCzqFlPLPhGh9uVNbxJ3PRT3PA/TI12asyMpIs5beB8Th+4iNMHLqR/evMMF
RmpX2Vgn08Z2GcaKUk3IVjGHvX+Fzo1TlOKQzsoC+8myZ3OkqopGGb78Utza5+i3jQx1GBd5T/arOv1jiEp6frGw90DXTE8qY/g67MAX5/5+HrP7bmK0Eal/aOjiIhbRNaIyEf2++G27tgpIm+LWKbCIuKz3++0y4d1pP/Dak0qIqnAe8D3VLVeRDxANnAuMAF4R0RG6H4WGSLyEPAQwKBBg6isrOwSeerquiaGYHdypGVcsTqPbdvzmT4jkSsm7gvPdc4Np3DODZZPVEVFFS88t4icPgnE4wYff7iU675oWSMOzgvjH55Ngs+LETPYvHY7fQf3orKygZ0799CnTwLRmEFSmpeGZBNB8IkHd0EMGZGC7g5y8lm9cCe5SFAvX+t9DXnB7fhcPk5KH9Pqd6Gq4S2CERMzMpmM5Jsb7+PuXQX0G2FZtHq8bnZsySWe+jaR+EByg+8wKOskOrPR0TfanxtSrRlpZWg9hOLEIy7AJLdkFREzncyELJLd7fsg1tUFgfaX/iLxQir8bzEw83sIHozIMuLBBJAToWEpvtD14Mposw/DrGdP9RLgBKIaJTf+KZlJ57c7dnlNPnWRHWT4Oh5urqImFzWtjB7+hp1ovJSDefRsqFqGN5iB2zZ02la8nj6JB87e9n7WcbOegvJqxBiIAPnBfIa5C3FLK4kqDyNH+v/68NDlsUm/C2wB9mbe/gvwL1V9S0SeAb4GPG3/rVHVE0TkLrvenS112JTDpgxFxIulCN9Q1fft04XA+7byWy4iJpADVDRtq6rPAc8BjB8/XrvSz+lo8Jk6UjJGY3Hem76JcET4eNZ2rr1yAhnpB5rR+/1hhg0bRMxOa5Sekdko881fv4q5701lbO++3HPKaQwbMxCP142qjwsvPBUF/LEwbwZW4hrmIWLEydmaTLgwjCS4IKp8MTCG26+xlg39cT8vbZ2NW9xMGHo2PveB5uXR2G5qoh+SnpiE8hpZWbcBGeTk5HD/977YXPbwcnIr55HiSsY080lI3Up64kUdvkdWYG1redDUSzGsFX8AYib8ZevvOCXjNG4ffHfH+uvAZ51b9Sc0cS7upIvJTrmWaKAcI9GFtTXvISE9hLudPcBAtIJM9aHEAR/JSTXkpLc9tqqypfoZIuZ6Ls1+FberY0qlV69nUbW+G4gL10Eqoy9k30nE3Lecm+zOaNXAxbqPOQz1XEVNxLI6douP9KwkktpYKj2cHA3PnkNC6DIDGhEZBFwH/AH4vlgf/GXAPXaVV4DHsJThjfZrgMnAEyIi+0+y9udwWZMK8AKwRVX/2aRoKnApMEdERgMJQNdM+xwOmeWrdlNbGyQpMYFgOMrnC7Zy83UH+p+lpibyre+2nPF9m9lAcZZSbpbzyAk5uDxCeaiWPr0zuetuK7pJeaiecJ5i2kYmI8fkMObGfX6C/fpnNr5eWLEAwzQwxGBZ9VIu7n3JAWM2hD9HiWNqCDAIRBYD57RylW4yk/fliJMOzMxawyVuXLJvBrigYgZRM8ra2tVc1ucKevkO/eEXjG7FH1mJx5VFacOzZCZfQULKA5DyQKf6SUkYw9i+L3aqTW1kM/5YPi5fiAL/ZwxLv7H9Rlg+kl0xQfC4EvC4OudreWr2fYc+sMNB04V7hv8GfgTstRbqBdSqNgbtLcSyQ8H+WwCgqnERqbPrt6lbDtfM8ALgXmCDiKy1z/0MeBF4UUQ2AlHgvva0t0P3EInECAajZGXtCzd2+imD+fWP9v0SH9i/c1m2TVWeXrUcjZoEaiK8sXEd7oQIzy1byqQ7rueULGuZtU9SOt8e037+t7gZZ3HVQssyU5X5FXNbVIbZqfeTmbIvHZBLUqgKVrfYZ2riWaQmntWp6+oIISPE3IrPUZSIEebz8pkdnh22hGoc1RBVgWlWrj9xEzOq8EdWkJ64b3nTMMqAOG53y+4Dh8L22leJawi3RthZ9waDU6/G7erhjt8ORwylw8l9c0RkZZP3z9mrgQCIyPVAu
aquEpGJXSpkEw6XNelCWp8wf/lwyODQNm+8uYQtW0v4yx/voLShgYKaOs4ZNphTxw4iFIvhdrlIcHcuOW1tOIRbBHeZYpbH2V5cxqKynYQCHv60YhqvX3Fyp/y43OLmwRHfIGYbTrS0RAog4sYtR9bc3FCD0zLGYdjLg719h+Y1VNfwDJHocvpnPUmftH1KNcHdPLWTv+4x0CDp2S93uY9cn+Rz8aTUkpwewy0JKGZjWdyMMavsDSb2uZ1Ed0obvTgcN3R8mbRSVce3UX4B8EURuRZIxNozfBzIFBGPPTscBBTZ9YuAwUChbZeSAVS1J4QTjs2Bqio/c+ZtxTBM1qzLZ/KebawtKuHdr92NV1187ZFnOPmWk/n59Z3L3p2dlMzfLr6KB1e8S3JKCjWFDQTjiicAa7a62ThhI6dmndpi21c3rmHikOEMSc9sPCciDE4eciiXethI9aRy2+C7mp2LmX48ktxp9w3DrMEffMtyu4hvIrEVt4tYdAPxmBUNJh5dgdfXcozWg2V4+s2kRSvJyT5wuXdz/VLW1Mwh3duL83M656bhcOzSGWvRVvtQ/SnwUwB7ZvgDVf2SiLwL3Aa8BdwHfGA3mWa/X2KXf96RFUcnHJsDH3y4mljMmsG88OoCls/cROyTfKau28K7k5dQtbGCuR+spbTB32ofqtpiDsG356ylrM5PQyjC9u2ViF9IKnAh5W6m7lzaYl+5tdU8uXoZT61e1mL50YiqycrS77On4f32K+9Hg/8N0Cgg1NU/0eJ9Bgj6nwINg0YI+luv19XEzRiLKj4gyZ3CyuoZhI2OuZTsT55/XWOQAodjA3VJu8ch8GMsY5qdWHuCe/12XgB62ee/D/ykI505ytCB8ooGsrNTyMxMpqi2Hvfnxbg31vD6tEW8++pCNNGFbKvnhXmtx2t8c/pKnn574QHn07OSSMjwYKrB+ecPJlsVr0dIbRDuG3UzYSNEw35xO/+3biUJLhdLigrYVdPyXl93Y5oNXapMKkKLCcTy2V03ibgZRFXJrX2dmB1f0jCKiMe2tNg2GluPSCIiCRhmNabZ8j1xuTLwJJyJJ+EMxN2H/QN/dxc7/GtoiNcAQtgIsrHuwO9Be1SEC3g9/zfMLz/QTzKvYQZrKp/sAkkdDjvSgaMTqOpcVb3efp2rqmer6gmqenuT4C1h+/0JdnluR/p2lkmPY7ZXVTI8M4uf/PC6xnO//et7LJ67A0xImVdBwADxCsRMVs/dhl536QF7UYFghHdmrqEs0MDlF4/mxAF9KayoZdmWPczdk4cETOK1capqQ7jLvHi8ghlXPlu6jd6n7KI8UsKDI36EiFBQX8uc/N14XS6CsSivblzDby66/LDeF9UoZZX3kJ76TVKSr+2C/kx21L6ASxIwNESRfzqJnv4sr3ySQLycU3O+T6D+n4QCZZjmGQcE5O7T67lWem5OWuafOyVXxKihIZpLTtKBBkRRI0RCK2mn9mdw8mhuGfTtxvc5vs4b73xW+gyGmiyvns7Ffe7GYxvmxM0wm2teJW4GGJVxE6mtxBV16IEIcGgzv8OKMzM8TqkKBrj9xTf598LmUVe2TN2IxuJoLEqoqI4f/vJGRlw5msf+8SX+88O7uO29N1lWVABYS6OmUcHCNblUhgLE48qPX54GwAufLOe5j5YSrotAg4Hb7aIhL8ItF4zjpnNO45YLTqdPRgKb6ldRHikiN7AVgKzEZH594aX85LyL+c1Fl3Pz6LEcbgLBj4nHC6lreNKy3DxEQkYZcdMPYgWLrg6vYW3lvzBU2V73AaHIGmLRJahGiYTeO6SxTI1SFVrQobo7a19lbcVviBq1zc5HjCCv5/2M/EDL2Sj2J9WTyQlp4xqPzITenZK5IlzAbv9m3AhRM8r88jcby/L9s4ibIUDYWvtWi+1NM8LWip8SNRyvrJ5GV0ag6W4cZXic8sfZ8/BtCPLmnNWNyW4Bhp49EsGEYIi0Uf0pdsfZHKhlVXUFn5fns
yG3jG9OnoaqEo8uxV91N0s27iIaMxBDKN5ez/IdeazaXojX48ZboaT7fPRLT4WAwe2Xns6j91zKo/dcijloM8GYganKrLIPWLAzD9M0uXrEaK4deSLXjjyRcX37HyC7qnnAua5CNUq9/xnElYZpVlBTfhNmvPWwbx0h2dOfSwa9y2WDp3HZ4GkMTL2BqmgRLoSIGWFzxWOoBkDjhAIvURbczJ4OKqL9qQjMZGvlL/FHt7ZYvrRyMqWhnYTiZZQEZmFqnLz65vuYG2vnUB+rYHHF5MOy71gW3kWCKxGPmCSIUBkpbizb3fCxbbUqlASXEDXqD2hfEfyU6tDnFDe8eUCZw5FFXe0fPQVnmfQ4o7K8Hk128fnCrWQWG4RjYf67aAk/mmhlac9MSUDCEdQNqRi8v2Qj2anJfLJ6G5WFEVx+CAYivL1xA9cPeBI1q0g/s456NXE3uFGBP02dQygSw+f1UBEI8I1bzyMrNRkEstP3OaWvrahl1o6B3Hl6BH/Qxa/mzOLOM0/j4Qtbt4I0zToqq+4nK+ufeD3Du/z+xOK5qEbB9uuLxrcQCbxEUsYvu2yMQLwGrysd1MpmEZXeJCVfQDiaREJinJnlrxAy/Hx5+L865WRuapQ99S+iGOTXvcjJvf/arLw6UsTK6mnsCW7k5OQs4mYAl/jIb3ifYem3kuDOIGIEWVnzMUnuDKqjxewJbmBoymlddu0tcUrmRE5OTkbrfgJ4kOyHGssu7PsHYmoZ5LjwkOBOb9bWNCMU1L+Ix5VFuf9DBqTdg0MPQbo8HFu34ijD4wBVRURoqA/xi++8ztjrRpOyK4o0BPHhY9m6PLCVoVlWieEyUI9QvauQ4FlZJHoSaQhHaCgM421QcMHfP/+cq+/chcuVzfisrXwcPQF8ghqQlujj4TssR3ARuOCU4aQkNn+om6psLemDYVQzQC6koKwONdcxZd0m7jjzVLKTW96v8gffIhbbht//HFmZf+rye5XgPYmB/eagZohA5Y0oBvHwTMyUB3B5BnXJGMPSrmZI6r59UJd4EHERilZS5S6kLvYfALbVL+DkzI7vl1YEZhIxynBLIjXhJfijW0lNOKmxfHn1FNzioSpSQCi5P4PSLBcIF24Uy5q4IpKPqQamHdgj39/9ylBVUf+z9ppZFA2+haRZGT8SPdkk0np+xerwQuJGDS5JxtAwFYGP8XHo+7wOh47ldH+kpeg4jjI8xtm4rZh3P1nNr79zHZ9NW01dbZDN03eQ0yCEiutw5aRxmatfY/3qYQlELu+HW1xEExP4+wPXk5qeTDge50vPvYVhGIgJ1Ct74+XWaRLZJ0VwSwImysWjhnDV+BNbFshmQVEexYF6Mn2JPL92JbGSEKI11IeVd1ZvaDY7rPAHKKqr57T+yQQCb+By9yUUnkNqfHe3zA4B4pEFxM0GtoRTGeuLEgt9hC/tYQLRHbgliUTvwStGEcEtLc/4llW+S9QMAi5WVL3PSRkX45aOhYhzu1Lp0xharrmpXn2sklz/KlziJm5GKQrXcuOgHx/Qx6DkMXxz1LOdvKJDROssl5C9efmMDhn/AZCZeA5j+zzR+D7RM4j6aPctozt0EkcZOvQEVJWX31vC9t3lLFq2g+nvryYlNRF/Qxgj6Mf0gvoDnHT6vigmqVcMpf8EK4JIisdL/0FZ5CSlYJrKoIQ08l21uEW4oPcw0vt+QmGwkmtPgNvG74v4kuJpP0TXstICRISoaSnX8YMLyfKWI64MhmY3j3P61KJlLN9TyCt3ZKMaQvAAcUKhGXjTvtE1N2s/PIkTKQ79hCU175Kd+T1Gp0xE1WBn1a/wurMY1etfuF0eXNK5qDztcXLGZXiIEDP8nJR1PZ15muQkX0JO8oHh6VSVFE8G1w34f41RY1LcmV0k8aEjrkykV8dTTzXF40ol3bf/zNUxpOkRCIfqR3hY6bAytMPaZNI8OKpDD2bdliL2FNeQnJjA6+8sIT0zGdMwMYw40T211mM2b
vDmpLlcfrEVJ/Sxc65osa+GSJgyM4CRrGjEwJvkYWX1Tv61bSq/GXY7fb2dCzf2o/EX86Px1tJsJLKIuuqnQJJAg2RkXQ9Yy3t7ampZkJuHaSozdw3mjnEzGvtwSWpLXXeIQv9s+iWfj8eVRCheg6FRUr37goObuFhSsxC3JLK4egGjMq6mOvAx0XgJUaOMD4t+T+/EsVzU5+DjjbbE0JRT2FLzPC5RBiaNwC2H9ntVVVlU9ntGpl/L0JSuj8Hq4NAWR9MyaZu2PCLSX0Qes4Nrh4EyICwi60TkNyJyoKmfQ49hxvzNREIxGqqDVEUiPPKL6/nPKw/y6N9uI3xqNvHeaUQHZCD928+1t7ykCH9vE5E42euD5EoNr+fNIRAPM7d8Q5ttVZXpJdOpjx1oCQjgkjSSku8iKelGkpLvRmRfLr6Xl68mGI1hqvLqyrVE4qm4XVm4XVlIB5YPW7I8rYvsYG3l38lv+BiAZRVPsrDsr80sJ3fUr6QmWgpYhic761eRV/0YcbOK2liU/MAGNtbNIRCvbVeGzrC5djKmxlE12VA96ZD7qwhvpCS4kvXVL3erFa6DQ4t0sdN9d9Lqz04R+S3wf8AnwL+ADUA91kbRKcAVWFkonlTVXx8GWR06SO62EuZ/toFHvnYJ9ZUB1m4s4PsPXMYJw/pQHw2TlJmIKzmD6KA4LhX2BKMYponb1fpvo4uHDGN4dibhTwuRKPjXVFAwLESaJ4m1NbspDdfQL7HlrBY7/DuYUTqDuMb54oAvHlDuTTgNb0LLRhojc7K5bqy1/5jgdhMzDDqSABdANYRR8zDu9F8A+2TbVvsa4GJn3TukJZxEWXgjACWhtQxItlJUZfn6clHvfZkvkikjKjESgF3mYFwSx1SD1dWfdOnssDaSh8dl/TgJGhXEzTCeDuYN3B9VZUONlXcwEC+jOLiCgSmtpbJycOh6epLrRHu0tQbjBUaqam0LZauBV0UkEyvHlMMR5sV/f8ZZF4zitPHDef3p2Wxev4cRpw5ixeY8AsEg02ds4LRxg7lvwevclTqOBFzExQoQ0TvmIxKLUxLbxYiUk1rMduDzePjNkAt43PgQX18PdTuD9NFsPIluMkmgIlzXojLcOytMcCWwuHIxE3tPJN2bfkC91vjSWeMO+p6YoY8gthkz8CLwKAB10V2UhZbhkgQiRg3LKv5D1KhHUdZVv07/pHGICH0Th9M3cXjjNVRXfxWvN4moqQTCcdyudBTID2zgIrpOGV456G9d1ld9bA+1kd24xIOhMXY1fNJMGaqaxEz/Ae4KDg5dghwjytCOFN4mtqL8WVcK5NB58neVM2f6erauLyThux42b9lN2BXihf98RjS3mKT8SpZ5Enhl2VJKQ/V84N7A0G/1Zlt9KTVRPz+acBrl8Xwm7XmKe4Y8wsjUMS2Pk1tOeqY1a+mf4uOhzKs46bRB1FRVk53ei3WFRWRmKUNT9lla5gZyyQvmkeBKIGSEWFi5kGv7d7/pu2oIDb4Crhw0ugzMQiAHt/gYmX5rY73C4HbcxFBMEt1pmBo7wNJTNQRq4HL3J9ENt+f0Jjv7qW6/hkMl3TuEqwY9AXbS5IT9wrztbviI3fUfcunAp3Ed4t6kg0OLHMt+hiIyCjgd2Kmqa7tcIodO894rC3G7hYqyOl55cibBcAjxmZSVVuHdXYHEDRqqSplSvYHYDoMtA4vxpfrQggbMHOGZHZ9z41AhZsaYXfZBq7PDW+45j1vuOa/xvakmf9r8AhclnoanNsD/mzyF/meW8Pblv23MNdjb15svDflSY5t+if0O6Lc1VM1OpztqbBtdDmYDiAc0jhldAYwj1TuIsU2cupM876PGNgD6Jw7C3YKTu8uVTK+cVw9KjiOJiJDqbfl+x80wO2rfImLWU+Sfz+C0zqXncnBoj6PNz7BTTxoR+RrwIXArMEVEftMtUjl0mIrSOtatyKV2cz7+4kpqYjUM+06QkQ/HCWX4IW6iQMqaKhqKQ
xiFLsh3M0KSyP5fNSM2hnF7a9jcsJtEVxLFofzGOKHtsaFuBzv9e5hTvpIn5ywmGI1RmpvMm/kfNdZJ96YzIXtC4zE4eXCH+o4YlSwv+QrhePnB3BYk4SLKk//IcvNG3L3ex5V0fYv18uqnoPbMqaDhEwwzclDjHW3s8c8gZgZwSyJba19rdLJ3cOhSjgUDGgARGaiqRU1OfRk4Q1VDIpINbAMc45kjSK8+aXzlqxfxn//bTmLIyyWPDKAmy3q4b9myAxNQEVyGSc78INVDk6HWjefzYnwxD55ZcU67cSRpSVnk+BIREeJm+w9GU00mF8zE506gvCLClvIiXB6DYGUyU3Yu4+6h17eaib4j7Kl7i0Aslz31kxid/b0Ot4uZEbx2xoMV1VOpju7hxMwbgDRM08Dlau4XOL7P74nb4b7ckoCrFWf4Y43q8GbcYhnmKHGC8VJSDyGQgIPDARwre4Y2s0Tkn6r6vP0+CEwQkSXAhUBNt0rn0IxwMMrSGeuZeNN4AAzDxOUSPnvxc6KxOLWVDfTZMYR77rHiM86+/T8Ury8BBUEgKwm3uDACBlWz/GSnpVBbX88Jawdxz1c7l528NFxJWbgKRTFj4E0N2V+mOEbUQ16giBPTRxzUdUaMSkoDH+Nz96Ys8BlD0u8h0dO+H2NxaBcfFD3N/cMfoyq8nZpoPoYZYk3V25zkvpPndj7I1QN+yrDUfdFt0hKGHpSMRzvj+3Qo36mDwyFxNC2TtqcMzwf+ISJ3AV/HMsl7EXvPEPhq94rn0JTP31vOpH99wsDhfRh56iD+8K3XmHDRKHaszUdNJW7Gmf7SPL5wz4UAnHjBiexMMXH5lYR0Lz+9eiIDM9OpLKhmUc1iisJ78MWVdRUbuIeOK0NVZUBSHx4/8yeompT0X4R5zs/wupJQDTGo93t4PR1bDm2JyuBCTI0CiqkxKoPzGZR+W2P57oY5JLoy6J9yZrN2CyqmUButYFX1bPzRdUSMOlTjFAZXE4y5iIif+eVPN1OGDg4O3cixogxVtQb4qohcCXwMPAdcoIcjr4tDM8LBCNNemEs8ZjD5qZlc+8Al7NhUSFlRDSkjBxHcshtJTSFz9D4llOTxku7yEYpEcIeFk4f2Y3S/HDhxCCedm86Tu55koCuTsFlFQbCgQ/t5u8qq+OuH8/j3V24gKcFajvRSg2uv8hMXcaOoTWUYiJWT7Mlp1ThmQOoN9E6e2Pje69oX6i1mBllT9QJeVzLXJT/VaAVZFNpFcWgXad4sVtXM4sreVxKMrgT1keodQH7DVlxJHuqixeT5lzsK0cHhMHA0zQw7tKKrqjOACcBIYKGInNROE4cuZs77K6gorqGhLsjahdt5+W/TSUz0EgpESOrrw0xwETfCXHTHuMY2D5x/JkN96fROTyFDEkj27PvtUxurpV9iPzK8GfRL7EdNtGMr3i/PX8XmonI+Xbe98Vxy4qUM6vOedfR+lyTfua22Dxt1fFr0AwoDy1qtI+ImwZ3ZeEiT+J876z7F0Bhho449/n2JiXMb1mOoQdgIYpgxtjfMwiuKz+2lNLwVUw3ARFFWV0/u0LU6ODgcAnZs0vaOnkJ7BjSjgb9hKcH1WMukJwDviMg7wJ9U1eh2KR3oM7gXvpwMNBglq186VeUNRA0TjwsqFu7BdCvElddfmsWll1jRXKpqA1TVBUnyeQmGo3y6Yhtfv8Zyuj4z60zOzDqzrSEPYFdZFStzC8lOSeK1hau5ZtyJJHo7552zte5DwkY9a6tfZ2DK2Z0KdG1ojK11Uy3LR1U21bzNsDQrMPWFvW/iwt43NdYNxIuJ2BncTdOktsYgs5flXJ7WyTiqDg4OB0nP0XXt0t6TbBLwOlaUmeuBJ1T1VhGZAPwGWAaM714RHQBq/VFITiQtM4WGuMmVD17M+/M28M07z+fpP75NdW0AEUhvkjdw5IAcHvvKvgwQA3MyWuq6w8xYv524YeISIRSLsyq3kAtOHNbh9mGjju11H
+NzpRKIl1MUWM7g1PPab2jjws2E3o8Qie4gEl1FZtq9jWX7+0WmegeS6h24rzxYSU5iTofHcnBwODSONj/D9pThCOApVY2KyB7gXgBVjQA/EREnDP5hIjnFx5hTBrB2zhYuvHk8M5dsw+VxM2fNbiquHUxtJAIKnlNHNrZJS/Jx7pius5Z8YOIEbj371Mb3OWkpnWpfHy0i0Z2JqoGXZGqj+fQzTsbQCIme3u22F3ExMPkcqsLPETHXkON9sNPX4ODgcBg5hpThFGC2iMwHJgLvNS1U1VXdJJfDfpx7yYm88finBANhKivrqItG8fm8bNxeTGyUQVIE4ilQHK8DLD9A10FGb2mNRK+HxIzmIb0aYnWYdCwbQp+ksXxxyNPNzm2s/D2heBHj+z7VYtSb/YnG1hCNbUHER0PDU/TKfrLjF+Dg4HD4OMr8DNsT9evAP4Fa4Leq+rtul8ihGaah5O0sZ+PK3eRtLUG8HjbO38Z544fjH2DwhYljuHrsKDKqvIwMZ3DLGadQG/TzswV/Z0+wAIDlVRuYV768y2WLm3H+l/sPNteuOaj2gdgeKkOL8Md2UxPu2O+qQOA1IIrgIRJdTiy++6DGdnBwOAwcKxFobBeKKYdJFgebqR+voVd2KhedN4p33lnKgunbOXFEDqZpIiLEoga7G0opOyFK1ukZuFb7yUpKQvzKKDObf0//mEXLTAbkfMI3T3qAN/d8TMyMMyH7VJI9SV0m5/q6FdTFqlnnX8F5xiX43J1LNbS77lVMjQHCrroXyEo8q93ZYXrad0lJvsd+J3jcA9us7+DgcOQ4JvYMReQB4OW2fArFenLdr6ovdYdwxyO1dUHenbqSpCQvo0b0Yc2SXdTXBXGnJXPNd68CoD4WZkbqHvokpfDaulWkrXDhdgmRqMFrHy9jedEeNO5myYpiBqTNImSEUWB2+TJuGDCxXRlUlc+KN3J5/7F4XS1be8bNOHPKP8YjXmJmhJXVi7ig9+WdulZDQ6R4rT1Nl/gwNYxb2lbWHs8wPJ5hnRrHwcHhCHEsKEPgSuDnIvISMBPYrKp+EUkFxgJfAO4HVgKOMuwiPvp0HdG6EPFAhN//dgoaN0GEDat28+yjV9OrdxrPbFhGfFsBYSNOXAxuuf50Ts7sS30wxJKd24jtBpdbKdyQxEcnLMST4EYQZpYu4up+F+J1tb1VvKoqj79v/hS3uLhiwMnEY7twe0Y0m7XVxapxixufO5FEdxKl4YJOX+vpvf/Q6TYODg5HCUfZnmFb+QzvFpFzge9guVakNnkY+rEi0nxFVZd2u5THCZFIjM9mbyK0tRQS3NRkNjCkfzIun4e4QG11gF690/jayeO5c9S+zPAZvkRembOKz9Zu46oTTyB2kvWxetwurh04jPRMy90iQby42zGqUVX+t3MBgvDyroVcmOPDX30fGVn/IsG3L2pLL18fvjfaSlpSWVlJTs7x47agRjlm/a9xZfwN2S9HoIODQxOOkZkhtqJbKlYIkFFAFlZw7h2dcbYXkcHAq0BfLPeT51T1cRF5DHgQqLCr/kxVp3f6Ko4RvF4PN5w3ikmLduKOwr33X8SJp/clJSWdpMQEBg2wMsl7XW6yEvctJ9YFwkxdvpFwLE7vvmn8/bpzWhuiXVZV57O9vhSfy0txsJaZ+S9wbmoDfv8TZCW80iGLz2MdM/gGGl2BGZqCO+Xe9hs4OBynHBN7hk2xFV/Hkty1TBx4VFVXi0gasEpEZtpl/1LVvx9C38cEqoqaJnPfWUEsEicGbJy9mcuvPqlx1mWqSXGonEHJzRO2Tlm+kYpAPckJCbwydzVXnD4aXycjw+wl3ZvEHUMnAGCYNaTxFOLKIh7bQiy6otns8HhEjXI0/BG4ctDgJDTpZmd26ODQGkeRMjwsK7qqWqKqq+3XDcAWwDEDtKmvC/K9+/9HRVkd7gQ3XlG8XheKsGh7Pn+ZMgeANTVb+MPmZ6mI1KCq/HbTP1lUsZzi+kqCvhBBT
wP9s9KoCYQOWpbR6X15+MRLubTfGL42vDenZ2bjcvXG7R6KYRS138Exjhn+FDQKmKABNDLvSIvk4NAjUenY0VM4uOnDISAiw4AzsEK5XQB8S0S+gmWI86idKeO44o0X5rNtUxEvPTWbaNQgVFSOeD34A0N4e/E6thY3cNW40UyqnMGurcJH2XM5LXMkW+p3UBwqJWvgIHyuBkyBe84eSL/MtPYHbYMd9WX8vxVv8bszbuas3u+0Wi8Ur2Zb7WQGcfMhjXc04Uq6FXwX7TvhHnDkhHFw6On0IGXXHodVGdqWqO8B31PVehF5Gvgd1j7i74B/0EKORBF5CHgIYNCgQVRWVnaJPHV1dV3Sz8FSVlpHWloi61Zso/+wNHbvKuTciUMIlxUgwMBzcwhpkCEZPp7+eBbRbCWptBerNueSm72NPrFsjIiwtS5Ef1JRhHc2LOCE04cdklzvbV9MVtTN+5sWM3Rs6yHXdtV/TEFgKeo5ATit1Xo9ga79rJv+2Giwj0PnSH8fO4IjY9dwNMjYFXTFzE9EEoH5gA9LZ01W1V+LyHDgLaAXsAq41w4d6sOyUTkLqALuVNW89sY5bMpQRLxYivANVX0fQFXLmpQ/D3zUUltVfQ4rlyLjx4/XrrRcPFJWkDu2lfDX333KhAkjyNtVi8vlwojFKfh0HfFQBFQp/OtMUh4eT15diPy6EK5KiEqI6p1Cvwn5FAYyyUgMkZAeYVBiMS7xMKHPLR2+pmBkHYne0bhc+4xxdtSXMdufS5IvgWXhYvIlwFm9DoxvGopXU+L/CHeqUhb+nHG9Jraan7CncDRYvDoydg2OjD2DLnKtiACX2a59Xqw0gp8A38eyOXlLRJ4BvgY8bf+tUdUT7MT0fwHubG+QDitDEbkQ+ArQX1VvsIN0p6jq/A60FeAFYIuq/rPJ+f6qWmK/vRnY2FF5jnbeeWMJQX+E3fmVjL14FKpgROPs9tdhRKIAJGelMHiESSxcT0KwH3nVIXAL4foETjQuYnesANx9uWTQSjI8xSgwKq1js+aYUUl+1bfpnfZ1ctK+0ni+NhrkxIx+qIKI9b4lcus/IW6G8LiSCMRLKQ+vo2/SGYd8XxwcHI4Ruijcmh34xW+/9dqHApcBe8NRvQI8hqUMb7RfA0wGnhARaS8pfYeUoYjcA/wXeAO4eK+MwG+xAni3xwVYGS82iMha+9zPgLtFZJzdVx7wjY7Ic7SzY1sJmzYUkpTmo7Soht/8+Q5OGN2PUDTG7U+ZiGFayihJGHhiATkpUfLLCykrSSViREnyKIUxJTMhlYgRozYcITVVcYtSEJjD+N5fa3XsiBHC0Dj1gddRDVHZ8ApZKbfgdqWiajAhZzgTcoa3ew2DUy8iwzcMgAZPjMyEEV11exwcHI4VOqYMc0RkZZP3z9mrgfu6sdz7VmHl030S2AXUqmrcrlLIPqPMgUABgKrGRaQOaym1zZlCR2eGPweuUtWVIrLXsWojcHJHGqvqQlq+LcelT2F1lZ/sfmlsK6/kxH69qaps4ITR/Uj0evjvl75I1LBcOJfWzKOkQYhpjJDPg5EdRPMTiPQOsyNYQbInAVOFgoZT6ZeyiCSXj7Oyrm1z7Nllr1MfK+E096eIy4dh1lETeJ/UpKtZWfYDxvf9R4fSKaUnDCE9YQgAlaFKfO5Dy5Xo4OBw7NHBPcNKVW0zL67t3jdORDKx4mWfdMjC7UdHleEAVd2rufdONeNAx9OUOzRyzvmjmFyxk4bcCMkj+3HO+aMAK0Ht8N7ZjfVO7HdbY3SX6cXL+e+azygtTMIVjvO3e65lULKltFZUPk5+yIVpGOAe2eKYANXRUnY0rEKJMir9dHJsX0TBTW7tm9RHd7K77i3G9Pp2N169g4PDcUMXmxGoaq2IzAHOAzJFxGPPDgcBe32/ioDBQKGIeIAMLEOaLhF1l4icv9+584FtHWzv0IStZRWs2FNEdnISy/ML2VZeSSQab7NNH18mGSUDSfIk4K5Pw
QgIQ1KzyPbFKIusQoC4GqysmtRqH0srPyRsNBA2QuyKDWNIr/8wNOe/pCRdQVHgI+IapdA/jXC8otU+HBwcHDpEF/kZikhve0aIiCQBV2D5qs8BbrOr3Qd8YL+eZr/HLv+8vf1C6Lgy/D3wgYj8AvCKyKNYJq2/7WB7hyYU1tbROyWF2uIAfVJS2JxXwoM/eI2tu0tbbdOXPhQXRQnGDTzqZd7qPQCoCi5XGimeXlYWeWndx7A2WkZcI8Q1SiBeR1wtQ52ywELCZh2KScSsozywuGsv2MHB4fika/IZ9gfmiMh6YAUwU1U/An4MfF9EdmLtCb5g138B6GWf/z7wk44M0tFwbFNFJIAVtDsfy4rnAVWd2XZLh5b4woknsHlbKZMWlnLR+MFUbKthT3E1v3/6U17/6/0ttslKTSbrhCQaGiKcOWggt55xKgAp3mxuHPRr1M42n+ROb3XcE9PHURfbDSjDU0fjdfkASE0YhteVjluSMDRMir0X6ODg4HCwKF3jZ6iq67ECtex/Phc4ID6kqoaB2zs7TrvK0F5zfRwrOoyj/DrJR5OXUVlax/3furLxXF0wzJTP1yEGvPfZGjIDbtQDBXlVrNyyh/FjDlRG66tLqPKFyElOZVWkiJ9lW87wLnHTP+nEDsmysXY21ldU2F6/hAty7sLrSsTQGDlJExrrWQl3HRwcHA6Rnu163Ix2laFtmnoX8K3DIM8xRTQa54XH3iMWinHjPeeRlW0tYU5dvIFwXRSXV4hWR6mJ7bVKUl78YClJqT627CzhtkvHNfa1vaaSLJ+VST5FvOTWV5OT1Hp0mJa4dfAviWoYAI948bqs/vokn0Of5IPPdOHg4OBwAD0s9mh7dNSadBpwK5YDo0MHefP5z4n4w6jCM3/+kK9+/zrq6oJkuH30Sk0GwPSZZPRPJT3Het8vO4Mf/+9D6sqCjD9xCMWRYko9Ae4fexb3jz2rQ+Oq2YDGd+FKGNfsfKo3m737yE4qJgcHh27nKHrMdFQZeoHXReRhLOd4c2+Bqj7UDXIdE3zw7GzMmAEIi99fwe5wmMoKP2+98ghfvOTUxnoikBfcQ6onlTVbq5g+ayOqyn/fX4CcVoK/wuTJCQ+3m5h3L/HAyxihqfhy3kdczf3/dta9Rdio5FTHfcLBwcGhkY6u6MaAN7G8+t3sC4nj7Sa5jgnOv/Mc9KyB6FkDGHnFyeTuKqeuPsi0z9bicknjYajB87mv82b++/xnygLUUFBYvH43/voY1VE/iyu2dGhMNaowQlNQjRAPvt2sLGo0sKt+MgX+WQRjJa304ODg4NA1qKv9o6fQUWvSB7pbkGORsn4JGKf0wi0uNu+oR+NWwM/X31jEjVefgddrxSxYXr2aYDzIjkgeffqeTKovAYDCaAUxUUJGhEn587moT/sBf+LBSaj6QRIxgm/iSb6zcXa4u+EDTDt60fa6NxmX8/1uunIHBwcHjsll0r3pl67D8uzfA0xXVX/brY5vhvfLJiPZMlL5vHwjZqYPF4I/AeqCIXIyUombcT4snkFJA2QmRzjjC0G+PerrmGoyu3Q94To/kuYjyZ3QsUFdWbh9l1mvxQsaxgrAAIX+WVgr3C5Kg4sxzEdw20Y0Dg4ODl3KsWhAIyInAzMBA2vPcBjwbxG5UlWPm0wTHcVUxSXCI9ftC9rzfzddwBtTl1NQXMOj3/gCOampABhqkO06geklhdw0OofhKVayWJe4uKL/OCq9lZ1K9eJN+XKrZRf3/y+GRuz+vY4idHBw6F6OImXY0RXbfwPPAkNU9SJgCFaqjMe7Sa6jlkg8zlffeZ8t5c1DmklMWbhsF7t2V1JTESAcj/G7jz9E1cWOCi8+VyLbK3xc2/8KaiMhgrFol8vmdaeR6Mkh0ZNDghNY28HBoZs55vYMgTOBa/fGd1NVFZE/A9/rLsGOVj7Zup3N5RU8t3QF//rivgwSH8zeQDxuIgKTpq1k4IlJzP/RDMywyTqzhGSPl9y6K
hYW72bSjnUMSEnnVxMuP4JX4uDg4HCISLshQXsMHVWGtVhLozuanBsG1HetOEc3kXicF5avIrkW1koxm8rKOblvHwBWbypAVVGFXQWVLJ+xFXfIZNHLS5n404tw2W4TeQ01rK8qYWN1GQ+MGU9SG+M5ODg49FQ6Goi7p9BRZfgK8LE9G8zDUoQ/Al7uFqmOUlYXFVNXHSSaF8IVSeCzbTsaleHjv7itsd5LM+bx5rQVSKoHtjUwJpDC/Vdcgqry8NwpVIWDuER4actKHhl++pG6HAcHB4dD4xhUhn/A8jX8MZY1aQGWIvxb94h1dHL24EGMd/dhoz+PlIYE7j2tZUXWJ+hl6Mh+e8OE0jtoWYqGjTjFwXoQxUTZVuukUnJwcDh66Ul7gu3RUT9DA/iTfTi0ws49lWxaX4hRF6XebGDG4q186doDEzhfd9P5XHdT8/SQRcFSEt2JZKS46as+DDXpl+5Yezo4OBzFHGszQxG5Bdje1I1CRE4FRqrq1G6S7agjMy2JPi4fgQxQVQb3am6xObt4G3n+Kr42urkiNNTgqV2v0c/Xl5FpvembZAX07p2Yethkd3BwcOhSOp6vsEfQ0UnsX4Dq/c5VA3/tWnGObkqLaoiGYqSl+PC6XOzeVtZYFjUNntw6nzd3r6Is1NCs3arqDdRF69nm38nXx4xjRFovzu8znJ+c/oXDfQkODg4OXYdo+0cPoaN7hn1VtbjpCVUtEpH+3SDTUcvoUf345c9ubHzfp8++RLszirZQHwkRKDR5fedyHj3Vcpsw1OCD4hnE1cAwDSbt/oQZ+SF8Lg9XDxpz2K/BwcHBoas4Fq1Ji0XkZFXdtPeEHZWmtHvEOjpJSkrglFMGNTuXl1+BPxDmrcpVBEsNgrkwLXkz3zjpQlK9PmJmnOEpQ+if2BeA1WVhVJWwEeeD/A1cmTn0SFyKg4ODw6FzDCrDV4G3ReQHWL6Go7CWSF/pLsGOFX75gzcI1Ib4w5sP8KNln+JNDNG3Mo1El3XrE90+vj7iLgBC8RjTdv4PEcFU5f289Vw5zlGGDg4ORyfH4szwr1jRnt8FUoAg8BTw526S65hg4aKtlOdWAvDO68uprPMTIYxZpSzdUcCFJw1rVj/J4+Xli+8hahqN7wlEDrfYDg4ODoeO0KP2BNujo64VcSwfwx+LSG9VdRzgOsDT//7ETtsEKz9cj97lJW7EOaVf6gGWpnsZkNL8fKWjDB0cHI5WjjU/w/3IEJELgJWqWtjVAh1L+M0wsQwrZ6EmGARS45AoFGYUIclBIOvICujg4ODQnRwry6Qi8jtgq6q+Yb+/EWup1AMEReQaVV3Q/WL2fEzTxOVq/jMoeH4CtSVeUNAUJe4NggmlDYmUh2sYkjLwCEnr4ODg0N30LNeJ9mhvEnsbsLrJ+z8DTwLpWGmdftk9Yh1dhAMRfnrDX8nfUtTs/G2nnUfKVj+pq2sZO6I3g9PdDM5wc/nAkZyW5bhNODg4HMNIB48eQnvKsB+wDUBEBmFZkf7eznD/Z+C4jSK9vriU/8xbDMDstxeTu6GAyY9Pb1bHH4qQuLsBX2mIsrIKslIMcpKVsngehsaPhNgODg4Ohw11abtHT6G9PUM7lDQAZwG5qlplvw8Ayd0lWE9GVXly3lK2lFVw+bBhTHtmJll90tmwaBv5W4oYOsZa/pz28gK8pvVhx2bW8dC9t5KU4MUtbhLkYLZrHRwcHI4ietDMrz3aeyJvAO4EJgG3AnOblA0CarpHrJ7NqoJicquqSfC4efKVzwj5w7g9boyYwcJpKxuVYe9KgzoBELLqTCaknkh6+nH5+8HBweE4RI4hZfgb4EMR+QeQCpzTpOwmYEVHBhGRwViO+32xZpvPqerjTcofBf4O9FbVyg5Lf4R4a9V6YoaJz+Nma7rBkx8+Sv90K7h2SsY+ZTf+n5fwcd4WBDilz0BHETo4OBw/HEt+hqr6uR127UwsV4o9TYrXAJ93cJw48KiqrhaRNGCVi
MxU1c22orwS2NN2Fz2Hb118LtXBEAAuEU7o3wev23KhiEXj/PaRV7nv+1exqa4Mt1cwgfxQHaF4zHKkd3BwcDgeOJb8DFU1Dyu7/f7nF3Z0EFUtAUrs1w0isgUYCGwG/gX8CPigo/0daYb1ymJYr5Z9BBfP2sTm1Xm89795vPbnu/nD5pepjtbz19PvxS1H0TfDwcHB4ZA5RmaG3YGIDAPOAJbZfotFqrpO2lhcFpGHgIcABg0aRGVl16yk1tXVHXTbnVtKWLN0F7c/cGHjOSNuMPvDpfQbnUFRUSmzly6lOmZlvlq8ew1j0jsfZ/RQZDxc9DQZo0Y5DZHl9Eq+vvFcT5OxJRwZuwZHxh7EMbRn2KWISCrwHvA9rKXTn2EtkbaJqj4HPAcwfvx4zcnJ6TKZDqYv0zSZ/OJUSotquOzq8Zx4qpWpYu2SnezaXElFLEi6JPDWh/PxX+fHJS6m1i/iguFn4DqI2WFXXm930ZNk3F31bwKxmQxOP4fkhH3+nD1JxtZwZOwaHBl7AAJyFO0ZHrZ1OxHxYinCN1T1fWAkMBxYJyJ5WNapq0Wk3+GS6WBZtXgnVRUNJCR4eOelBagq9dV+Tj17BP0eHYv//j74v96XcXeO5uxeYxmffRKjU4cQV/NIi37ME4rlUhdeiAsfxXXPH2lxHByOYxRxtX+0h4gMFpE5IrJZRDaJyHft89kiMlNEdth/s+zzIiL/EZGdIrJeRM7siLSHZWYo1hroC8AWVf0ngKpuAPo0qZMHjD8arElnTluDETfxJrjZta2EDSty+cUNf+Wuv93BwuQifDk+KuMxeiUN565Rx21cgiNCecObmGYQlyTTEFlCKLaLJO/IIy2Wg8PxSdcsk7ZogAncD8xW1T+LyE+An2AllLgGK0DMKCwPiKdp7gnRIu3FJv1ZC6djQC4wQ1UbOngxFwD3AhtEZK197meqOr31Jj2Xb//8BvwNYQBEhKd/8iYRf4ipf/qY61+9trFeti/pSIl43JKTejNpiRMAEOT/t3fn8VGV1+PHP2dmMtlYAoZNQBFEZF+MS8XWrYqgrXWlLqio32p/rdS6t377ba1FS2u11FIr7lZqEeouiiKgIIIQiOxbIEjClgBZyDrL+f0xQ4iQZAKZzJI5777m1bnL3Hvu4M2Z57nPQpKzc4hPGGNaSjj6GTbSAPNy4Lzgbq8Q6Af/YHD9q6qqwGIRyRCRbsHjNChUyfCietYlAb0Bv4h8X1XXN+FiFhLiN4Kq9gp1nFjRtn0abYP9CXfn7+Wr95eh6qO0YC9n5bXhomvOinKEiSvdPYB094Boh2GMaXo/w0wRWVZneWqwnciRh6zTABPoUifB7SLQjx0CiXJ7nY/lB9cdezJU1fMbCEiA3wOTCGThhDX/nWz8Hi9aVYPPrcx7c6klQ2NMwhPA0bSxR4tUNSvk8eo0wFTV0ro9EFRVpZmtdY6pAU2w+PlH4IzmnLw1uOqOC2nbOxXcLhzthF/8+bpoh2SMMTFBREO+mnacIxpgAuwWkW7B7d2APcH1BUDPOh/vEVzXqOa0Jq0mUGWa0BbMWULJlhIU8JX6ePW5/0Y7JGOMiQ1hmMKpvgaYQe8CNwff38yhgVveBW4Ktio9CygJ9bwQmteadBSQ24zPx7U56zby1lerGHVSF+jaCRVw+JWi9DjqZWqMMS0lfP0M622ASaB28g0RuQ3YBlwb3DYLGANsBiqA8U05ybG0Jk0CTgauBBKyTtCvyuP//BDPlkp63phKZf/2UAW4FF+aDcZtjDGC4ghDMgzRAPPCevZX4GdHe55jaU3qITBW6aijGZ+0NXl7+So8WyoRv/JpzmbKLm6P+h34/Urv8ztGOzxjjIkJ8TQCzTG1Jk10U1/7HPEq6oDq1eUkd2xLWptkqr1eyousZGiMMdCK5jMUkYGquqaR7WPiteN8c7TvmkpxdWC8AZfLySOXfJ++3ToB0LlNejRDM8aYmCACTkf8DEEZqpr0S6DdwQUR2
aeqdesB/1N3e6K49voa1pZuJsmRBAjnnNyFtkn1T+lkjDGJqtVUk3LkQ8tQywlhWIfv0rvNQCAw5JfbkRLliIwxJva0mmpSjpyZMdRyQujTZlC0QzDGmBgXntakkWJTrx+jNft28Y81X0Q7DGOMiUkCOMUf8hUrQpUM3Yf1NUw5bDkhR6BRVSavXsDqfbu4pOep9G53XLRDMsaY2CKtq5p0Md/ua7jksOXFYY8oDqzYW8CmkiKSHE6eW7eEx88cE/pDxhiTQATiqpo0VD/D8yIUR1x5bdNyqn1eUpwuFu7eSkF5Cd3T29e7r9fnx+U8sjb6iTUfsHJ/Pq+ec0dLh2uMMVHRmlqTNkhE3ECxqiZcL/OfDvgO1/QeAoBTHHRKaVPvflUeL3e++Bb3jPkuQ3p2rV1f6qnk9S3ZeP3Ksr1byDqud0TiNsaYyNG46mfYnAY0AiRkn4K+7TvxnS69+E6XXpzR+QTcTme9+81euZHcPXt5ft5XBIbLC5i8bjZevw/Fx59WJdyYBcaYBCASqCYN9YoVzW1NGjtXEmOqPV5eXbicjumpbNxVxKrtu2q3fZS/BkURlI2lRRSU74tipM1TN8kbY0xdDjTkK1Y0Zwon04hV23dRVlWNQwSv38+nazYz5IRuADw62EVe2RLAQVt3F7ql1v+8MdapNx8t/S3o76IdijEmBrWaZ4Yicn0jmxOyW0VTnXZSd966e1ztcrLr0FfdLaWY9q7Ao1an+PFpOY5mjGrn8x9gb9lUOrWbgEjkft9oxavgWY16vgB+HLHzGmNin6C44uiZYai/nBNDbP8mXIG0NiJCerK73m39Oz0OBKoYRQRVZV/lMjqknIYcQ8eckvKZ7D/wMilJA2mXNqpZcTeVevOhei44MqHqE9R/GeKovyGRMSbxxFvXikafGarqSaFekQq0tZm/awrTt90FQEn1SlYV/Yr9VdlHfRyf/wD7D7yCQ9qyt2wKqt5wh1ovrZoN6gG8QBXULIrIeY0xcSLOGtDYM8MW9t9vFnFh16FkuA9N7VTlLWVV8Sz86uObA8spPPACO6s8pBY/y+lHWTosrXgXn78YcaTj8W7nQNXntE29oCUu5Vsk/QZIHR14r2WQ3KfFz2mMiS+x1EAmlJDJUEQuBAYDi4ClwEvAFUAOcIOq5rdkgPFg2/a9LMvJ46ofnPat9etL83ll61xKPRWM7/P92vUL9zyHX30AzNv1FO0kn/0+BwUVmzi5KpuOqVlNPnd68ndwdjhUm53iHtDMq2kakRRwHh9YcBRF9FmlMSb2xdszw0arSUXkF8A7wPXAJ8AU4CTg1wS6VTzR0gHGg39N/5LXZywhf8f+b61/Le8zkhwuZu9awf6aA7XrN5UtQIP/K/YUss+XSpIjmWJ/OjW+8qM6tzvpJNqljal9JTm7hv6QMca0sIPPDOOlmjRUP8OfAReq6hnAGOAO4DpVnUKg+eC5LRxfzNu8ZQ8r1+XjER8z3l4KwNLN21m7fzsr929FgAOeSt7L/6r2M9ee+BRDO/yInulZnN3pRkTSSXZmoqRR4vNE6UqMMSa84ikZhqrb6qqqSwBU9QsRqVLVHcHlXSKS3vjHW7+33l/O/vIKasTLwiWb+d75/fnNjI8Zf/EIftp3dO1+vdI7175vk9SZvPIVeLWa0zteRWbXQ8OxZabYszdjTPwTWtkzw8NUt0gUcSzr3JP4yLkWqXLRp2cX3s9Zj8fn478L1/Da3T/G7TryK15TMgev1iAIq0vnMub4+6IQuTHGtCBRXA5ftKNoMpvPsJm+YAtlXh+6w8HinttwrW9DapKLvWUVfJyzkcuyjmzQsrlsEYofEHZVrqfKV0aKs23kgzfGmBYSb/0MbT7DZjojoxezt2+kqsbLkOqedOjfDpcEHsWmJNX/W+HKno/iD/YHFHGQ5EjI8c6NMa1cq6kmDdd8hiLSE3gV6EKgFepUVZ0sIo8ClwN+YA9wy8FnkrFuU1k+XVM6sn1dCdU1P
sQJuev28tZvf0RKcuMFZpfDDdQ/Oo0xxrQGQmw1kAmlubNWNJUXuFdVBwBnAT8TkQHAn1V1iKoOA94H/i9C8TRLpbeaSete541v5vHVzq040nwkiZ+ypEp2lpREOzxjjIk6EUhy+EK+YkVEkqGq7lTV5cH3ZcA6oLuqltbZLZ04mRJqzu5sKrxVfFb4NYPPzqR/Wzed1x7g7DHHU+6p5t3PV0c7RGOMiTqbwqkRItILGE7g+SMiMhG4CSgBzo90PEer0lfNW/kLEIFKXw2umgr0s0oo9TNq58l8tHEtc5dmM6SPh17dh0c7XGOMiRLFIfEzAk1Ek6GItAH+C9x9sFSoqg8DD4vIr4CfA7+t53M/AX4C0KNHD4qKisIST8kxVGmWeSs4PakP3uBwage+KMXVVmnXNoVZby/kQK9kenRw8dYncxl3Wc+oxBhpFmN4WIzhYTHGhtbWmjRsRCSJQCKcpqpv1rPLNGAW9SRDVZ0KTAXIysrSzMzMsMV1tMfKBH7a9YTa5cc/eJ1v8ovRkhoKu1TjETfJ7SB/OVx9ST49uw6LeIzRYDGGh8UYHhZj9AlKksTOM8FQIpIMJTANwwvAOlV9ss76vqq6Kbh4ObA+EvGE094Lkqj+wIu7pJySKzO56cz1dEjyAT7cpALDohyhMcZERzhKhiLyInAZsEdVBwXXdQSmA72APOBaVd0fzDWTCQwfWkGgh8LyppwnUiXDkcA4YJWI5ATX/Rq4TUT6EehasQ24M0LxhE3Gajcp+eWIw0G7L6u4+qHfk5Ee6DfokIzoBmeMMVEiAs7wVJO+DPydQPe8gx4CPlXVP4rIQ8HlB4HRQN/g60zgmeD/hxSRZKiqCwlUIR9uViTO3xxVVR5SUhruN1g4P5ckvyIuJ7JlH/t3J3Nc324RjNAYY2KTg+Y3oFHVz4MNL+u6HDgv+P4VYD6BZHg58KqqKrBYRDJEpJuq7gx1HpuErhFr1xTw9N9m88STN5Censy05+Zz1vdOpU+/Q9MkXT/+AnadPxgAh9NBl8z20QrXGGNiRgs/M+xSJ8HtIjCgC0B3YHud/fKD6ywZHitV5fV/L2LP7lJmf7iSEcNP5IMZy9iyYRf/++extbPRnzlqaJQjjS0e3y6c0g6HIy3aoRhjougoWpNmisiyOstTg40mm0RVVaT59bGWDBuwdm0BeVuL6NAhnffeW87G5dsoK69izdfb2bCmgFMH9Yh2iDFH1c/2vRNIc59G14wHox2OMSaqFGfT+hkWqWrWUR5898HqTxHpRmA4T4ACoG6fth7BdSFFaji2uLNieR4+n5+Kihqqy6v56ovN1NR4KSmp4L03lkY7vJh0oOpzarz5lFS+h8e3K9rhGGOi6OB8hi00As27wM3B9zcD79RZf5MEnAWUNOV5IVjJsEE3jjuHG8edAwQa0dx+x/OU5nnxux30GWalwsOp+iksmwKA31/D3gOv0LW9lQ6NSVQiSpLDG4bjyOsEGstkikg+gb7ofwTeEJHbCPREuDa4+ywC3So2E+haMb6p57Fk2ASFRWXs2V+Kup2owufZG7jiiiy8Pj9JLme0w4sJPn8JIqm4XYGWtF7f3ihHZIyJNmcYxh5V1esa2HRhPfsq8LNjOY8lwyYo1zJwenG1A78Hypz7+DBnAx/lbGDyLT+sbUyTyFzODpzU6dXQOxpjEkKgAY2NTdqqdOnYkR//5DQqvOW4Hckcn5nJy/OXsb+8imW5+Zx+cvPHIDXGmNZFw1IyjBRrQNMEHdLbMm7UBazOX0anntXgak9pSSVup4Pn5y0lUDI3xhhzkABJ4g35ihVWMmxEZY2HxbnfcH7/Pny89hNy/1TCS1lfcNKlTg5sKMXZ0U2BQ9ixv5TuHa2zvTHGHBRvM91bMmzE+1+v55m5i+makcbzf5qH44CXqkUVuM/fg6jiKvXxyk+u4bgObSIem9+zFvXX4EweFvFzG2NMUzjDMBxbpFg1aQMqazxMW5QDwNMffE753
HIApMbP8hdzERE8NV5efHdxxGNT9eMtfRRf2aOoeiJ+fmOMCUUAp/hDvmKFlQwPs3NbEW9NnUvny/uzt7yC1CQXq9fupK048DkURJDyakhxoKos3rQt4jFqzULUtwtQ/FVzcaaOingMxhjTGBGbzzDuqGpt94g3n53Lwg9WcMmATlxx2gAOeCpIPsnJtm3K1pw8ktqmcvENZzDyutMB6Jga2TE4VRXvgWdAywHBVz4VR8pFiBx9Id/v3YKn/GXc7X53TJ83xpjGhGPWikixZAj8+asFnNi+A99N7kr2/HWkt0tl3dur+NE/LuDFvM/41Ym38+C/V+HZvQ8vGaS3z+DMHtHpTiEiOFJ+EEyGgKQe87FqDvwTX/WnuFJG4UweGaYIjTEm0IAmTPMZRkTCJ8PtZSW8v2UDKU4XpZs7UFPjJTUtmYK8Ql795D0qTqri411f4ty/H3UC+0pJdkb3H9iVfn2zj+H3bMJfsxiRttQc+Ccp7u9Y6dAYEzZCfDWgSfhk+PKq5YBQ5fXiH9mJh0fdCsDakq2845xHuiuFz7Ysp3DlNvwOcHi9LJy1hFtuO2IkoLjiqZwBWg2Sjno34fesxOkeFu2wjDGtRGA+w9jpRxhKQifDKp+Xed9sxbXsAL52Dj5O28Yd5waqC7ft3EHPwsAkvmltPRSNcXCgqhpB6HrGidEMOyyS0sfjShlTu+xIOjWK0RhjWiOrJo0TKU4Xz5x9KY/OmE5yShITf3EppSUVfDF3HaOvGMkl3QKJcWd5GVftfY2y/Epc7Z34+2fi8/txOuK3WtHh7AbObtEOwxjTSgWmcIqfatL4/WseJotmrcUpgq/Gx8qFeXwwcxmv/nMem9cHpsCqqqihnTuZS7ufQlKBkLJHuPyk/twzYxYfr9kU5eiNMSY2CYpbvCFfsSKhk2F1tZcF89YhCH6f8t6by5jzQQ5JSU5m/usLcpZt4ac3TEErfcxfthmHAzz7fHy2NpeVBbuYunApNd746UdjjDGR1IKT+4ZdQleTJie7+N2ka/EGE9qnH3zNx++uQBxCztItbNhcwI6Cffz12VkU7a5AAPXBjHmr6Ni5LaWVVXy6PpfRg06J7oUYY0yMEdGYGmEmlIQuGQKc0CuTwowiPJ2rGXl+fwad04eyJDjze/3Ymb8Pf6qTpR+t45eXjOTey8/l9kvOJC0tBb9f8fr9zF67MdqXYIwxMckZnMapsVesSOiSIUClt5rntrxPB3cbftfvNpbl7aQ8BRat2oJPFfFCpc9DZ5K55LtDAbj5vKzaaZuSbaZ7Y4w5gnWtiDNzdi/D4/ewp6qYKW/Noay0EnEKRV4vWfecSWpGCiJCn2E9aj/TJtkdxYiNMSb2BTrdx07JL5SEToZev493CxZS7ffiVz+fFm6gpqcTlzioQRl96ilcMKhvtMM0xpi4ZPMZxgkR4YZeF+PxB4rye7rUsKewio/nZfM/14xhSI/joxyhMcbEJ0FtOLZ4ULhjPx6vl/NOGP6t9T94aiLeT/exKyuPzL6DohSdMcbENwGSrDVpbPP7/fzl3n8zZ+ZX31q/ZNsaKj7fj3h8zHx6CV5//Dz8NcaYWHLwmaG1Jo1hKxZsZHfBfnxSw+78faR1TMPtcvGnJz7AWeVDXQ6cG8p5be58BnXuS9aQ+B+L1BhjIi2Wkl0oCZcM/X4/05+Zg/r9eD1e3n35cz7btJOuPTLo5uhARYf9AIhD2L7iAO/lzuap31zNCd07RjlyY4yJHwI4JNpRNF1EkqGI9AReBboACkxV1cki8mfgB0ANkAuMV9Xiloyl8kA17TLSUYWMrinkbSuiaMteivL28ey0OzixRyYA1TVe7vzVv/H5/PznvWU8cOfFLRmWMca0KoKSFEclw0g9M/QC96rqAOAs4GciMgD4BBikqkOAjcCvWjqQ9Hap/PoftyBdM+g2oAd5VTUooH5l8t8/rt1v3qIN7C0ux+128mX2Fr4p2NfSoRljTOsh4GzCK1ZEp
GSoqjuBncH3ZSKyDuiuqh/X2W0xcHUk4lm+dAu7dhRT46mgeHsxDgn8i2xdWVC7T3qam/PO6ktRaTkZbVLxa/z8wjHGmGgTBCcxlO1CiPgzQxHpBQwHlhy26VZgekuf3+9Xpr+2iL2VFaTUwKjLhnHjLd8DIDnl0Nfx3TP6MmhAD25+8j9cPnggvXoc19KhGWNMqxJP3RUimgxFpA3wX+BuVS2ts/5hAlWp0xr43E+AnwD06NGDoqKiY46hqrKGtp1ctPMnkdbBhSPJi58qACqrYGdRIZ+v3sIPzxrIB0vXcVyKgyWrN3JB/+60TU0+5vMeq5KSkoif82hZjOFhMYaHxRgbAv0M4ycdRiwZikgSgUQ4TVXfrLP+FuAy4ELV+usiVXUqMBUgKytLMzMzj/r8qsryvQWM6NGdHd8RVuZX08vlZNCwTOoeb9qCBby3eD0nn9iTGUs2oapUeauYu66A2y4+46jPGw7Hcr2RZjGGh8UYHhZj9AX6GcZPNWlE0raICPACsE5Vn6yz/hLgAeCHqlrRkjFkF+Xzyy/fZuHOrSzasR0IJMhp65bV7rNrXxmf5mwmNTmJaXNXMHJAL87u34sLBp9MxzZpLRmeMca0Og4k5CtWRKpkOBIYB6wSkZzgul8DfwOSgU8C+ZLFqnpnuE+uqkxeM5/C6hJe2LSI609Lp9JbTZsqN+2Oa1O739uLVlNZ7SEtOYm83fu46/KRDOrVNdzhGGNMqycITglPsgsWnCYDTuB5Vf1jWA5cR6Raky6Een8CzIrE+bOL8lm5rwCH+MjZW8AvB93I0OO6U1RURGZmJt/sLybd7WZUVj8Gntil9nMnds6IRHjGGNPqCOCi+fO9iogTmAJcBOQDS0XkXVVd2+yD15EQI9Bsrygkze3BKU58WkNBxV66VKRS46vGf5zyv7Pm0COjHY9dejEndbWRZowxpvkEZ3ga0JwBbFbVLQAi8h/gcsCS4dFql+JhUBc/ig8HQpqrmifvnU6/rOPp96NhFJSUsqO0lM2Fezm5k3WhMLGnpqaG3NxcKipa9NF6k2zbti3aIYRkMdYvLS2NPn364Ha3/ATlAjjC0yylO7C9znI+cGY4DlxXQiTDC7ucyeltBvPgX/7FE/fdzJqvtlJQsA+vo4ZPk0rxig+fX3nxq2weu9SGXTOxJzc3l4yMDPr164fDET/N1U3s8Pv97N69m9zcXPr379/i58teWT3b2W1TU5rMpojIsjrLU4M9CCIqYe6qP017k3Vf7ucv/3mb6f+cx94D5ZSUV+JcUczQ47sxosfxtInAryVjjkVFRQVdunSJSiJ0Op0MGzaMgQMHMnToUP7yl7/g9wfmqVu2bBkTJkyIeEzNkZeXx6BBR85VmpeXR2pqKsOHD6d///6cccYZvPzyy7XbX375ZRwOBytXrqxdN2jQIPLy8gDo1asXV111Ve22mTNncssttxxxnvnz5yMiPP/887XrcnJyEBGeeOKJZl9HQ/s4HA66dOkSsdoFVb1EVbOa8Bp02PLhibAA6FlnuUdwXVglRMmwtLyCL2bn43fD/Pe3cmq79lR0cFHVzsGg4zvzqysvi3aIxoQUrRJhamoqOTk5AOzZs4frr7+e0tJSHnnkEbKyssjKyopKXIfzer24XM37k9anTx9WrFgBwJYtW7jyyitRVcaPHw8EBv2YOHEi06fXP1hWdnY2a9euZcCAAY2eZ9CgQbzxxhvcfvvtALz++usMHTq0WbE3RZzWKiwF+orISQSS4I+B68N9krj8Zo7WUzPexl8FIuCrFlZ0qWb3qI7szWrL1mGRH1XGmHjVuXNnpk6dyt///ndUlfnz53PZZYEfk5999hnDhg1j2LBhDB8+nLKyMgAmTZrE4MGDGTp0KA899BAQKAmdddZZDBkyhCuuuIL9+/ezfv16zjjj0MAWeXl5DB48GAgkmXPPPZfTTjuNUaNGsXPnTgDOO+887r77brKyspg8eXKD+2VnZzN06FCGD
h3KlClTmnStvXv35sknn+Rvf/tb7brLLruMNWvWsGHDhno/c++99zJx4sSQxz7xxBOpqqpi9+7dqCofffQRo0ePrt1e3/fT2HX4fD7uv/9+Tj/9dIYMGcKzzz7bpGuMB6rqBX4OzAbWAW+o6ppwnychkuGAIT3peVk7ul/cluPHtKWqswN3qgNnsrCrpiza4RkTVv6KN/FXvBl6x2PUu3dvfD4fe/bs+db6J554gilTppCTk8OCBQtITU3lww8/5J133mHJkiV8/fXXPPDAAwDcdNNNTJo0iZUrVzJ48GAeeeQRTj31VGpqati6dSsA06dPZ+zYsXg8Hu666y5mzpxJdnY2t956Kw8//HDteWtqamqraxvab/z48Tz99NN8/fXXR3WtI0aMYP369bXLDoeDBx54gMcee6ze/a+99lqWL1/O5s2bQx776quvZsaMGSxatIgRI0aQnHzoh3l9309j1/HCCy/Qvn17li5dytKlS3nuuedqv8fWQFVnqeopqtpHVUP/2jgGCVFNes2I73J670Hc8eG7TLnkMt7evJZKr4f0Gi/tOnSIdnjGhI36D0D5c4CgKRcjjjYhPxMuI0eO5J577uGGG27gyiuvpEePHsyZM4fx48eTlhYYwaljx46UlJRQXFzMueeeC8DNN9/MNddcAwSSyfTp03nooYeYPn0606dPZ8OGDaxevZqLLroICJSCunXrVnvesWPHAjS4X3FxMcXFxXzve4EB+ceNG8eHH37YpGuqb4TI66+/nokTJ9abbJxOJ/fffz+PP/74t0p69bn22msZO3Ys69ev57rrrmPRokUADX4/jV3Hxx9/zMqVK5k5c2btMTZt2sQpp5zSpOs0CZIMAV74Ops9FQd4fc0qfnPOeQC1ne6NaS208k3AE3z/FpI+Luzn2LJlC06nk86dO7Nu3bra9Q899BCXXnops2bNYuTIkcyePfuojz127FiuueYarrzySkSEvn37smrVKgYOHMiXX35Z72fS09OBQOKqb7/i4uKjjuOgFStWHNHy0uVyce+99zJp0qR6PzNu3Dgef/zxkI1bunbtSlJSEp988gmTJ0+uTYbHQlV5+umnGTVq1LfWH2zcY0JLiGrSvOL9fPZNHl3S2jBn62a2l7b+EeNN4lH1Q+UMCExXDZVvBNaFUWFhIXfeeSc///nPkcOG2srNzWXw4ME8+OCDnH766axfv56LLrqIl156qbYF4759+2jfvj0dOnRgwYIFAPzrX/+qLQX16dMHp9PJo48+Wlvi69evH4WFhbVJzuPxsGbNkY+MGtovIyODjIwMFi5cCMC0afVOjnOEvLw87rvvPu66664jtt1yyy3MmTOHwsLCI7YlJSXxy1/+kqeeeirkOX7/+98zadIknM5DI7U09P00dh2jRo3imWeeweMJ/BDauHEj5eXlTbpOE5AQJcPsXTsQESq8Hhwi5OzaSc927aMdljFhJkj7J4HK4HIq9Y+CeHQqKysZNmwYHo8Hl8vFuHHjuOeee47Y769//Svz5s3D4XAwcOBARo8eTXJyMjk5OWRlZeF2uxkzZgyPPfYYr7zyCnfeeScVFRX07t2bl156qfY4Y8eO5f7776+thnS73cycOZMJEyZQUlKC1+vl7rvvZuDAgd86f2P7vfTSS9x6662ICBdf3HBf4tzcXIYPH05VVRVt27ZlwoQJ9XaPcLvdTJgwgV/84hf1Hue2227jD3/4Q8jv9uyzz653fUPfT0PXcfvtt5OXl8eIESNQVTp16sTbb78d8vzmEGlg1qSYlZWVpcuWLQu9YxPEQzWpxRge8R5jdnY2p512WoQjMq1Rff8tiUi2qsZGH5koSYhqUmOMMaYxlgyNiRMHR30x5ljZf0MNs2RoTBxIS0tj9+7d9sfMHLODY5Me7OZivi0hGtAYE+/69OlDbm4uO3bsiHYoJo4dnLXCHMmSoTFxwO12R2SmgVDivSFSrIiHGBONVZMaY4xJeJYMjTHGJDxLh
sYYYxJe3HW6F5FCYFuYDpcJFIXpWC3FYgwPizE8LMbwiLUYT1TVTtEOIpriLhmGk4gsi/VRFyzG8LAYw8NiDI94iDHRWDWpMcaYhGfJ0BhjTMJL9GQ4NdoBNIHFGB4WY3hYjOERDzEmlIR+ZmiMMcaAlQyNMcaY1p8MReRFEdkjIqsb2C4i8jcR2SwiK0VkRAzGeEMwtlUiskhEhsZajHX2O11EvCJydaRiq3PukDGKyHkikiMia0Tks0jGFzx/qH/r9iLynoh8HYxxfBRi7Cki80RkbTCGI2awjeZ908T4onrPNCXGOvtG7Z4xdahqq34B3wNGAKsb2D4G+JDAlOBnAUtiMMazgQ7B96NjMcbgPk5gLjALuDrWYgQygLXACcHlzjEY46+BScH3nYB9gDvCMXYDRgTftwU2AgMO2ydq900T44vqPdOUGIPbonrP2OvQq9WXDFX1cwJ/UBpyOfCqBiwGMkSkW2SiCwgVo6ouUtX9wcXFQI+IBPbtGEJ9jwB3Af8F9rR8REdqQozXA2+q6jfB/SMeZxNiVKCtiAjQJrivNxKx1QagulNVlwfflwHrgO6H7Ra1+6Yp8UX7nmnidwhRvmfMIa0+GTZBd2B7neV86v+PNlbcRuAXeUwRke7AFcAz0Y6lEacAHURkvohki8hN0Q6oHn8H+gM7gFXAL1Q1apMYikgvYDiw5LBNMXHfNBJfXVG9ZxqKMU7umYRhUzjFERE5n8CNfU60Y6nHX4EHVdUfKNTEJBdwGnAhkAp8KSKLVXVjdMP6llFADnAB0Af4REQWqGpppAMRkTYESi13R+P8oTQlvmjfMyFi/Cuxf88kDEuGUAD0rLPcI7gupojIEOB5YLSq7o12PPXIAv4TvKkzgTEi4lXVt6Ma1bflA3tVtRwoF5HPgaEEnufEivHAH1VVgc0ishU4FfgqkkGISBKBP+LTVPXNenaJ6n3ThPiifs80IcZ4uGcShlWTwrvATcHWcWcBJaq6M9pB1SUiJwBvAuNirBRTS1VPUtVeqtoLmAn8vxi8qd8BzhERl4ikAWcSeJYTS74hUHJFRLoA/YAtkQwg+LzyBWCdqj7ZwG5Ru2+aEl+075mmxBgn90zCaPUlQxF5HTgPyBSRfOC3QBKAqv6TQCuuMcBmoILAL/NYi/H/gOOAfwR/RXo1woP8NiHGqAsVo6quE5GPgJWAH3heVRvtKhLpGIFHgZdFZBWBlpoPqmqkZzcYCYwDVolITnDdr4ET6sQZzfumKfFF+55pSowmhtgINMYYYxKeVZMaY4xJeJYMjTHGJDxLhsYYYxKeJUNjjDEJz5KhMcbEqaYOoB/c90QR+TQ4gPl8EYn4sI6xzJKhMcbEr5eBS5q47xMExpMdAvweeLylgopHlgyNqUfwl7OKyLWHrT8zuD7vsPU3BNf/toHjXSsiC0WkTET2icgKEblPRNwteBmmlatv4HcR6SMiHwXH310gIqcGNw0gMEMGwDwCg62bIEuGxjRsHfA/h637H+ofteYOAn+UbhMRZ90NwQQ5lcCIJCeoakfgRgJDwUV0hhSTEKYCd6nqacB9wD+C678Grgy+v4LA7CjHRSG+mGTJ0JiGvQkMF5HeACLSFrgKeKnuTiLSH/gucDOB5Da6zrZewG+ACar60sFphVR1jaqOU9VtwSHNJorIjmDJMU9E7orEBZrWJTgw+NnAjODIN89y6AfXfcC5IrICOJfAWLK+aMQZi1r9cGzGNEMVMI3ArAcPA9cBnwGHj8H5E2Clqr4vIrMIlBLfD267mMCwav9p5DwXEUikZ6rqdhHpTGxPI2ZilwMoVtVhh29Q1R0ES4bBpHmVqhZHNLoYZiVDYxr3HDBeRFwEkt5zdTeKSApwE4dKiy8Ao+u01OsEFKlqTSPnqAFSgIEikqKqe1R1RTgvwiSG4DRRW0XkGggMGC4iQ4PvM0Xk4N/8XwEvRinMmGTJ0JhGBAfy3kagqrMz8NFhu1xDYEb614LLs4BC4
PbgciGBQbkbbCijqvMJDOL8v8AeEflYRCI6ELuJT8GB378E+olIvojcBtxA4Nn118AaDjWUOQ/YICIbgS7AxCiEHLNsoG5j6iEi84E5qvoHERlPoMT3e1X9nYjcCPxBVXuJyALgO0DdmSUyCCTBXgTm/NsMjFfVfzXhvGnA74Afq+oJ4bsiY0xj7JmhMaG9DmwHsuuuFJEBBGZQ/yGwtM6mzsF9x6jqeyLyKDA5WEX1jqoWB5u7P0gg8XUBkglM4FsNlGENG4yJKEuGxoSgqlXAnHo23QEsV9X3Dlu/S0RmBLe/p6qPiMh6YAIwRURqCFS9vkagMU4fAh2i+xJIgquAsS1yMcaYelk1qTHGmIRnDWiMMcYkPEuGxhhjEp4lQ2OMMQnPkqExxpiEZ8nQGGNMwrNkaIwxJuFZMjTGGJPwLBkaY4xJeJYMjTHGJLz/DyjxtfQtRy+JAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.cm import ScalarMappable\n", + "\n", + "fig, ax = plt.subplots(figsize=(7,5))\n", + "\n", + "number_of_evals = 500\n", + "df_dynas = pd.read_csv(config.dynas.results_csv_path)[:number_of_evals]\n", + "df_dynas.columns = ['config', 'date', 'lat', 'macs', 'top1']\n", + "\n", + "cm = plt.cm.get_cmap('viridis_r')\n", + "count = [x for x in range(len(df_dynas))]\n", + "\n", + "ax.scatter(df_dynas['macs'].values, df_dynas['top1'].values, marker='^', alpha=0.8, c=count, \n", + " cmap=cm, label='Discovered DNN Model', s=10)\n", + "ax.set_title(f'Intel® Neural Compressor\\nDynamic NAS (DyNAS)\\nSupernet:{config.dynas.supernet}')\n", + "ax.set_xlabel('MACs', fontsize=13)\n", + "ax.set_ylabel('BLEU Score (%)', fontsize=13)\n", + "ax.legend(fancybox=True, fontsize=10, framealpha=1, borderpad=0.2, loc='lower right')\n", + "ax.grid(True, alpha=0.3)\n", + "\n", + "# Eval Count bar\n", + "norm = plt.Normalize(0, len(df_dynas))\n", + "sm = ScalarMappable(norm=norm, cmap=cm)\n", + "cbar = fig.colorbar(sm, ax=ax, shrink=0.85)\n", + "cbar.ax.set_title(\" Evaluation\\n Count\", fontsize=8)\n", + "\n", + "fig.tight_layout(pad=2)\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", + "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", + "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). 
A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/notebook/dynas/results_transformerlt_macs.csv b/examples/notebook/dynas/results_transformerlt_macs.csv new file mode 100644 index 00000000000..326d9894762 --- /dev/null +++ b/examples/notebook/dynas/results_transformerlt_macs.csv @@ -0,0 +1,501 @@ +Sub-network,Date,Latency (ms),MACs,BLEU +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 2048], 
'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, -1, -1]}",2022-11-29 22:54:58.796773,0,1397702484,23.35221720436182 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, 2, 2, -1]}",2022-11-29 22:55:36.708362,0,2117790828,25.699488742308187 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-29 22:56:06.143948,0,1700582490,25.0628359775166 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 22:56:40.372306,0,1593972576,25.51774692114225 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 
'decoder_self_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-29 22:57:09.483908,0,1234590804,22.56186718543443 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, -1, 1]}",2022-11-29 22:57:47.479253,0,2320469868,26.46877217919795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, -1]}",2022-11-29 22:58:16.629295,0,1269811290,24.64774544301779 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 2]}",2022-11-29 22:58:57.917629,0,2481530994,26.07415311884126 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 1024, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': 
[8, 4, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, 1, 1, -1]}",2022-11-29 22:59:26.905633,0,1319024724,22.493311676649537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 2, 2, 1]}",2022-11-29 23:00:07.328829,0,1880709234,26.00344571579533 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, -1, 1]}",2022-11-29 23:00:38.792088,0,1671939936,25.692425623480723 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 2048, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 23:01:14.719074,0,1804297062,26.07342689295033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, 1, 2, 
-1]}",2022-11-29 23:01:56.128203,0,2350798194,26.332192395799687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 1]}",2022-11-29 23:02:26.540354,0,1397483610,25.69929087830039 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 2]}",2022-11-29 23:03:07.262003,0,2119699314,26.35980541802738 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, -1, -1]}",2022-11-29 23:03:35.089457,0,1110604884,22.97494000005183 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, -1, 1, 2]}",2022-11-29 23:04:06.642167,0,1801651290,25.757473996484833 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, -1, -1, -1]}",2022-11-29 23:04:41.625679,0,1888961382,25.85426108217189 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 1]}",2022-11-29 23:05:11.833000,0,1490960730,25.63143521434478 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, 2]}",2022-11-29 23:05:41.444763,0,1364183130,25.072061221515387 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, 1, 1, -1]}",2022-11-29 23:06:20.219204,0,2281236594,26.08920225424034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 
'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 2, 1, -1, -1]}",2022-11-29 23:06:54.484344,0,1688332896,25.54971935098368 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 1024, 3072, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, 1, 1, 2]}",2022-11-29 23:07:31.477666,0,1540039776,25.66937359699742 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 2, 1, 1]}",2022-11-29 23:08:00.927883,0,1543246170,25.23650526106691 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, 2, -1, 2]}",2022-11-29 23:08:37.748235,0,1840608102,25.84950449942653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 3072], 
'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:09:14.377541,0,1662908256,25.748175360241753 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 1, -1]}",2022-11-29 23:09:53.068127,0,2074525548,26.02259252150837 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, -1, 2, 2]}",2022-11-29 23:10:31.871226,0,2061818988,26.071718195164653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, 1, 2]}",2022-11-29 23:11:01.734997,0,1412290650,25.399141175298542 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 1024, 1024], 'decoder_layer_num': 
[6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, -1, -1, 1]}",2022-11-29 23:11:40.315729,0,1971394674,26.21330617046487 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, 1, -1]}",2022-11-29 23:12:16.427543,0,1830900582,26.184771020867597 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 2, 1]}",2022-11-29 23:12:57.379424,0,2421538668,26.175261088262666 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, -1]}",2022-11-29 23:13:25.381877,0,1251290964,23.320679652947288 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 
'decoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-29 23:13:58.935098,0,1588224102,25.972021275557776 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 2]}",2022-11-29 23:14:35.478235,0,1960738668,26.14494989795422 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 2]}",2022-11-29 23:15:03.906597,0,1419452244,22.7811520313731 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 2, 2]}",2022-11-29 23:15:33.346144,0,1316075610,24.898481627702125 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 
'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 1, 2]}",2022-11-29 23:16:12.674460,0,1893596268,26.420978678385804 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 2, -1]}",2022-11-29 23:16:45.186860,0,1642325856,25.982442735663543 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 2, 2, 2]}",2022-11-29 23:17:21.824602,0,1856336742,25.94442144683277 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-29 23:17:58.260936,0,2023653228,26.07457768169323 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': 
[2, 2, 1, -1, 2, 2]}",2022-11-29 23:18:27.089691,0,1582529364,23.950252879196924 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 2, -1]}",2022-11-29 23:19:02.726800,0,2035372902,26.447028779186226 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 2048, 2048, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, 2, 2, 2]}",2022-11-29 23:19:39.439886,0,1945854822,26.003986822056245 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 1, 1]}",2022-11-29 23:20:17.584270,0,2299887468,26.442901941442834 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 2]}",2022-11-29 23:20:58.503235,0,2382305394,26.517600251211515 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 1, -1]}",2022-11-29 23:21:34.739346,0,1693274982,25.604765879724265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, 2, 2, 1]}",2022-11-29 23:22:15.291570,0,2189491308,25.71548559680124 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 1, -1, 2]}",2022-11-29 23:22:53.173666,0,2225126508,26.022116504070834 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 1, 1, 1]}",2022-11-29 23:23:30.358756,0,1890804582,25.70580338518658 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:24:17.798475,0,1156869204,23.410008497520735 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:24:59.431502,0,2226789234,26.12743322887944 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:25:28.533368,0,1253161050,24.92578691671575 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:25:56.800152,0,1173519444,23.28780146013261 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 
1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:26:33.876814,0,1510425696,25.417654573154596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:27:12.966148,0,1784417388,25.760464304216683 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:27:50.697432,0,1478968416,25.19809949508387 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:28:21.688106,0,1334569050,24.839033226584537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 
4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:28:58.223688,0,1765847142,25.798742018362613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:29:28.855348,0,1334569050,24.797771375743167 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:10.461923,0,1943623794,25.731563707029 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:51.410770,0,2006538354,25.93412075285396 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 
'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:31:27.644610,0,1640018022,25.48925301539262 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:31:58.151027,0,1303111770,24.636635975207156 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:32:34.035788,0,1623367782,25.55649399276896 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:33:09.644672,0,1591910502,25.363108576481086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': 
[2, 2, 1, 2, 1, -1]}",2022-11-29 23:33:50.627277,0,1943623794,25.471976693004432 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:34:27.530520,0,1734389862,26.022537181002058 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:08.733562,0,2148146034,25.76800258460572 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:48.076930,0,1784417388,25.746269422993464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:36:18.846293,0,1397483610,24.957529704052245 
+"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:00.512837,0,2226789234,26.00635517076823 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:41.235777,0,1912166514,25.566969946150067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:38:18.262099,0,1541882976,25.33456038818163 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:38:57.094675,0,1815874668,25.598195468771692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:39:33.146325,0,1640018022,25.552118216389637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:40:09.706551,0,1462318176,25.29731961246495 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, -1]}",2022-11-29 23:40:38.756573,0,1253161050,24.86012555534481 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:41:07.530840,0,1253161050,24.9440878831812 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 
1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 2, 1, -1]}",2022-11-29 23:41:34.610233,0,1156869204,23.274975491818346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:42:01.811817,0,1173519444,23.245718341488995 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:42:37.425300,0,1591910502,25.413674094921433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:43:13.878058,0,1608560742,25.365752658832324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 
8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:43:50.046834,0,1608560742,25.36745967365502 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:44:18.851277,0,1253161050,25.005251094503805 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:44:46.110775,0,1156869204,23.277213964898888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 2, -1]}",2022-11-29 23:45:14.844805,0,1253161050,24.834074772858695 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 
'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:45:42.403812,0,1156869204,23.178044546083612 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:46:23.812450,0,2116688754,25.77854866366567 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:47:03.270121,0,1815874668,25.683722427952674 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 1, -1]}",2022-11-29 23:47:43.427381,0,1926973554,25.6886566287921 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 
2, 1, 1, 2, -1]}",2022-11-29 23:48:14.256367,0,1303111770,24.786229852100394 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:48:50.842520,0,1702932582,25.571343061345555 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:49:31.529598,0,1975081074,25.722542126362086 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:13.472772,0,2163874674,26.02132010597597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:49.447197,0,1671475302,25.60807605923095 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:51:20.051939,0,1303111770,24.844209249533108 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:01.725725,0,2116688754,25.774689748379366 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:43.523756,0,2037995634,25.839133901981196 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:53:22.095038,0,1573340256,25.369471344799734 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-29 23:54:11.017282,0,982932564,21.75634266526977 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:54:50.536084,0,2047077234,26.357977252559444 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-29 23:55:18.749444,0,1079224410,23.671710345039983 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:55:45.598303,0,1062497364,22.630106123134603 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 
2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:56:23.927095,0,1799301234,26.237657577301754 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:02.821107,0,1799301234,26.235752390680105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:41.722481,0,1976298354,26.409237334320427 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:58:20.422891,0,1862215794,26.41037129035317 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': 
[8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-29 23:58:51.406394,0,1334645856,25.47756540184132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:59:24.731374,0,1479045222,25.751217013602062 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-29 23:59:57.442326,0,1462394982,25.872011121812324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:00:30.249710,0,1399480422,25.17096109375348 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 
'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:04.136585,0,1573417062,26.100985377959983 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:43.420676,0,2015619954,26.590522766056612 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:02:22.331936,0,1830758514,26.2075294199728 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:02:51.964958,0,1269811290,25.475704676620662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': 
[1, 1, -1, -1, 1, 1]}",2022-11-30 00:03:29.946063,0,1751193714,26.105643502743355 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:04:07.937498,0,1751193714,25.93245158011976 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:04:37.760555,0,1206896730,24.91733058699306 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:05:12.938706,0,1541959782,25.94168073173522 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:05:50.741638,0,1719736434,26.03682929942318 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:06:25.032545,0,1447587942,25.535328515473058 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:06:58.207200,0,1319838816,25.37435112751941 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:07:37.942726,0,2007755634,26.51729964072713 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:08:09.189292,0,1271731296,25.166134982432663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:08:43.141792,0,1416130662,25.449204232481016 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:11.020741,0,999582804,21.951713272811833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:40.649180,0,1238354010,24.98037680186457 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:10:13.227827,0,1303188576,25.448120268873193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 
1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:10:43.351246,0,1175439450,24.62386693490815 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:11:23.467151,0,2007755634,26.232304249661833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:11:51.316006,0,1031040084,22.17006946878601 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:21.453576,0,1238354010,24.98730164778759 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': 
[8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:51.154193,0,1175439450,24.370328478496752 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:21.253989,0,1143982170,24.242154307788027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:48.722881,0,1062497364,22.741194138868078 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:14:18.241161,0,1127331930,24.234335725130748 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 
'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:14:57.044258,0,1830758514,26.44666962861665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:15:32.368557,0,1541959782,26.050701171511186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:16:01.654068,0,1221703770,24.996951152986856 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:16:33.903998,0,1288381536,25.159811999467085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': 
[1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:07.162316,0,1399480422,25.443390153441023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:35.512007,0,982932564,21.871299717957186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:09.340609,0,1416130662,25.447142262784027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:37.410682,0,1045847124,22.47970882205465 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 
00:19:06.749493,0,1095874650,23.814093767809908 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:19:39.893348,0,1319838816,25.478955124339844 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:09.251167,0,1095874650,23.750972433853825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:38.348608,0,1127331930,23.993329632521927 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:21:06.810591,0,1014389844,22.26199233258481 +"{'encoder_embed_dim': 
[640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 00:22:10.464888,0,2500270194,26.490267758415033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 2]}",2022-11-30 00:22:38.706298,0,1077304404,23.03639520261316 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:23:08.174166,0,1142138970,24.176833511635046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:23:37.202278,0,1156869204,23.451597618181914 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 
2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:12.037672,0,1894982502,26.570692923549505 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:48.692945,0,1795538028,26.656160779307765 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:18.655715,0,1221703770,24.876611482664103 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:53.952187,0,1571573862,26.243286807849493 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:26:28.198620,0,1397560416,25.575803459047332 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:27:03.351000,0,1619681382,26.29731327668079 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, -1, 1]}",2022-11-30 00:27:32.454206,0,1205053530,24.652752198325597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:28:00.103253,0,1125411924,23.2436921421545 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 
8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:28:38.079240,0,2318626668,26.646036771408653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:29:17.534918,0,2097273714,26.452370488677506 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:29:52.746931,0,1651138662,26.151231823189608 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:30:22.172110,0,1284618330,25.49140283243417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 
'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:30:52.032834,0,1284618330,25.483324803555185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:31:26.157137,0,1682595942,26.243800774630134 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:32:02.794238,0,1412367456,25.73672237614388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:32:38.498129,0,1412367456,25.694815754009024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 
'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:33:14.792750,0,1460474976,25.87514706229428 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:33:55.738731,0,2419783794,26.642604563885367 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:34:35.635995,0,2168052594,26.669695273057105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:35:13.689253,0,2015788908,26.70123813916216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 
00:35:50.004608,0,1460474976,25.829650262816568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:19.755425,0,1221703770,24.917846107035597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:48.610538,0,1093954644,23.08549976883442 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:23.140196,0,1571573862,26.495457209152665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:58.602077,0,1349452896,25.103371841310192 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:38:34.600291,0,1523389536,26.028214026012492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:39:10.001278,0,1349452896,24.972561656938478 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:39:44.443816,0,1714053222,26.16022517555216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:40:21.100754,0,1491932256,25.983857779417527 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 
2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:01.553620,0,2459105394,26.362258167185217 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:41.438856,0,2136595314,26.728063906094565 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:42:09.810325,0,1093954644,22.9713116425012 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:42:38.643170,0,1156869204,23.4523958088576 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 
1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:43:07.305868,0,1093954644,23.166873916592454 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:43:36.751041,0,1316075610,25.51338789773949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:44:07.237775,0,1253161050,25.004771948649424 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:44:43.589272,0,1747430508,26.62802459531885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 
4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 00:45:13.240897,0,1142138970,24.356415715383232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:45:43.655137,0,1301268570,25.49676417511692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:46:13.344754,0,1142138970,24.224366625169438 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:46:50.784832,0,1945010028,26.612602269295685 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 
'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:47:21.162395,0,1238354010,24.912296082302873 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:47:50.863610,0,1190246490,24.44551868443464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:48:20.655219,0,1190246490,24.332970362633144 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:48:57.884646,0,1984331628,26.656284353019814 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:49:36.098176,0,1976467308,26.4326896541673 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:50:27.405530,0,982932564,21.902795709633452 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:08.032064,0,2180770674,26.599782674768583 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:35.807356,0,1062497364,22.371131012623422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, 1, 2]}",2022-11-30 
00:52:05.763177,0,1079224410,23.52409715160256 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:52:39.045430,0,1571573862,26.256831208948178 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:12.642643,0,1508659302,26.00462272595473 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:42.368319,0,1158789210,24.50401996100462 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:12.059480,0,1127331930,24.087230950865788 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:51.957601,0,1956587634,26.41692984127425 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:55:29.312918,0,1826995308,26.542101655879033 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:07.198728,0,2007924588,26.616730692591272 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:48.032095,0,2149313394,26.572361617382416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 
1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:57:18.152332,0,1095874650,23.782891130205417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:57:47.584518,0,1173596250,24.547205997391597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:17.345898,0,1110681690,23.94407626548769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:47.056374,0,1190246490,24.491800170896195 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 
'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:16.745616,0,1253161050,25.254425153820556 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:47.189514,0,1236510810,25.16579743713827 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:16.840252,0,1347532890,25.676132056093994 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:46.759507,0,1221703770,24.768354572582403 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 
'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:21.302832,0,1412367456,25.746119527915003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:55.146488,0,1634488422,26.23053149772474 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:02:23.005266,0,1062497364,22.439008935256105 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:02:59.282910,0,1778887788,26.56040601873973 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 
'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:03:36.281891,0,1826995308,26.583100181248412 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:04:12.687089,0,1715973228,26.303092956441024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:04:48.509486,0,1475282016,25.672530277500815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:05:25.099950,0,1747430508,26.33985155284849 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:05:53.329815,0,1031040084,22.262405116542624 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:06:30.890827,0,1976467308,26.580438511490023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:00.560411,0,1079224410,23.728343099659895 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:28.617193,0,999582804,21.72906013722468 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 
01:07:57.530176,0,1014389844,22.471135438983048 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:08:32.633405,0,1682595942,26.278826842615302 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:03.054659,0,1284618330,25.50001897765904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:31.687974,0,999582804,21.967989901189092 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:06.993955,0,1506739296,25.812328256697807 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:36.948650,0,1284618330,25.440948757479756 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:11.600524,0,1380910176,25.703810906558953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:46.811777,0,1380910176,25.597256331992767 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:14.807619,0,982932564,21.644880427454428 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 
3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:45.339890,0,1378990170,25.719053744735888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:13:19.554133,0,1651138662,26.296467387644455 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:13:55.039207,0,1443824736,25.92682154046769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:25.339862,0,1378990170,25.618097500353073 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 
1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:58.311883,0,1443824736,26.05836967026129 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:15:35.880349,0,1826995308,26.665846653894516 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:05.964065,0,1316075610,25.603591515089885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:35.543255,0,1316075610,25.726771790425637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 
4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:17:09.354304,0,1588224102,26.44933722401916 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:13.173994,0,2136595314,26.65406908362065 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:40.903167,0,982932564,21.753935985565093 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:19:09.095827,0,1077304404,22.592892971887863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 
'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, -1]}",2022-11-30 01:19:38.452316,0,1079224410,23.708705837824187 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:20:15.347627,0,1651138662,26.467237911633077 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:20:53.408627,0,2015788908,26.720937123019265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:21:30.489630,0,1732623468,26.539145946856273 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:04.657299,0,1588224102,26.359690429140286 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:41.410352,0,1795538028,26.7016667761388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:11.191925,0,1045847124,22.30727022285813 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:48.495707,0,1945010028,26.92853123605158 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 
01:24:18.247397,0,1142138970,24.635872154023456 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:24:48.348618,0,1127331930,24.1530263191461 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:25:26.938625,0,1860372594,26.730853774460698 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:25:59.999243,0,1366103136,25.823752934217687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:26:38.109367,0,1460474976,25.87027954805624 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:27:14.992048,0,1826995308,26.80291796821574 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 1]}",2022-11-30 01:27:44.593561,0,1110681690,24.51106684934533 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:13.315260,0,999582804,21.754138520655324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:43.054739,0,1095874650,23.72840125694677 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 
3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:29:16.453346,0,1540116582,26.383471679803876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:29:44.596431,0,1014389844,21.82697530935213 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:22.103158,0,1523389536,25.605380278566003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:52.162081,0,1158789210,24.84491236430293 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 
'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:31:27.756932,0,1571573862,26.52406333073416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:31:58.252949,0,1253161050,25.329648091984946 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:32:34.692819,0,1669708908,26.5312087342803 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:04.851690,0,1316075610,25.720879408387013 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 
'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:34.153425,0,1079224410,24.103822174703968 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:13.928399,0,2073680754,26.650073433583966 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:52.003469,0,1491932256,25.68166831220549 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:35:22.255164,0,1316075610,25.63882428357699 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:35:51.968182,0,1347532890,25.833426111273635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:36:31.293492,0,1877022834,26.470763981649853 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:37:01.112958,0,1062497364,22.484020387193706 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:37:41.228117,0,2105138034,26.83517275846072 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 
'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:38:19.780658,0,1443824736,25.801099953730787 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:38:50.000595,0,1077304404,22.347079305577246 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:39:21.802091,0,1380910176,25.69437359008145 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:39:51.661673,0,1253161050,25.278872112854447 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 
01:40:28.495428,0,1701166188,26.376182230389777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:40:58.659095,0,1284618330,25.38778340159123 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:41:29.227300,0,1284618330,25.42396262714132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:42:01.693769,0,1491932256,26.049173770932143 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:42:39.003275,0,1412367456,25.450957445704358 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:43:08.560591,0,1205053530,24.982224495498585 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:43:38.655869,0,1190246490,24.935927330385375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:44:07.301698,0,1045847124,22.58554082987191 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:44:37.461976,0,1190246490,24.671958393096833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 
1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:45:07.650100,0,1221703770,24.956348430879675 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:46:01.404524,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:46:38.986518,0,2082647148,26.632249118046865 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:47:07.608826,0,1125411924,23.429789667811992 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 
'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 01:47:37.867613,0,1127331930,24.145917266173598 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:15.090054,0,1826995308,26.770198001318732 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:53.000294,0,2011868268,26.415120168334965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:49:26.441014,0,1382830182,25.316083834952625 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 
4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 2]}",2022-11-30 01:49:59.544683,0,1414287462,25.711571467826957 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:50:30.849009,0,1223546970,24.831513194585497 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:51:01.107149,0,1079147604,22.675323576984212 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, 1, -1, 2]}",2022-11-30 01:51:31.946530,0,1143982170,24.30020519062015 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 
'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:52:01.944178,0,1158789210,24.760354519096623 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:52:32.644831,0,1175439450,24.745534135337422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 01:53:01.116235,0,1062497364,22.613029220889654 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:53:35.041431,0,1619681382,26.2082672911056 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 
'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:54:05.943378,0,1255004250,25.072608209842993 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:54:35.408367,0,1110604884,23.1627621167102 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:55:06.650719,0,1317918810,25.578281446432214 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:55:36.475238,0,1127331930,24.221637357380914 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 
01:56:12.790186,0,1669708908,26.54140556368842 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:56:43.360950,0,1206896730,24.98000861493876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:57:13.953244,0,1364183130,25.71705181089393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:57:51.090401,0,1701166188,26.74516092088886 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:58:27.981238,0,1732623468,26.832353130261968 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:58:56.696835,0,1047690324,21.97995176988117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:59:31.345276,0,1588224102,26.3358070373563 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:00:00.113102,0,1062497364,22.362618000056415 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 02:00:29.945995,0,1127331930,24.648057370763627 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:01:00.265931,0,1301268570,25.436935433814305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:01:31.151981,0,1380833370,25.654131643295717 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:02:09.301256,0,1795538028,26.84364156318439 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:02:37.517073,0,1031040084,21.937041004808965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 
'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:03:12.651254,0,1556766822,26.361614746242726 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:03:43.953623,0,1286461530,25.33590186847325 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 02:04:14.640998,0,1127331930,24.26800692018226 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:04:48.236910,0,1445744742,25.8605605687801 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 
'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:05:22.538414,0,1430937702,25.736368197822884 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:05:51.408503,0,1031040084,22.160851653528795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:06:26.648873,0,1525309542,26.283505470568386 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:01.841145,0,1651138662,26.200923396852648 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 
'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:36.826535,0,1493852262,26.022938040890846 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:08:11.743622,0,1651138662,26.323507870111342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:08:40.810079,0,1093954644,23.10914434974903 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:09:10.497316,0,1093954644,22.93525727677559 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 
'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:09:45.035662,0,1525309542,26.303765162822714 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 02:10:15.853041,0,1269811290,25.14969354058586 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:10:53.034327,0,1764080748,26.719747425528375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:11:27.448102,0,1493852262,26.147631963028342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 
02:12:01.745228,0,1462394982,25.806638854264037 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 2]}",2022-11-30 02:12:33.064042,0,1192089690,24.67646662934568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 1, -1, 1]}",2022-11-30 02:13:31.905219,0,1014389844,22.07444316375671 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:14:13.239756,0,2196499314,26.808937868203532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:14:45.016226,0,1108761684,22.99185474514479 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:15:16.151531,0,1110681690,24.451311787293157 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:15:52.689296,0,1653058668,26.388890341662112 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:16:30.799650,0,1945010028,26.832269839482606 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:17:09.628703,0,1810345068,26.87035240576452 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 
2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:17:41.163075,0,1158789210,24.73994162716646 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:18:12.461543,0,1253161050,25.18416515633912 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:18:46.947088,0,1445744742,25.761961494652613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:19:21.548056,0,1540116582,26.36816465236295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 
2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:19:55.543285,0,1477202022,26.259735292233113 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:20:26.257628,0,1410447450,25.769535189140193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:21:00.935996,0,1508659302,26.45714906422908 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:21:39.693771,0,2064818028,27.05141228767061 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 
8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:22:16.858717,0,1634488422,26.322548970809848 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:22:53.592330,0,1715973228,26.657027696998984 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:23:23.753177,0,1142138970,24.539293515717535 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:24:02.314804,0,1860295788,27.020732069299534 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:24:40.894881,0,2023653228,26.816836839483663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:25:19.593071,0,1860295788,26.957189752523504 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:25:50.458625,0,1284618330,25.61733025863949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:26:20.744436,0,1236510810,25.25415313294958 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:26:53.298888,0,1045847124,22.08090715962085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 02:27:27.137456,0,1492009062,26.052263535617904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:28:01.945411,0,1571573862,26.118237209470863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:28:38.594669,0,1747430508,26.903183105861135 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 
02:29:18.253776,0,2076691314,26.879803077690028 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:29:56.298060,0,2003070828,26.839090098499142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:30:33.772050,0,1603031142,26.482417706814886 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:31:13.542551,0,2155334514,26.87436116692265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:31:47.410084,0,1077304404,22.604758748152854 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, -1]}",2022-11-30 02:32:17.856179,0,1173596250,24.7651262424877 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:32:48.975305,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-30 02:33:19.260398,0,1205053530,25.137127615371046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 2, 1]}",2022-11-30 02:33:51.759557,0,1364183130,25.966442456302147 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 
1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:34:21.926894,0,1093954644,23.220261036558885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:34:53.358603,0,1190246490,25.1359189611972 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:22.930318,0,1062497364,22.7755095138295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:53.858887,0,1284618330,25.537416313175953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 
2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:36:31.595724,0,1764080748,26.882073104251866 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:37:02.267669,0,1316075610,25.570143211664274 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:37:41.647836,0,2134752114,26.898452516723687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:13.005215,0,1031040084,22.282220573694513 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 
8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:44.151950,0,1347532890,25.784134961825835 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:39:15.266274,0,1221703770,25.202375572779086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 2, 1]}",2022-11-30 02:39:46.953641,0,1332725850,25.851297056506393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:17.653376,0,1173596250,24.972344992193374 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:49.609980,0,1062497364,22.535667407831653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:41:20.761069,0,1110681690,24.40576457445596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:42:27.496173,0,1828838508,26.85145321742662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:42:57.919093,0,982932564,21.761604389716634 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 
'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:28.655608,0,1079224410,23.718368472465446 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:58.512911,0,1062497364,22.74182664332034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:44:32.816447,0,1445667936,26.19212122197997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:45:10.899684,0,1477125216,26.010153160637532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 
02:45:41.313907,0,1045847124,22.827474314422002 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:46:19.207836,0,1749273708,26.60418713411001 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:46:53.592587,0,1414210656,26.00726305388265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:47:23.850237,0,1142138970,24.6489542661405 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:47:54.746779,0,1316075610,25.689218653291295 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:48:25.548201,0,1347532890,25.751911210030475 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:04.198324,0,1780730988,27.00579454971298 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:42.700124,0,1812188268,26.874801178813083 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:13.662203,0,1158789210,24.921368325030492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 
1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:43.904715,0,999582804,22.026558500552152 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:51:22.015853,0,1686359148,26.62215586535193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:52:00.755827,0,1667788902,26.61057212093059 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:52:37.454316,0,1636331622,26.6512297923577 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 
'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-30 02:53:07.714810,0,1014389844,22.393618015178536 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:53:43.144481,0,1510502502,25.993038659880476 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:19.052536,0,1573417062,26.421662873667255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:49.657459,0,1095874650,23.883133083803862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 
4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:21.070738,0,1206896730,25.148678288815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:52.009648,0,1014389844,22.06913488473211 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:56:25.509329,0,1382753376,25.54875613062997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:56:56.957773,0,1301268570,25.649734062232383 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 02:57:31.518411,0,1493775456,26.10832383442528 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:01.613773,0,1031040084,22.362002479551165 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:31.705698,0,1062497364,22.819328594276566 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-30 02:59:02.697733,0,1173596250,24.988685667678173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 
'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:59:33.659490,0,1221703770,25.302962108892185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:00:09.446922,0,1541959782,26.112077468690867 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:00:39.473024,0,1079224410,23.955772671551667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:01:15.155182,0,1541959782,26.31776013053777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 
03:01:46.072247,0,1253161050,25.489643989727142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, 1]}",2022-11-30 03:02:17.596453,0,1364183130,25.815981278057624 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 03:02:48.933726,0,1045847124,22.32432265753782 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:03:21.469495,0,1269811290,25.513369223069635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:03:53.160286,0,1284618330,25.46582706181441 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:04:31.684525,0,1717816428,26.710511535879036 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:05:10.042233,0,1717816428,26.84283267727433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:05:41.855922,0,1190246490,24.970583908771232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 03:06:20.184503,0,1636331622,26.504345633971067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 
1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:06:51.460598,0,1127331930,24.268248941796838 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:28.142691,0,1573417062,26.430998131687453 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:59.749480,0,1253161050,25.38845190370804 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:08:30.871157,0,1190246490,25.041571400304573 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 
2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:09:02.664589,0,1110681690,24.006237352801346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:09:34.238610,0,1238354010,25.34276030239052 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:10:34.311288,0,999582804,21.80118151877014 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:11:13.593310,0,1797381228,27.043140669460854 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 
8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:11:43.503387,0,1093954644,23.007414463752117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:12:14.163528,0,1095874650,23.79086433289906 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:12:50.848022,0,1621524582,26.025864778342207 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:13:21.556759,0,1190246490,25.077358466768757 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:13:55.398877,0,1429017696,26.12059006524399 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:14:26.326007,0,999582804,22.127524836343532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-30 03:14:57.762642,0,1221703770,25.21914419948441 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:15:36.252929,0,1667788902,26.6927938370969 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': 
[2, -1, 2, -1, 1, 1]}",2022-11-30 03:16:14.254276,0,1652981862,26.66556123105007 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:16:45.445260,0,1127331930,24.13548008816253 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:17:15.639603,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:17:54.414138,0,1477125216,26.149095479536744 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 
03:18:25.964419,0,1347532890,25.790673448942552 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:18:56.523414,0,1031040084,22.518290658576667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, 1]}",2022-11-30 03:19:27.666139,0,1316075610,25.42608049975173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:19:59.551900,0,1221703770,25.355575788780996 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:20:30.914307,0,1062497364,22.58490426133199 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 03:21:03.259812,0,1316075610,25.632790518407255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:21:37.308205,0,1445667936,26.01769230789309 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:22:13.654111,0,1493852262,26.206006379852774 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:22:44.661526,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 
1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:23:16.791537,0,1158789210,24.4930221168223 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:23:48.818752,0,1062497364,22.608186762946183 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:24:26.151864,0,1590067302,26.365434081423825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:25:04.997659,0,1686359148,26.898935796393708 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 
'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:25:38.612617,0,1397560416,26.061090394566637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:26:18.290877,0,1734466668,26.887018361917384 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:26:50.457197,0,1284618330,25.456784215173144 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:27:24.405212,0,1397560416,26.060964017387292 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 
'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:28:00.762632,0,1558610022,26.24033664128142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:28:30.909491,0,1093954644,22.91490934985308 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:29:06.861622,0,1573417062,26.51638836290174 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:29:38.763394,0,1253161050,25.32548928774925 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:30:14.354144,0,1541959782,26.36876929191619 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:30:45.758366,0,1095874650,24.13357019256553 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:31:22.344668,0,1527152742,26.27025764535305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:31:54.461060,0,1253161050,25.41836880480659 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': 
[1, -1, 1, -1, 1, 1]}",2022-11-30 03:32:26.060470,0,1284618330,25.472978060833974 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:33:02.551992,0,1510502502,26.241380252439953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:33:33.960758,0,1347532890,25.707442276004617 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:34:12.877897,0,1717816428,26.937139740910155 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 
03:34:43.973196,0,1095874650,23.88536811980862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:35:22.799736,0,1749273708,27.035785346779857 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:36:01.488138,0,1780730988,26.94260959266693 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:36:41.098531,0,1765923948,27.1368398440508 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 1]}",2022-11-30 03:37:11.314304,0,1062497364,22.486957414672982 +"{'encoder_embed_dim': [512], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:37:43.678526,0,1190246490,24.977323840246868 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:38:15.756806,0,1095874650,23.82580846204717 diff --git a/examples/notebook/tensorflow/vgg19_ibean/README.md b/examples/notebook/tensorflow/vgg19_ibean/README.md new file mode 100644 index 00000000000..f311bf0bb81 --- /dev/null +++ b/examples/notebook/tensorflow/vgg19_ibean/README.md @@ -0,0 +1,251 @@ +# Accelerate VGG19 Inference on Intel® Gen4 Xeon® Sapphire Rapids + + +## Introduction + +Intel® Gen4 Xeon® Sapphire Rapids supports new hardware feature: [Intel® Advanced Matrix Extensions (AMX)](https://www.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-amx-instructions.html) which accelerates deep learning inference by INT8/BF16 data type. + +AMX is better than VNNI ([AVX-512 Vector Neural Network Instructions](https://www.intel.com/content/dam/www/public/us/en/documents/product-overviews/dl-boost-product-overview.pdf) supported by older Xeon®) to accelerate INT8 model. It's 8 times performance of VNNI in theory. 
+ +Intel® Neural Compressor helps quantize the FP32 model to INT8 and control the accuracy loss as expected. + +This example shows a whole pipeline: + +1. Train an image classification model [VGG19](https://arxiv.org/abs/1409.1556) by transfer learning based on [TensorFlow Hub](https://tfhub.dev) trained model. + +2. Quantize the FP32 Keras model and get an INT8 PB model by Intel® Neural Compressor. + +3. Test and compare the performance of FP32 & INT8 models. + +This example can be executed on Intel® CPUs that support VNNI or AMX. There will be more performance improvement on Intel® CPU with AMX. + + +To learn more about Intel® Neural Compressor, please refer to the official website for detailed info and news: [https://github.com/intel/neural-compressor](https://github.com/intel/neural-compressor) + + +We will learn the acceleration of AI inference by Intel AI technology: + +1. Intel® Advanced Matrix Extensions + +2. Intel® Deep Learning Boost + +3. Intel® Neural Compressor + +4. Intel® Optimization for Tensorflow* + + +## Quantization Plus BF16 on Sapphire Rapids (SPR) + +As we know, SPR supports AMX-INT8 and AMX-BF16 instructions which accelerate the INT8 and BF16 layer inference. + +Intel® Neural Compressor has this special function for SPR: during quantizing the model, it will convert the FP32 layers to BF16 which can't be quantized when executing the quantization on SPR automatically. Converting FP32 to BF16 also follows the rule of the AI framework. + +It will help accelerate the model on SPR as much as possible and control the accuracy loss as expected. + +How to enable it? + +1. Install Intel® Optimization for Tensorflow*/Intel® Extension for Tensorflow* of a release that supports this feature. + +Note, the public release can't support it now. + +2. Execute the quantization process by calling the Intel® Neural Compressor API on SPR. + +We could force-enable this feature by setting environment variables, if the quantization is executed on a Xeon which doesn't support AMX. 
+ +``` +import os +os.environ["FORCE_BF16"] = "1" +os.environ["MIX_PRECISION_TEST"] = "1" +``` + +How to disable it? +``` +import os +os.environ["FORCE_BF16"] = "0" +os.environ["MIX_PRECISION_TEST"] = "0" +``` +This example is used to highlight this feature. + +## Code + +|Function|Code|Input|Output| +|-|-|-|-| +|Train and quantize a CNN model|train_model.py|dataset: ibean|model_keras.fp32
model_pb.int8| +|Test performance|profiling_inc.py|model_keras.fp32
model_pb.int8|32.json
8.json| +|Compare the performance|compare_perf.py|32.json
8.json|stdout/stderr
log file
fp32_int8_absolute.png
fp32_int8_times.png| + +Executing **run_sample.sh** in a shell will call the above scripts to finish the demo. Or execute **inc_quantize_vgg19.ipynb** in a Jupyter notebook to finish the demo. + +## Hardware Environment + +### Local Server or Cloud + +It's recommended to use 4th Generation Intel® Xeon® Scalable Processors (SPR) or newer, which include: + +1. AVX512 instruction to speed up training & inference AI model. + +2. Intel® Advanced Matrix Extensions (AMX) to accelerate AI/DL Inference with INT8/BF16 Model. + +It can also be executed on other Intel CPUs. If the CPU supports Intel® Deep Learning Boost, the performance will be increased obviously. Without it, maybe it's 1.x times of FP32. + + +### Intel® DevCloud + +If you have no such hardware platform to support Intel® Advanced Matrix Extensions (AMX) or Intel® Deep Learning Boost, you could register to Intel® DevCloud and try this example on a new Xeon with Intel® Deep Learning Boost freely. To learn more about working with Intel® DevCloud, please refer to [Intel® DevCloud](https://www.intel.com/content/www/us/en/developer/tools/devcloud/overview.html) + + +## Running Environment + + +### Local Server or Cloud + +Set up your own running environment in a local server or cloud (including Intel® DevCloud): + +#### Install by PyPi + +Create virtual environment **env_inc**: + +``` +pip_set_env.sh +``` +Activate it by: + +``` +source env_inc/bin/activate +``` + +#### Install by Conda + +Create virtual environment **env_inc**: + +``` +conda_set_env.sh +``` + +Activate it by: + +``` +conda activate env_inc +``` + +#### Run by Jupyter Notebook + +Startup Jupyter Notebook: + +``` +./run_jupyter.sh +``` + +Please open **inc_quantize_vgg19.ipynb** in Jupyter Notebook. + +After setting the right kernel, follow the guide in it to run this demo. + + +### Intel® DevCloud + + +#### Getting Started with Intel® DevCloud + +This article assumes you are familiar with the Intel® DevCloud environment. 
To learn more about working with Intel® DevCloud, please refer to [Intel® DevCloud](https://www.intel.com/content/www/us/en/developer/tools/devcloud/overview.html). +Specifically, this article assumes: + +1. You have an Intel® DevCloud account. +2. You are familiar with usage of Intel® DevCloud, like login by SSH client. +3. Developers are familiar with Python, AI model training and inference based on Tensorflow*. + +#### Setup based on Intel® oneAPI AI Analytics Toolkit + +1. SSH to Intel® DevCloud or open a terminal by Jupyter notebook. + +2. Create virtual environment **env_inc**: + +``` +./devcloud_setup_env.sh +``` + +Activate it by: + +``` +conda activate env_inc +``` + +#### Run in SSH Login Intel® DevCloud for oneAPI + +If you have no SPR server, you can try on Intel® DevCloud which provides an SPR server running environment. + +Submit the job to a compute node with the property 'clx' or 'icx', which supports Intel® Deep Learning Boost (avx512_vnni), or 'spr', which supports Intel® Advanced Matrix Extensions (AMX). + + +##### Job Submit +``` +!qsub run_in_intel_devcloud.sh -d `pwd` -l nodes=1:spr:ppn=2 +28029.v-qsvr-nda.aidevcloud +``` + +Note, please run the above command in the login node. There will be an error as below if you run it on a compute node: +``` +qsub: submit error (Bad UID for job execution MSG=ruserok failed validating uXXXXX/uXXXXX from s001-n054.aidevcloud) +``` + +##### Check job status + +``` +qstat +``` + +After the job is over (successfully or with a fault), there will be log files, like: + +1. **run_in_intel_devcloud.sh.o28029** +2. 
**run_in_intel_devcloud.sh.e28029** + +##### Check Result + +##### Check Result in Log File + +``` +tail -23 `ls -lAtr run_in_intel_devcloud.sh.o* | tail -1 | awk '{print $9}'` +``` +Or +Check the result in a log file, like : **run_in_intel_devcloud.sh.o28029**: + +``` +!tail -23 run_in_intel_devcloud.sh.o1842253 + + +Model FP32 INT8 +throughput(fps) 572.4982883964987 X030.70552731285 +latency(ms) 2.8339174329018104 X.128233714979522 +accuracy(%) 0.9799 X.9796 + +Save to fp32_int8_absolute.png + +Model FP32 INT8 +throughput_times 1 X.293824608282245 +latency_times 1 X.7509864932092611 +accuracy_times 1 X.9996938463108482 + +Save to fp32_int8_times.png +Please check the PNG files to see the performance! +This demo is finished successfully! +Thank you! + +######################################################################## +# End of output for job 1842253.v-qsvr-1.aidevcloud +# Date: Thu 27 Jan 2022 07:05:52 PM PST +######################################################################## + +... + +``` + +We will see the performance and accuracy of FP32 and INT8 model. The performance could be obviously increased if running on Xeon with VNNI. + +##### Check Result in PNG file + +The demo creates figure files: fp32_int8_absolute.png, fp32_int8_times.png to show performance bar. They could be used in report. 
+ +Copy files from DevCloud in host: + +``` +scp devcloud:~/xxx/*.png ./ +``` diff --git a/examples/notebook/tensorflow/vgg19_ibean/compare_perf.py b/examples/notebook/tensorflow/vgg19_ibean/compare_perf.py new file mode 100644 index 00000000000..57537206dc7 --- /dev/null +++ b/examples/notebook/tensorflow/vgg19_ibean/compare_perf.py @@ -0,0 +1,82 @@ +import json +import matplotlib.pyplot as plt + + +def autolabel(ax, rects): + """ + Attach a text label above each bar displaying its height + """ + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x() + rect.get_width()/2., 1.05*height, + '%0.4f' % float(height), + ha='center', va='bottom') + +def draw_bar(x, t, y, subplot, color, x_lab, y_lab, width=0.2): + plt.subplot(subplot) + plt.xticks(x, t) + ax1 = plt.gca() + ax1.set_xlabel(x_lab) + ax1.set_ylabel(y_lab, color=color) + rects1 = ax1.bar(x, y, color=color, width=width) + ax1.tick_params(axis='y', labelcolor=color) + autolabel(ax1, rects1) + +def fix_len(name, length): + if len(name) +For example, bash command will look like the following, where *``* is the address of the master node, it won't be necessary for single node case, *``* is the desired processes to use in current node, for node with GPU, usually set to number of GPUs in this node, for node without GPU and use CPU for training, it's recommended set to 1, *``* is the number of nodes to use, *``* is the rank of the current node, rank starts from 0 to *``*`-1`. +
+Also please note that to use CPU for training in each node with multi nodes settings, argument `--no_cuda` is mandatory. In multi nodes setting, following command needs to be launched in each node, and all the commands should be the same except for *``*, which should be integer from 0 to *``*`-1` assigned to each node. + +```bash +python -m torch.distributed.launch --master_addr= --nproc_per_node= --nnodes= --node_rank= \ + main.py --epochs 200 --lr 0.02 --name CNN-2-distillation --student_type CNN-2 --teacher_type CNN-10 --teacher_model runs/CNN-10/model_best.pth.tar --tensorboard +``` \ No newline at end of file diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py b/examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py index e24eb7767ff..685a0109450 100644 --- a/examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py +++ b/examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py @@ -10,6 +10,7 @@ import torchvision.datasets as datasets import torchvision.transforms as transforms +from accelerate import Accelerator from plain_cnn_cifar import ConvNetMaker, plane_cifar100_book # used for logging to TensorBoard @@ -60,6 +61,7 @@ help='loss weights of distillation, should be a list of length 2, ' 'and sum to 1.0, first for student targets loss weight, ' 'second for teacher student loss weight.') +parser.add_argument("--no_cuda", action='store_true', help='use cpu for training.') parser.set_defaults(augment=True) @@ -75,10 +77,13 @@ def set_seed(seed): def main(): global args, best_prec1 args, _ = parser.parse_known_args() + accelerator = Accelerator(cpu=args.no_cuda) + best_prec1 = 0 if args.seed is not None: set_seed(args.seed) - if args.tensorboard: configure("runs/%s" % (args.name)) + with accelerator.local_main_process_first(): + if args.tensorboard: configure("runs/%s"%(args.name)) # Data loading code normalize = transforms.Normalize(mean=[0.5071, 0.4866, 0.4409], std=[0.2675, 0.2565, 0.2761]) @@ 
-121,9 +126,9 @@ def main(): raise NotImplementedError('Unsupported student model type') # get the number of model parameters - print('Number of teacher model parameters: {}'.format( + accelerator.print('Number of teacher model parameters: {}'.format( sum([p.data.nelement() for p in teacher_model.parameters()]))) - print('Number of student model parameters: {}'.format( + accelerator.print('Number of student model parameters: {}'.format( sum([p.data.nelement() for p in student_model.parameters()]))) kwargs = {'num_workers': 0, 'pin_memory': True} @@ -135,10 +140,10 @@ def main(): if args.loss_weights[1] > 0: from tqdm import tqdm def get_logits(teacher_model, train_dataset): - print("***** Getting logits of teacher model *****") - print(f" Num examples = {len(train_dataset) }") + accelerator.print("***** Getting logits of teacher model *****") + accelerator.print(f" Num examples = {len(train_dataset) }") logits_file = os.path.join(os.path.dirname(args.teacher_model), 'teacher_logits.npy') - if not os.path.exists(logits_file): + if not os.path.exists(logits_file) and accelerator.is_local_main_process: teacher_model.eval() train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, **kwargs) train_dataloader = tqdm(train_dataloader, desc="Evaluating") @@ -147,8 +152,8 @@ def get_logits(teacher_model, train_dataset): outputs = teacher_model(input) teacher_logits += [x for x in outputs.numpy()] np.save(logits_file, np.array(teacher_logits)) - else: - teacher_logits = np.load(logits_file) + accelerator.wait_for_everyone() + teacher_logits = np.load(logits_file) train_dataset.targets = [{'labels':l, 'teacher_logits':tl} \ for l, tl in zip(train_dataset.targets, teacher_logits)] return train_dataset @@ -163,15 +168,15 @@ def get_logits(teacher_model, train_dataset): # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) + accelerator.print("=> loading 
checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] student_model.load_state_dict(checkpoint['state_dict']) - print("=> loaded checkpoint '{}' (epoch {})" + accelerator.print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: - print("=> no checkpoint found at '{}'".format(args.resume)) + accelerator.print("=> no checkpoint found at '{}'".format(args.resume)) # define optimizer optimizer = torch.optim.SGD(student_model.parameters(), args.lr, @@ -179,13 +184,18 @@ def get_logits(teacher_model, train_dataset): weight_decay=args.weight_decay) # cosine learning rate - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*args.epochs) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, len(train_loader) * args.epochs // accelerator.num_processes + ) + + student_model, teacher_model, train_loader, val_loader, optimizer = \ + accelerator.prepare(student_model, teacher_model, train_loader, val_loader, optimizer) def train_func(model): - return train(train_loader, model, scheduler, distiller, best_prec1) + return train(train_loader, model, scheduler, distiller, best_prec1, accelerator) def eval_func(model): - return validate(val_loader, model, distiller) + return validate(val_loader, model, distiller, accelerator) from neural_compressor.experimental import Distillation, common from neural_compressor.experimental.common.criterion import PyTorchKnowledgeDistillationLoss @@ -204,11 +214,12 @@ def eval_func(model): directory = "runs/%s/"%(args.name) os.makedirs(directory, exist_ok=True) + model._model = accelerator.unwrap_model(model.model) model.save(directory) # change to framework model for further use model = model.model -def train(train_loader, model, scheduler, distiller, best_prec1): +def train(train_loader, model, scheduler, distiller, best_prec1, accelerator): distiller.on_train_begin() 
for epoch in range(args.start_epoch, args.epochs): """Train for one epoch on the training set""" @@ -233,13 +244,15 @@ def train(train_loader, model, scheduler, distiller, best_prec1): loss = distiller.on_after_compute_loss(input, output, loss, teacher_logits) # measure accuracy and record loss + output = accelerator.gather(output) + target = accelerator.gather(target) prec1 = accuracy(output.data, target, topk=(1,))[0] - losses.update(loss.data.item(), input.size(0)) - top1.update(prec1.item(), input.size(0)) + losses.update(accelerator.gather(loss).sum().data.item(), input.size(0)*accelerator.num_processes) + top1.update(prec1.item(), input.size(0)*accelerator.num_processes) # compute gradient and do SGD step distiller.optimizer.zero_grad() - loss.backward() + accelerator.backward(loss) # loss.backward() distiller.optimizer.step() scheduler.step() @@ -248,7 +261,7 @@ def train(train_loader, model, scheduler, distiller, best_prec1): end = time.time() if i % args.print_freq == 0: - print('Epoch: [{0}][{1}/{2}]\t' + accelerator.print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' @@ -260,19 +273,20 @@ def train(train_loader, model, scheduler, distiller, best_prec1): # remember best prec@1 and save checkpoint is_best = distiller.best_score > best_prec1 best_prec1 = max(distiller.best_score, best_prec1) - save_checkpoint({ - 'epoch': distiller._epoch_runned + 1, - 'state_dict': model.state_dict(), - 'best_prec1': best_prec1, - }, is_best) - # log to TensorBoard - if args.tensorboard: - log_value('train_loss', losses.avg, epoch) - log_value('train_acc', top1.avg, epoch) - log_value('learning_rate', scheduler._last_lr[0], epoch) + if accelerator.is_local_main_process: + save_checkpoint({ + 'epoch': distiller._epoch_runned + 1, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + }, is_best) + # log to TensorBoard + if args.tensorboard: + 
log_value('train_loss', losses.avg, epoch) + log_value('train_acc', top1.avg, epoch) + log_value('learning_rate', scheduler._last_lr[0], epoch) -def validate(val_loader, model, distiller): +def validate(val_loader, model, distiller, accelerator): """Perform validation on the validation set""" batch_time = AverageMeter() top1 = AverageMeter() @@ -287,6 +301,8 @@ def validate(val_loader, model, distiller): output = model(input) # measure accuracy + output = accelerator.gather(output) + target = accelerator.gather(target) prec1 = accuracy(output.data, target, topk=(1,))[0] top1.update(prec1.item(), input.size(0)) @@ -295,15 +311,15 @@ def validate(val_loader, model, distiller): end = time.time() if i % args.print_freq == 0: - print('Test: [{0}/{1}]\t' + accelerator.print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format( i, len(val_loader), batch_time=batch_time, top1=top1)) - print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1)) + accelerator.print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1)) # log to TensorBoard - if args.tensorboard: + if accelerator.is_local_main_process and args.tensorboard: log_value('val_acc', top1.avg, distiller._epoch_runned) return top1.avg diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt b/examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt index 8db2f310ef5..71252629880 100644 --- a/examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt +++ b/examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt @@ -2,3 +2,4 @@ torch==1.5.0+cpu torchvision==0.6.0+cpu tensorboard_logger +accelerate \ No newline at end of file diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md index 14841061fdc..d449d5f797b 100644 --- 
a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md +++ b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md @@ -8,4 +8,15 @@ pip install -r requirements.txt python train_without_distillation.py --epochs 200 --lr 0.1 --layers 40 --widen-factor 2 --name WideResNet-40-2 --tensorboard # for distillation of the teacher model WideResNet40-2 to the student model MobileNetV2-0.35 python main.py --epochs 200 --lr 0.02 --name MobileNetV2-0.35-distillation --teacher_model runs/WideResNet-40-2/model_best.pth.tar --tensorboard --seed 9 +``` + +We also supported Distributed Data Parallel training on single node and multi nodes settings for distillation. To use Distributed Data Parallel to speedup training, the bash command needs a small adjustment. +
+ +For example, bash command will look like the following, where *`<master_addr>`* is the address of the master node, it won't be necessary for single node case, *`<nproc_per_node>`* is the desired processes to use in current node, for node with GPU, usually set to number of GPUs in this node, for node without GPU and use CPU for training, it's recommended set to 1, *`<nnodes>`* is the number of nodes to use, *`<node_rank>`* is the rank of the current node, rank starts from 0 to *`<nnodes>`*`-1`. +
+ +Also please note that to use CPU for training in each node with multi nodes settings, argument `--no_cuda` is mandatory. In multi nodes setting, following command needs to be launched in each node, and all the commands should be the same except for *`<node_rank>`*, which should be integer from 0 to *`<nnodes>`*`-1` assigned to each node. + +```bash +python -m torch.distributed.launch --master_addr=<master_addr> --nproc_per_node=<nproc_per_node> --nnodes=<nnodes> --node_rank=<node_rank> \ + main.py --epochs 200 --lr 0.02 --name MobileNetV2-0.35-distillation --teacher_model runs/WideResNet-40-2/model_best.pth.tar --tensorboard --seed 9 ``` \ No newline at end of file diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py index e7f4e56888b..3778162d968 100644 --- a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py +++ b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py @@ -10,6 +10,7 @@ import torchvision.datasets as datasets import torchvision.transforms as transforms +from accelerate import Accelerator from wideresnet import WideResNet # used for logging to TensorBoard @@ -60,6 +61,7 @@ help='loss weights of distillation, should be a list of length 2, ' 'and sum to 1.0, first for student targets loss weight, ' 'second for teacher student loss weight.') +parser.add_argument("--no_cuda", action='store_true', help='use cpu for training.') parser.set_defaults(augment=True) def set_seed(seed): @@ -73,10 +75,13 @@ def set_seed(seed): def main(): global args, best_prec1 args, _ = parser.parse_known_args() + accelerator = Accelerator(cpu=args.no_cuda) + best_prec1 = 0 if args.seed is not None: set_seed(args.seed) - if args.tensorboard: configure("runs/%s"%(args.name)) + with accelerator.local_main_process_first(): + if args.tensorboard: configure("runs/%s"%(args.name)) # Data loading code normalize = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0,
113.9]], @@ -111,9 +116,9 @@ def main(): student_model = mobilenet.MobileNetV2(num_classes=10, width_mult=0.35) # get the number of model parameters - print('Number of teacher model parameters: {}'.format( + accelerator.print('Number of teacher model parameters: {}'.format( sum([p.data.nelement() for p in teacher_model.parameters()]))) - print('Number of student model parameters: {}'.format( + accelerator.print('Number of student model parameters: {}'.format( sum([p.data.nelement() for p in student_model.parameters()]))) kwargs = {'num_workers': 0, 'pin_memory': True} @@ -125,10 +130,10 @@ def main(): if args.loss_weights[1] > 0: from tqdm import tqdm def get_logits(teacher_model, train_dataset): - print("***** Getting logits of teacher model *****") - print(f" Num examples = {len(train_dataset) }") + accelerator.print("***** Getting logits of teacher model *****") + accelerator.print(f" Num examples = {len(train_dataset) }") logits_file = os.path.join(os.path.dirname(args.teacher_model), 'teacher_logits.npy') - if not os.path.exists(logits_file): + if not os.path.exists(logits_file) and accelerator.is_local_main_process: teacher_model.eval() train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, **kwargs) train_dataloader = tqdm(train_dataloader, desc="Evaluating") @@ -137,8 +142,8 @@ def get_logits(teacher_model, train_dataset): outputs = teacher_model(input) teacher_logits += [x for x in outputs.numpy()] np.save(logits_file, np.array(teacher_logits)) - else: - teacher_logits = np.load(logits_file) + accelerator.wait_for_everyone() + teacher_logits = np.load(logits_file) train_dataset.targets = [{'labels':l, 'teacher_logits':tl} \ for l, tl in zip(train_dataset.targets, teacher_logits)] return train_dataset @@ -153,15 +158,15 @@ def get_logits(teacher_model, train_dataset): # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) + 
accelerator.print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] student_model.load_state_dict(checkpoint['state_dict']) - print("=> loaded checkpoint '{}' (epoch {})" + accelerator.print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: - print("=> no checkpoint found at '{}'".format(args.resume)) + accelerator.print("=> no checkpoint found at '{}'".format(args.resume)) # define optimizer optimizer = torch.optim.SGD(student_model.parameters(), args.lr, @@ -169,13 +174,18 @@ def get_logits(teacher_model, train_dataset): weight_decay=args.weight_decay) # cosine learning rate - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*args.epochs) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, len(train_loader) * args.epochs // accelerator.num_processes + ) + + student_model, teacher_model, train_loader, val_loader, optimizer = \ + accelerator.prepare(student_model, teacher_model, train_loader, val_loader, optimizer) def train_func(model): - return train(train_loader, model, scheduler, distiller, best_prec1) + return train(train_loader, model, scheduler, distiller, best_prec1, accelerator) def eval_func(model): - return validate(val_loader, model, distiller) + return validate(val_loader, model, distiller, accelerator) from neural_compressor.experimental import Distillation, common from neural_compressor.experimental.common.criterion import PyTorchKnowledgeDistillationLoss @@ -194,11 +204,12 @@ def eval_func(model): directory = "runs/%s/"%(args.name) os.makedirs(directory, exist_ok=True) + model._model = accelerator.unwrap_model(model.model) model.save(directory) # change to framework model for further use model = model.model -def train(train_loader, model, scheduler, distiller, best_prec1): +def train(train_loader, model, scheduler, distiller, best_prec1, 
accelerator): distiller.on_train_begin() for epoch in range(args.start_epoch, args.epochs): """Train for one epoch on the training set""" @@ -222,13 +233,15 @@ def train(train_loader, model, scheduler, distiller, best_prec1): loss = distiller.on_after_compute_loss(input, output, loss, teacher_logits) # measure accuracy and record loss + output = accelerator.gather(output) + target = accelerator.gather(target) prec1 = accuracy(output.data, target, topk=(1,))[0] - losses.update(loss.data.item(), input.size(0)) - top1.update(prec1.item(), input.size(0)) + losses.update(accelerator.gather(loss).sum().data.item(), input.size(0)*accelerator.num_processes) + top1.update(prec1.item(), input.size(0)*accelerator.num_processes) # compute gradient and do SGD step distiller.optimizer.zero_grad() - loss.backward() + accelerator.backward(loss) # loss.backward() distiller.optimizer.step() scheduler.step() @@ -237,7 +250,7 @@ def train(train_loader, model, scheduler, distiller, best_prec1): end = time.time() if i % args.print_freq == 0: - print('Epoch: [{0}][{1}/{2}]\t' + accelerator.print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' @@ -249,19 +262,20 @@ def train(train_loader, model, scheduler, distiller, best_prec1): # remember best prec@1 and save checkpoint is_best = distiller.best_score > best_prec1 best_prec1 = max(distiller.best_score, best_prec1) - save_checkpoint({ - 'epoch': distiller._epoch_runned + 1, - 'state_dict': model.state_dict(), - 'best_prec1': best_prec1, - }, is_best) - # log to TensorBoard - if args.tensorboard: - log_value('train_loss', losses.avg, epoch) - log_value('train_acc', top1.avg, epoch) - log_value('learning_rate', scheduler._last_lr[0], epoch) + if accelerator.is_local_main_process: + save_checkpoint({ + 'epoch': distiller._epoch_runned + 1, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + }, is_best) + # log to 
TensorBoard + if args.tensorboard: + log_value('train_loss', losses.avg, epoch) + log_value('train_acc', top1.avg, epoch) + log_value('learning_rate', scheduler._last_lr[0], epoch) -def validate(val_loader, model, distiller): +def validate(val_loader, model, distiller, accelerator): """Perform validation on the validation set""" batch_time = AverageMeter() top1 = AverageMeter() @@ -276,6 +290,8 @@ def validate(val_loader, model, distiller): output = model(input) # measure accuracy + output = accelerator.gather(output) + target = accelerator.gather(target) prec1 = accuracy(output.data, target, topk=(1,))[0] top1.update(prec1.item(), input.size(0)) @@ -284,15 +300,15 @@ def validate(val_loader, model, distiller): end = time.time() if i % args.print_freq == 0: - print('Test: [{0}/{1}]\t' + accelerator.print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format( i, len(val_loader), batch_time=batch_time, top1=top1)) - print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1)) + accelerator.print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1)) # log to TensorBoard - if args.tensorboard: + if accelerator.is_local_main_process and args.tensorboard: log_value('val_acc', top1.avg, distiller._epoch_runned) return top1.avg diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt index 8db2f310ef5..71252629880 100644 --- a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt +++ b/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt @@ -2,3 +2,4 @@ torch==1.5.0+cpu torchvision==0.6.0+cpu tensorboard_logger +accelerate \ No newline at end of file diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md 
b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md index f2afe8c4ddd..dfbc8f5f556 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md @@ -177,7 +177,7 @@ Here we choose topk built-in metric and set accuracy target as tolerating 0.01 r ### Prepare -The related code please refer to examples/pytorch/fx/image_recognition/imagenet/cpu/ptq/main.py. +The related code please refer to examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py. ### Code Update diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index d1dab0d2f43..f11483acd16 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -77,4 +77,4 @@ tuning: relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. exit_policy: timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. + random_seed: 9527 # optional. random seed for deterministic tuning. 
\ No newline at end of file diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 84da4e1a2c6..878a21271f5 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -203,7 +203,7 @@ As ResNet18/50/101 series are typical classification models, use Top-K as metric ### Prepare -The related code please refer to examples/pytorch/ipex/image_recognition/imagenet/cpu/ptq/main.py. +The related code please refer to examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py. ### Tuning With Intel PyTorch Extension diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py index 7186718ab61..a5ce5a9885c 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py @@ -24,6 +24,7 @@ import torchvision.models as models from neural_compressor.adaptor.pytorch import get_torch_version from packaging.version import Version +import intel_extension_for_pytorch model_names = sorted(name for name in models.__dict__ diff --git a/examples/pytorch/nlp/huggingface_models/common/examples/research_projects/lxmert/requirements.txt b/examples/pytorch/nlp/huggingface_models/common/examples/research_projects/lxmert/requirements.txt index 21d81e1ed1b..b32386055b7 100644 --- a/examples/pytorch/nlp/huggingface_models/common/examples/research_projects/lxmert/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/common/examples/research_projects/lxmert/requirements.txt @@ -4,7 +4,7 @@ async-generator==1.10 
attrs==20.2.0 backcall==0.2.0 CacheControl==0.12.6 -certifi==2020.6.20 +certifi==2022.12.7 cffi==1.14.2 chardet==3.0.4 click==7.1.2 diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md new file mode 100644 index 00000000000..b3599c59a88 --- /dev/null +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md @@ -0,0 +1,38 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing PyTorch BERT tuning zoo result. + +# Prerequisite + +## 1. Installation + +The dependent packages are all in requirements, please install as following. + +``` +pip install -r requirements.txt +``` + +## 2. Run + +If the automatic download from modelhub fails, you can download [EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B?text=My+name+is+Clara+and+I+am) offline. + +```shell + +python run_clm.py \ + --model_name_or_path EleutherAI/gpt-j-6B \ + --dataset_name wikitext\ + --dataset_config_name wikitext-2-raw-v1 \ + --do_train \ + --do_eval \ + --tune \ + --output_dir /path/to/checkpoint/dir +``` + + +## 3. 
Command + +``` +bash run_tuning.sh --topology=gpt_j_wikitext +bash run_benchmark.sh --topology=gpt_j_wikitext --mode=performance --int8=true +``` diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/conf.yaml b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/conf.yaml similarity index 64% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/conf.yaml rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/conf.yaml index bedae633c06..0f75f809781 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/conf.yaml +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/conf.yaml @@ -17,15 +17,15 @@ version: 1.0 model: # mandatory. used to specify model specific information. name: bert - framework: pytorch # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops. + framework: pytorch_fx # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops. quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - approach: post_training_dynamic_quant + approach: post_training_static_quant tuning: accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. + relative: 0.5 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. + higher_is_better: False exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. max_trials: 600 random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt new file mode 100644 index 00000000000..763bed755a8 --- /dev/null +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt @@ -0,0 +1,5 @@ +sentencepiece != 0.1.92 +protobuf +evaluate +datasets +transformers >= 4.22.0 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh similarity index 59% rename from examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh index f5a8a058c33..a36507f4fca 100644 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_benchmark.sh +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh @@ -10,7 +10,11 @@ function main { # init params function init_params { + iters=100 + batch_size=16 tuned_checkpoint=saved_results + max_eval_samples=`expr ${iters} \* ${batch_size}` + echo ${max_eval_samples} for var in "$@" do case $var in @@ -44,41 +48,44 @@ function init_params { ;; esac done + } + # run_benchmark function run_benchmark { + extra_cmd='' if [[ ${mode} == "accuracy" ]]; then - mode_cmd="--accuracy_only " + mode_cmd=" --accuracy_only " elif [[ ${mode} == "benchmark" ]]; then - mode_cmd="--benchmark " + mode_cmd=" --benchmark " + extra_cmd=$extra_cmd" --max_eval_samples ${max_eval_samples}" else echo "Error: No such mode: ${mode}" exit 1 fi - extra_cmd="" - if [ -n "$dataset_location" ];then - extra_cmd=$extra_cmd"--dataset_dir ${dataset_location} " - fi - if [ -n "$input_model" ];then - 
extra_cmd=$extra_cmd"--pytorch_checkpoint ${input_model} " - fi - if [ -n "$tuned_checkpoint" ];then - extra_cmd=$extra_cmd"--tuned_checkpoint ${tuned_checkpoint} " + if [ "${topology}" = "gpt_j_wikitext" ]; then + TASK_NAME='wikitext' + model_name_or_path=$input_model + extra_cmd='--dataset_config_name=wikitext-2-raw-v1' fi + if [[ ${int8} == "true" ]]; then - extra_cmd=$extra_cmd"--int8" + extra_cmd=$extra_cmd" --int8" fi + echo $extra_cmd + + python -u run_clm.py \ + --model_name_or_path ${model_name_or_path} \ + --dataset_name ${TASK_NAME} \ + --do_eval \ + --per_device_eval_batch_size ${batch_size} \ + --output_dir ${tuned_checkpoint} \ + ${mode_cmd} \ + ${extra_cmd} - python run_tune.py \ - --backend pytorch \ - --manifest $dataset_location/dev-clean-wav.json \ - --pytorch_config_toml pytorch/configs/rnnt.toml \ - --scenario SingleStream \ - ${mode_cmd} \ - ${extra_cmd} } main "$@" diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py new file mode 100644 index 00000000000..17a32f1b57a --- /dev/null +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py @@ -0,0 +1,650 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2020 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset. +Here is the full list of checkpoints on the hub that can be fine-tuned by this script: +https://huggingface.co/models?filter=text-generation +""" +# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments. + +import logging +import math +import os +import sys +from dataclasses import dataclass, field +from itertools import chain +from typing import Optional + +import datasets +from datasets import load_dataset + +import evaluate +import transformers +from transformers import ( + CONFIG_MAPPING, + MODEL_FOR_CAUSAL_LM_MAPPING, + AutoConfig, + AutoModelForCausalLM, + AutoTokenizer, + HfArgumentParser, + Trainer, + TrainingArguments, + default_data_collator, + is_torch_tpu_available, + set_seed, +) +from transformers.testing_utils import CaptureLogger +from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import check_min_version, send_example_telemetry +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.22.0.dev0") + +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") + +logger = logging.getLogger(__name__) + + +MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys()) +MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch. + """ + + model_name_or_path: Optional[str] = field( + default=None, + metadata={ + "help": ( + "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch." 
+ ) + }, + ) + model_type: Optional[str] = field( + default=None, + metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}, + ) + config_overrides: Optional[str] = field( + default=None, + metadata={ + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) + }, + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": ( + "Will use the token generated when running `huggingface-cli login` (necessary to use this script " + "with private models)." 
+ ) + }, + ) + tune: bool = field( + default=False, metadata={"help": "tune quantized model with Neural Compressor"} + ) + int8: bool = field( + default=False, metadata={"help": "use int8 model to get accuracy or benchmark"} + ) + benchmark: bool = field( + default=False, metadata={"help": "get benchmark instead of accuracy"} + ) + accuracy_only: bool = field( + default=False, metadata={"help": "get accuracy"} + ) + + def __post_init__(self): + if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): + raise ValueError( + "--config_overrides can't be used in combination with --config_name or --model_name_or_path" + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) + }, + ) + + block_size: Optional[int] = field( + default=None, + metadata={ + "help": ( + "Optional input sequence length after tokenization. 
" + "The training dataset will be truncated in block of this size for training. " + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) + }, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + validation_split_percentage: Optional[int] = field( + default=5, + metadata={ + "help": "The percentage of the train set used as validation set in case there's no validation split" + }, + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + keep_linebreaks: bool = field( + default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."} + ) + + def __post_init__(self): + if self.dataset_name is None and self.train_file is None and self.validation_file is None: + raise ValueError("Need either a dataset name or a training/validation file.") + else: + if self.train_file is not None: + extension = self.train_file.split(".")[-1] + assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file." + if self.validation_file is not None: + extension = self.validation_file.split(".")[-1] + assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file." + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_clm", model_args, data_args) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + logger.info(f"Training/evaluation parameters {training_args}") + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. 
To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) + # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ + # (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called + # 'text' is found. You can easily tweak this behavior (see below). + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + if "validation" not in raw_datasets.keys(): + raw_datasets["validation"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=f"train[:{data_args.validation_split_percentage}%]", + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + raw_datasets["train"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=f"train[{data_args.validation_split_percentage}%:]", + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + else: + data_files = {} + dataset_args = {} + if data_args.train_file is not None: + data_files["train"] = data_args.train_file + if data_args.validation_file is not None: + data_files["validation"] = data_args.validation_file + extension = ( + data_args.train_file.split(".")[-1] + if data_args.train_file is not None + else 
data_args.validation_file.split(".")[-1] + ) + if extension == "txt": + extension = "text" + dataset_args["keep_linebreaks"] = data_args.keep_linebreaks + raw_datasets = load_dataset( + extension, + data_files=data_files, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + **dataset_args, + ) + # If no validation data is there, validation_split_percentage will be used to divide the dataset. + if "validation" not in raw_datasets.keys(): + raw_datasets["validation"] = load_dataset( + extension, + data_files=data_files, + split=f"train[:{data_args.validation_split_percentage}%]", + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + **dataset_args, + ) + raw_datasets["train"] = load_dataset( + extension, + data_files=data_files, + split=f"train[{data_args.validation_split_percentage}%:]", + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + **dataset_args, + ) + + # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at + # https://huggingface.co/docs/datasets/loading_datasets.html. + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
+ + config_kwargs = { + "cache_dir": model_args.cache_dir, + "revision": model_args.model_revision, + "use_auth_token": True if model_args.use_auth_token else None, + } + if model_args.config_name: + config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) + elif model_args.model_name_or_path: + config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) + else: + config = CONFIG_MAPPING[model_args.model_type]() + logger.warning("You are instantiating a new config instance from scratch.") + if model_args.config_overrides is not None: + logger.info(f"Overriding config: {model_args.config_overrides}") + config.update_from_string(model_args.config_overrides) + logger.info(f"New config: {config}") + + tokenizer_kwargs = { + "cache_dir": model_args.cache_dir, + "use_fast": model_args.use_fast_tokenizer, + "revision": model_args.model_revision, + "use_auth_token": True if model_args.use_auth_token else None, + } + if model_args.tokenizer_name: + tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) + elif model_args.model_name_or_path: + tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) + else: + raise ValueError( + "You are instantiating a new tokenizer from scratch. This is not supported by this script." + "You can do it from another script, save it, and load it from here, using --tokenizer_name." 
+ ) + + if model_args.model_name_or_path: + model = AutoModelForCausalLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + else: + model = AutoModelForCausalLM.from_config(config) + n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) + logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + + # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch + # on a small vocab and want a smaller embedding size, remove this test. + embedding_size = model.get_input_embeddings().weight.shape[0] + if len(tokenizer) > embedding_size: + model.resize_token_embeddings(len(tokenizer)) + + # Preprocessing the datasets. + # First we tokenize all the texts. + if training_args.do_train: + column_names = raw_datasets["train"].column_names + else: + column_names = raw_datasets["validation"].column_names + text_column_name = "text" if "text" in column_names else column_names[0] + + # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function + tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base") + + def tokenize_function(examples): + with CaptureLogger(tok_logger) as cl: + output = tokenizer(examples[text_column_name]) + # clm input could be much much longer than block_size + if "Token indices sequence length is longer than the" in cl.out: + tok_logger.warning( + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model." 
+ ) + return output + + with training_args.main_process_first(desc="dataset map tokenization"): + tokenized_datasets = raw_datasets.map( + tokenize_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on dataset", + ) + + if data_args.block_size is None: + block_size = tokenizer.model_max_length + if block_size > 1024: + logger.warning( + f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). " + "Picking 1024 instead. You can change that default value by passing --block_size xxx." + ) + block_size = 1024 + else: + if data_args.block_size > tokenizer.model_max_length: + logger.warning( + f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model" + f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}." + ) + block_size = min(data_args.block_size, tokenizer.model_max_length) + + # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. + def group_texts(examples): + # Concatenate all texts. + concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + total_length = len(concatenated_examples[list(examples.keys())[0]]) + # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can + # customize this part to your needs. + if total_length >= block_size: + total_length = (total_length // block_size) * block_size + # Split by chunks of max_len. + result = { + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + for k, t in concatenated_examples.items() + } + result["labels"] = result["input_ids"].copy() + return result + + # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder + # for each of those groups of 1,000 texts. 
You can adjust that batch_size here but a higher value might be slower + # to preprocess. + # + # To speed up this part, we use multiprocessing. See the documentation of the map method for more information: + # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map + + with training_args.main_process_first(desc="grouping texts together"): + lm_datasets = tokenized_datasets.map( + group_texts, + batched=True, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + desc=f"Grouping texts in chunks of {block_size}", + ) + + if training_args.do_train: + if "train" not in tokenized_datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = lm_datasets["train"] + if data_args.max_train_samples is not None: + max_train_samples = min(len(train_dataset), data_args.max_train_samples) + train_dataset = train_dataset.select(range(max_train_samples)) + + if training_args.do_eval: + if "validation" not in tokenized_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = lm_datasets["validation"] + if data_args.max_eval_samples is not None: + max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) + eval_dataset = eval_dataset.select(range(max_eval_samples)) + + def preprocess_logits_for_metrics(logits, labels): + if isinstance(logits, tuple): + # Depending on the model and config, logits may contain extra tensors, + # like past_key_values, but logits always come first + logits = logits[0] + return logits.argmax(dim=-1) + + metric = evaluate.load("accuracy") + + def compute_metrics(eval_preds): + preds, labels = eval_preds + # preds have the same shape as the labels, after the argmax(-1) has been calculated + # by preprocess_logits_for_metrics but we need to shift the labels + labels = labels[:, 1:].reshape(-1) + preds = preds[:, :-1].reshape(-1) + return metric.compute(predictions=preds, references=labels) + + # 
Initialize our Trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + tokenizer=tokenizer, + # Data collator will default to DataCollatorWithPadding, so we change it. + data_collator=default_data_collator, + compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None, + preprocess_logits_for_metrics=preprocess_logits_for_metrics + if training_args.do_eval and not is_torch_tpu_available() + else None, + ) + + # Tune + if model_args.tune: + def eval_func_for_nc(model_tuned): + trainer.model = model_tuned + eval_output = trainer.evaluate(eval_dataset=eval_dataset) + perplexity = math.exp(eval_output["eval_loss"]) + results = {"perplexity":perplexity,"eval_loss":eval_output["eval_loss"],\ + "eval_samples_per_second":eval_output['eval_samples_per_second']} + clm_task_metrics_keys = ["perplexity","eval_loss"] + for key in clm_task_metrics_keys: + if key in results.keys(): + logger.info("Finally Eval {}:{}".format(key, results[key])) + if key=="eval_loss": + eval_loss = results[key] + break + print("Accuracy: %.5f" % eval_loss) + print('Throughput: %.3f samples/sec' % (results["eval_samples_per_second"])) + print('Latency: %.3f ms' % (1 * 1000 / results["eval_samples_per_second"])) + print('Batch size = %d' % training_args.per_device_eval_batch_size) + + return eval_loss + + from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./conf.yaml") + quantizer.model = common.Model(model) + quantizer.calib_dataloader = trainer.get_eval_dataloader() + quantizer.eval_func = eval_func_for_nc + q_model = quantizer.fit() + q_model.save(training_args.output_dir) + exit(0) + + # Benchmark or accuracy + if model_args.benchmark or model_args.accuracy_only: + if model_args.int8: + from neural_compressor.utils.pytorch import load + new_model = load( + 
os.path.abspath(os.path.expanduser(training_args.output_dir)), model) + else: + new_model = model + trainer.model = new_model + eval_output = trainer.evaluate(eval_dataset=eval_dataset) + perplexity = math.exp(eval_output["eval_loss"]) + results = {"perplexity":perplexity,"eval_loss":eval_output["eval_loss"],\ + "eval_samples_per_second":eval_output['eval_samples_per_second']} + clm_task_metrics_keys = ["eval_loss"] + for key in clm_task_metrics_keys: + if key in results.keys(): + acc = results[key] + break + print("Accuracy: %.5f" % acc) + print('Throughput: %.3f samples/sec' % (results["eval_samples_per_second"])) + print('Latency: %.3f ms' % (1 * 1000 / results["eval_samples_per_second"])) + print('Batch size = %d' % training_args.per_device_eval_batch_size) + exit(0) + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() # Saves the tokenizer too for easy upload + + metrics = train_result.metrics + + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + logger.info("*** Evaluate ***") + + metrics = trainer.evaluate() + + max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) + metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) + try: + perplexity = math.exp(metrics["eval_loss"]) + except OverflowError: + perplexity = float("inf") + metrics["perplexity"] = perplexity + + trainer.log_metrics("eval", metrics) + 
trainer.save_metrics("eval", metrics) + + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + + if training_args.push_to_hub: + trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) + + +def _mp_fn(index): + # For xla_spawn (TPUs) + main() + + +if __name__ == "__main__": + main() diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_tuning.sh new file mode 100644 index 00000000000..04b16872a59 --- /dev/null +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_tuning.sh @@ -0,0 +1,63 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_tuning + +} + +# init params +function init_params { + tuned_checkpoint=saved_results + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + tuned_checkpoint=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# run_tuning +function run_tuning { + extra_cmd='' + batch_size=8 + model_type='bert' + approach='post_training_static_quant' + + if [ "${topology}" = "gpt_j_wikitext" ]; then + TASK_NAME='wikitext' + model_name_or_path=$input_model + extra_cmd='--dataset_config_name=wikitext-2-raw-v1' + fi + + + python -u run_clm.py \ + --model_name_or_path ${model_name_or_path} \ + --dataset_name 
${TASK_NAME} \ + --do_eval \ + --per_device_eval_batch_size ${batch_size} \ + --output_dir ${tuned_checkpoint} \ + --tune \ + ${extra_cmd} + +} + +main "$@" diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/pytorch_pruner/eager/run_qa_no_trainer.py b/examples/pytorch/nlp/huggingface_models/question-answering/pruning/pytorch_pruner/eager/run_qa_no_trainer.py index a3966af4845..ddb785e6c5b 100644 --- a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/pytorch_pruner/eager/run_qa_no_trainer.py +++ b/examples/pytorch/nlp/huggingface_models/question-answering/pruning/pytorch_pruner/eager/run_qa_no_trainer.py @@ -57,6 +57,8 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions +from neural_compressor.pruning import Pruning +from neural_compressor.pruner.utils import WeightPruningConfig # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.21.0.dev0") @@ -118,32 +120,32 @@ def parse_args(): help="The configuration name of the dataset to use (via the datasets library).", ) parser.add_argument( - "--train_file", - type=str, - default=None, + "--train_file", + type=str, + default=None, help="A csv or a json file containing the training data." ) parser.add_argument( - "--preprocessing_num_workers", - type=int, default=4, + "--preprocessing_num_workers", + type=int, default=4, help="A csv or a json file containing the training data." ) parser.add_argument( - "--do_predict", - action="store_true", + "--do_predict", + action="store_true", help="To do prediction on the question answering model" ) parser.add_argument( - "--validation_file", - type=str, - default=None, + "--validation_file", + type=str, + default=None, help="A csv or a json file containing the validation data." 
) parser.add_argument( - "--test_file", - type=str, - default=None, + "--test_file", + type=str, + default=None, help="A csv or a json file containing the Prediction data." ) parser.add_argument( @@ -163,15 +165,14 @@ def parse_args(): parser.add_argument( "--model_name_or_path", type=str, - help="Path to pretrained model or model identifier from huggingface.co/models.", - required=False, + help="Path to pretrained model or model identifier from huggingface.co/models." ) parser.add_argument( "--teacher_model_name_or_path", type=str, default=None, help="Path to pretrained model or model identifier from huggingface.co/models.", - required=False, + required=False ) parser.add_argument( "--config_name", @@ -199,8 +200,8 @@ def parse_args(): parser.add_argument( "--distill_loss_weight", type=float, - default=1.0, - help="distiller loss weight", + default=0.0, + help="distiller loss weight" ) parser.add_argument( "--per_device_eval_batch_size", @@ -215,15 +216,15 @@ def parse_args(): help="Initial learning rate (after the potential warmup period) to use.", ) parser.add_argument( - "--weight_decay", - type=float, - default=0.0, + "--weight_decay", + type=float, + default=0.0, help="Weight decay to use." ) parser.add_argument( - "--num_train_epochs", - type=int, - default=3, + "--num_train_epochs", + type=int, + default=3, help="Total number of training epochs to perform." ) parser.add_argument( @@ -245,29 +246,28 @@ def parse_args(): help="The scheduler type to use.", choices=["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"], ) - parser.add_argument( - "--warm_epochs", - type=int, - default=0, + "--warm_epochs", + type=int, + default=0, help="Number of epochs the network not be purned" ) parser.add_argument( - "--num_warmup_steps", - type=int, - default=0, + "--num_warmup_steps", + type=int, + default=0, help="Number of steps for the warmup in the lr scheduler." 
) parser.add_argument( - "--output_dir", - type=str, - default=None, + "--output_dir", + type=str, + default=None, help="Where to store the final model." ) parser.add_argument( - "--seed", - type=int, - default=None, + "--seed", + type=int, + default=None, help="A seed for reproducible training." ) parser.add_argument( @@ -341,33 +341,18 @@ def parse_args(): choices=MODEL_TYPES, ) parser.add_argument( - "--cooldown_epochs", - type=int, default=0, - help="Cooling epochs after pruning." - ) - parser.add_argument( - "--do_prune", action="store_true", - help="Whether or not to prune the model" - ) - parser.add_argument( - "--pruning_config", - type=str, - help="pruning_config", - ) - - parser.add_argument( - "--push_to_hub", - action="store_true", + "--push_to_hub", + action="store_true", help="Whether or not to push the model to the Hub." ) parser.add_argument( - "--hub_model_id", - type=str, + "--hub_model_id", + type=str, help="The name of the repository to keep in sync with the local `output_dir`." ) parser.add_argument( - "--hub_token", - type=str, + "--hub_token", + type=str, help="The token to use to push to the Model Hub." ) parser.add_argument( @@ -382,13 +367,6 @@ def parse_args(): default=None, help="If the training should continue from a checkpoint folder.", ) - parser.add_argument( - "--distiller", - type=str, - default=None, - help="teacher model path", - ) - parser.add_argument( "--with_tracking", action="store_true", @@ -405,6 +383,35 @@ def parse_args(): ), ) + parser.add_argument( + "--cooldown_epochs", + type=int, default=0, + help="Cooling epochs after pruning." + ) + parser.add_argument( + "--do_prune", action="store_true", + help="Whether or not to prune the model" + ) + # parser.add_argument( + # "--keep_conf", action="store_true", + # help="Whether or not to keep the prune config infos" + # ) + parser.add_argument( + "--pruning_pattern", + type=str, default="1x1", + help="pruning pattern type, we support NxM and N:M." 
+ ) + parser.add_argument( + "--target_sparsity", + type=float, default=0.8, + help="Target sparsity of the model." + ) + parser.add_argument( + "--pruning_frequency", + type=int, default=-1, + help="Sparse step frequency for iterative pruning, default to a quarter of pruning steps." + ) + args = parser.parse_args() # Sanity checks @@ -435,7 +442,7 @@ def parse_args(): def main(): args = parse_args() - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. # send_example_telemetry("run_qa_no_trainer", args) @@ -528,10 +535,13 @@ def main(): "You are instantiating a new tokenizer from scratch. This is not supported by this script." "You can do it from another script, save it, and load it from here, using --tokenizer_name." ) - - if args.teacher_model_name_or_path != None: + + if args.distill_loss_weight > 0: + teacher_path = args.teacher_model_name_or_path + if teacher_path is None: + teacher_path = args.model_name_or_path teacher_model = AutoModelForQuestionAnswering.from_pretrained( - args.teacher_model_name_or_path, + teacher_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, ) @@ -815,7 +825,6 @@ def post_processing_function(examples, features, predictions, stage="eval"): def create_and_fill_np_array(start_or_end_logits, dataset, max_len): """ Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor - Args: start_or_end_logits(:obj:`tensor`): This is the output predictions of the model. We can only enter either start or end logits. @@ -847,6 +856,7 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): # Optimizer # Split weights in two groups, one with weight decay and the other not. 
no_decay = ["bias", "LayerNorm.weight"] + no_decay_outputs = ["bias", "LayerNorm.weight", "qa_outputs"] optimizer_grouped_parameters = [ { "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], @@ -876,10 +886,11 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): num_training_steps=args.max_train_steps, ) - if args.teacher_model_name_or_path != None: + if args.distill_loss_weight > 0: teacher_model, model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare( teacher_model, model, optimizer, train_dataloader, eval_dataloader, lr_scheduler ) + teacher_model.eval() else: # Prepare everything with our `accelerator`. model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare( @@ -949,36 +960,36 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): starting_epoch = resume_step // len(train_dataloader) resume_step -= starting_epoch * len(train_dataloader) - params = [(n, p) for (n, p) in model.named_parameters() if - "bias" not in n and "LayerNorm" not in n and "embeddings" not in n \ - and "layer.3.attention.output.dense.weight" not in n and "qa_outputs" not in n] - - params_keys = [n for (n, p) in params] - for key in params_keys: - print(key) - # Pruning preparation + pruning_configs=[ + { + "pruning_type": "snip_momentum", + "pruning_scope": "global", + "sparsity_decay_type": "exp" + } + ] + config = WeightPruningConfig( + pruning_configs, + target_sparsity=args.target_sparsity, + excluded_op_names=["qa_outputs", "pooler", ".*embeddings*"], + pruning_op_types=["Linear"], + max_sparsity_ratio_per_op=0.98, + pruning_scope="global", + pattern=args.pruning_pattern, + pruning_frequency=1000 + ) + pruner = Pruning(config) num_iterations = len(train_dataset) / total_batch_size - total_iterations = num_iterations * (args.num_train_epochs \ - - args.warm_epochs - args.cooldown_epochs) - args.num_warmup_steps - completed_pruned_cnt = 0 - total_cnt 
= 0 - for n, param in params: - total_cnt += param.numel() - print(f"The total param quantity is {total_cnt}") - - if args.teacher_model_name_or_path != None: - teacher_model.eval() - - from pytorch_pruner.pruning import Pruning - pruner = Pruning(args.pruning_config) + total_iterations = num_iterations * (args.num_train_epochs - args.warm_epochs - args.cooldown_epochs) \ + - args.num_warmup_steps if args.do_prune: - pruner.update_items_for_all_pruners( \ - start_step = int(args.warm_epochs*num_iterations+args.num_warmup_steps), \ - end_step = int(total_iterations))##iterative + start = int(args.warm_epochs * num_iterations+args.num_warmup_steps) + end = int(total_iterations) + frequency = int((end - start + 1) / 4) if (args.pruning_frequency == -1) else args.pruning_frequency + pruner.update_config(start_step=start, end_step=end, pruning_frequency=frequency)##iterative else: total_step = num_iterations * args.num_train_epochs + 1 - pruner.update_items_for_all_pruners(start_step=total_step, end_step=total_step) + pruner.update_config(start_step=total_step, end_step=total_step) pruner.model = model pruner.on_train_begin() @@ -994,14 +1005,14 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): # We keep track of the loss at each epoch if args.with_tracking: total_loss += loss.detach().float() - if args.teacher_model_name_or_path != None: + if args.distill_loss_weight > 0: distill_loss_weight = args.distill_loss_weight with torch.no_grad(): teacher_outputs = teacher_model(**batch) loss = (distill_loss_weight) / 2 * get_loss_one_logit(outputs['start_logits'], - teacher_outputs['start_logits']) \ + teacher_outputs['start_logits']) \ + (distill_loss_weight) / 2 * get_loss_one_logit(outputs['end_logits'], - teacher_outputs['end_logits']) + teacher_outputs['end_logits']) loss = loss / args.gradient_accumulation_steps accelerator.backward(loss) @@ -1160,3 +1171,4 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): if __name__ == 
"__main__": main() + diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md index ac449cdb781..9eb4fba2708 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md @@ -4,10 +4,6 @@ Step-by-Step This document is used to list steps of reproducing PyTorch BERT tuning zoo result. Original BERT documents please refer to [BERT README](../../../../common/README.md) and [README](../../../../common/examples/text-classification/README.md). -> **Note** -> -> Dynamic Quantization is the recommended method for huggingface models. - # Prerequisite ## 1. Installation @@ -57,7 +53,7 @@ sh run_tuning.sh --topology=topology_name --dataset_location=/path/to/glue/data/ or ```bash -python -u ./run_glue_tune.py \ +python -u ./run_glue.py \ --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ --task_name sst2 \ --do_eval \ @@ -73,7 +69,7 @@ python -u ./run_glue_tune.py \ ### 2. To get the benchmark of tuned model, includes batch_size and throughput: ```bash -python -u ./run_glue_tune.py \ +python -u ./run_glue.py \ --model_name_or_path ./int8_model_dir \ --task_name sst2 \ --do_eval \ @@ -158,7 +154,7 @@ Here we set accuracy target as tolerating 0.01 relative accuracy loss of baselin ### Code Prepare -We just need update run_squad_tune.py and run_glue_tune.py like below +We just need update run_glue.py like below ```python if model_args.tune: @@ -195,8 +191,13 @@ if model_args.tune: ### Using Shapley MSE as Objective Shapley values originate from cooperative game theory that come with desirable properties, and now are widely used as a tool to fulfill Explainable AI. 
The run_glue_tune_with_shap.py is designed to help build a bert-based model using Shapley MSE as an objective. Here, the Shapley MSE means that we can get one result from FP32 and several results from INT8 model, so we use MSE to calculate how different between the two shapley values. It can reflect the explainability of INT8 model. -> **Note** : run_glue_tune_with_shap.py is the example of "SST2" task. If you want to execute other glue task, you may take some slight change under "ShapleyMSE" class. +> **Note** : run_glue_with_shap.py is the example of "SST2" task. If you want to execute other glue task, you may take some slight change under "ShapleyMSE" class. + +# Appendix +## Export to ONNX +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt index 7ad9dc04d0c..688b5217718 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt @@ -6,4 +6,6 @@ torch >= 1.3 transformers>=4.10.0 shap scipy -sacremoses \ No newline at end of file +sacremoses +onnx +onnxruntime \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_benchmark.sh index b5a32182174..d8137c5ffb7 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_benchmark.sh +++ 
b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_benchmark.sh @@ -110,7 +110,7 @@ function run_benchmark { fi echo $extra_cmd - python -u run_glue_tune.py \ + python -u run_glue.py \ --task_name ${TASK_NAME} \ --do_eval \ --max_seq_length ${MAX_SEQ_LENGTH} \ diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue.py similarity index 95% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py rename to examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue.py index 13812b30b4e..763d2e4c855 100755 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue.py @@ -144,18 +144,25 @@ class ModelArguments: tune: bool = field( default=False, metadata={ - "help": "tune quantized model with Intel Neural Compressor)." - }, + "help": "tune quantized model with Intel Neural Compressor)."}, ) benchmark: bool = field( default=False, - metadata={"help": "run benchmark."}) + metadata={"help": "run benchmark."}, + ) int8: bool = field( default=False, - metadata={"help":"run benchmark."}) + metadata={"help":"initialize int8 model."}, + ) accuracy_only: bool = field( default=False, - metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}) + metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}, + ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) + + def main(): # See all possible arguments in src/transformers/training_args.py # or by passing the --help flag to this script. 
@@ -432,13 +439,31 @@ def eval_func_for_nc(model_tuned): acc = result[key] break return acc - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - tuning_criterion = TuningCriterion(max_trials=600) - conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", tuning_criterion=tuning_criterion) - q_model = fit(model, conf=conf, eval_func=eval_func_for_nc) + from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./conf.yaml") + quantizer.model = common.Model(model) + quantizer.eval_func = eval_func_for_nc + q_model = quantizer.fit() from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) + + if model_args.onnx: + eval_dataloader = trainer.get_eval_dataloader() + it = iter(eval_dataloader) + input = next(it) + input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-model.onnx', int8_onnx_config) exit(0) if model_args.accuracy_only: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune_with_shap.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_with_shap.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune_with_shap.py rename to examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_with_shap.py diff --git 
a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh index e01add178fb..a04e8bb2ed6 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh @@ -76,7 +76,7 @@ function run_tuning { sed -i "/: bert/s|name:.*|name: $model_type|g" conf.yaml sed -i "/approach:/s|approach:.*|approach: $approach|g" conf.yaml - python -u ./run_glue_tune.py \ + python -u ./run_glue.py \ --model_name_or_path ${model_name_or_path} \ --task_name ${TASK_NAME} \ --do_eval \ @@ -86,6 +86,7 @@ function run_tuning { --no_cuda \ --output_dir ${tuned_checkpoint} \ --tune \ + --onnx \ ${extra_cmd} } diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md index 3c30897b618..fd46c343e63 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md @@ -106,43 +106,12 @@ We also upstreamed several int8 models into HuggingFace [model hub](https://hugg ## This is a tutorial of how to enable NLP model with Intel® Neural Compressor. -### Intel® Neural Compressor supports two usages: - -1. User specifies fp32 'model', calibration dataset 'q_dataloader', evaluation dataset "eval_dataloader" and metrics in tuning.metrics field of model-specific yaml config file. -2. User specifies fp32 'model', calibration dataset 'q_dataloader' and a custom "eval_func" which encapsulates the evaluation dataset and metrics by itself. 
- -As MRPC's metrics are 'f1', 'acc_and_f1', mcc', 'spearmanr', 'acc', so customer should provide evaluation function 'eval_func', it's suitable for the second use case. - -### Write Yaml config file - -In examples directory, there is conf.yaml. We could remove most of the items and only keep mandatory item for tuning. - -```yaml -model: - name: bert - framework: pytorch_fx - -device: cpu - -quantization: - approach: post_training_dynamic_quant - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 300 - random_seed: 9527 -``` - -Here we set accuracy target as tolerating 0.01 relative accuracy loss of baseline. The default tuning strategy is basic strategy. The timeout 0 means early stop as well as a tuning config meet accuracy target. - -> **Note** : neural_compressor does NOT support "mse" tuning strategy for pytorch framework +### Intel® Neural Compressor supports usage: +* User specifies fp32 'model', calibration dataset 'q_dataloader' and a custom "eval_func" which encapsulates the evaluation dataset and metrics by itself. ### Code Prepare -We just need update run_squad_tune.py and run_glue.py like below +We just need update run_glue.py like below ```python trainer = Trainer( @@ -170,20 +139,10 @@ def take_eval_steps(model, trainer, metric_name, save_metrics=False): def eval_func(model): return take_eval_steps(model, trainer, metric_name) -from neural_compressor.experimental import Quantization, common -if ( - not training_args.dataloader_drop_last - and eval_dataset.shape[0] % training_args.per_device_eval_batch_size != 0 -): - raise ValueError( - "The number of samples of the dataset is not a multiple of the batch size." - "Use --dataloader_drop_last to overcome." 
- ) -calib_dataloader = eval_dataloader -quantizer = Quantization('conf.yaml') -quantizer.eval_func = eval_func -quantizer.calib_dataloader = calib_dataloader -quantizer.model = common.Model(model) -model = quantizer.fit() -model.save(training_args.output_dir) +from neural_compressor.quantization import fit +from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion +tuning_criterion = TuningCriterion(max_trials=600) +conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", + tuning_criterion=tuning_criterion) +q_model = fit(model, conf=conf, eval_func=eval_func) ``` diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/conf.yaml b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/conf.yaml index f14a50371bb..414e68c191f 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/conf.yaml +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/conf.yaml @@ -28,4 +28,4 @@ tuning: exit_policy: timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. max_trials: 600 - random_seed: 9527 # optional. random seed for deterministic tuning. + random_seed: 9527 # optional. random seed for deterministic tuning. 
\ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py index 8ea43ea4a41..199a4d25da3 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py @@ -498,15 +498,13 @@ def eval_func(model): # optimize and quantize with Neural Compressor if model_args.tune: - from neural_compressor.experimental import Quantization, common - calib_dataloader = eval_dataloader - quantizer = Quantization('conf.yaml') - quantizer.eval_func = eval_func - quantizer.calib_dataloader = calib_dataloader - quantizer.model = common.Model(model) - model = quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=600) + conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", tuning_criterion=tuning_criterion) + q_model = fit(model, conf=conf, eval_func=eval_func) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream - save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) + save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) return if model_args.benchmark or model_args.accuracy_only: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md index 881332a1314..423265dbfde 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md @@ -106,43 
+106,12 @@ We also upstreamed several int8 models into HuggingFace [model hub](https://hugg ## This is a tutorial of how to enable NLP model with Intel® Neural Compressor. -### Intel® Neural Compressor supports two usages: - -1. User specifies fp32 'model', calibration dataset 'q_dataloader', evaluation dataset "eval_dataloader" and metrics in tuning.metrics field of model-specific yaml config file. -2. User specifies fp32 'model', calibration dataset 'q_dataloader' and a custom "eval_func" which encapsulates the evaluation dataset and metrics by itself. - -As MRPC's metrics are 'f1', 'acc_and_f1', mcc', 'spearmanr', 'acc', so customer should provide evaluation function 'eval_func', it's suitable for the second use case. - -### Write Yaml config file - -In examples directory, there is conf.yaml. We could remove most of the items and only keep mandatory item for tuning. - -```yaml -model: - name: bert - framework: pytorch_fx - -device: cpu - -quantization: - approach: post_training_static_quant - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 300 - random_seed: 9527 -``` - -Here we set accuracy target as tolerating 0.01 relative accuracy loss of baseline. The default tuning strategy is basic strategy. The timeout 0 means early stop as well as a tuning config meet accuracy target. - -> **Note** : neural_compressor does NOT support "mse" tuning strategy for pytorch framework +### Intel® Neural Compressor supports usage: +* User specifies fp32 'model', calibration dataset 'q_dataloader' and a custom "eval_func" which encapsulates the evaluation dataset and metrics by itself. 
### Code Prepare -We just need update run_squad_tune.py and run_glue.py like below +We just need update run_glue.py like below ```python trainer = Trainer( @@ -170,20 +139,19 @@ def take_eval_steps(model, trainer, metric_name, save_metrics=False): def eval_func(model): return take_eval_steps(model, trainer, metric_name) -from neural_compressor.experimental import Quantization, common -if ( - not training_args.dataloader_drop_last - and eval_dataset.shape[0] % training_args.per_device_eval_batch_size != 0 -): - raise ValueError( - "The number of samples of the dataset is not a multiple of the batch size." - "Use --dataloader_drop_last to overcome." - ) -calib_dataloader = eval_dataloader -quantizer = Quantization('conf.yaml') -quantizer.eval_func = eval_func -quantizer.calib_dataloader = calib_dataloader -quantizer.model = common.Model(model) -model = quantizer.fit() -model.save(training_args.output_dir) +from neural_compressor.quantization import fit +from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion +tuning_criterion = TuningCriterion(max_trials=600) +conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) +q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) +from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream +save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) ``` + +# Appendix + +## Export to ONNX + +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. + +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. 
diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/conf.yaml b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/conf.yaml index 2e0fa792eb4..91c12c4567e 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/conf.yaml +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/conf.yaml @@ -28,4 +28,4 @@ tuning: exit_policy: timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. max_trials: 600 - random_seed: 9527 # optional. random seed for deterministic tuning. + random_seed: 9527 # optional. random seed for deterministic tuning. \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt index fbbce5e4433..01afab8e2ae 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt @@ -4,6 +4,8 @@ protobuf scipy scikit-learn Keras-Preprocessing +onnx +onnxruntime transformers >= 4.16.0 --find-links https://download.pytorch.org/whl/torch_stable.html torch >= 1.8.0+cpu diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py index 717ae91d886..ff1807358d4 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py @@ -195,6 +195,9 @@ class ModelArguments: 
accuracy_only: bool = field( default=False, metadata={"help": "get accuracy"} ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) def main(): @@ -501,10 +504,47 @@ def eval_func(model): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion tuning_criterion = TuningCriterion(max_trials=600) - conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) - model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) + conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) + q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream - save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) + save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) + + if model_args.onnx: + it = iter(eval_dataloader) + input = next(it) + input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('fp32-model.onnx', fp32_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(input.values()), + 
input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) return if model_args.benchmark or model_args.accuracy_only: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh index a3f5c6934c7..19712872786 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh @@ -92,6 +92,7 @@ function run_tuning { --no_cuda \ --output_dir ${tuned_checkpoint} \ --tune \ + --onnx \ --overwrite_output_dir \ ${extra_cmd} } diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md index e1c802c7ff2..909408a2fe8 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md @@ -38,9 +38,7 @@ PyTorch 1.8 is needed for pytorch_fx backend and huggingface/transformers. ### 1. Enable bert-base-cased/uncased example with the auto quantization aware training strategy of Neural Compressor. The changes made are as follows: - 1. add conf_qat.yaml: - This file contains the configuration of quantization. - 2. edit run_glue_tune.py: + * edit run_glue.py: - For quantization, We used neural_compressor in it. - For training, we enbaled early stop strategy. @@ -50,7 +48,7 @@ PyTorch 1.8 is needed for pytorch_fx backend and huggingface/transformers. 
or - python run_glue_tune.py \ + python run_glue.py \ --model_name_or_path ${input_model} \ --task_name ${task_name} \ --do_train \ @@ -77,7 +75,7 @@ or or - python run_glue_tune.py \ + python run_glue.py \ --model_name_or_path ${input_model}/${tuned_checkpoint} \ --task_name ${task_name} \ --do_train \ @@ -117,3 +115,11 @@ model = OptimizedModel.from_pretrained( ``` We also upstreamed several int8 models into HuggingFace [model hub](https://huggingface.co/models?other=Intel%C2%AE%20Neural%20Compressor) for users to ramp up. + +# Appendix + +## Export to ONNX + +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. + +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/conf_qat.yaml b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/conf_qat.yaml index d0183b5f889..63d65fe513b 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/conf_qat.yaml +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/conf_qat.yaml @@ -13,4 +13,4 @@ tuning: exit_policy: timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. max_trials: 600 - random_seed: 9527 # optional. random seed for deterministic tuning. + random_seed: 9527 # optional. random seed for deterministic tuning. 
\ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt index 5386769210e..2bb6fc03b2d 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt @@ -4,5 +4,7 @@ datasets == 1.18.0 sentencepiece != 0.1.92 protobuf scipy +onnx +onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html torch >= 1.8.0+cpu diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh index b516d98d2a2..6bfe5f1ef70 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh @@ -64,7 +64,7 @@ function run_benchmark { extra_cmd=$extra_cmd" --int8" fi - python run_glue_tune.py \ + python run_glue.py \ --task_name ${task_name} \ --do_train \ --do_eval \ diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py index 257a5938344..5755401148a 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py @@ -35,6 +35,7 @@ DataCollatorWithPadding, EvalPrediction, HfArgumentParser, + IntervalStrategy, PretrainedConfig, Trainer, TrainingArguments, @@ -184,6 +185,18 @@ class ModelArguments: "with private models)." 
}, ) + tune: bool = field( + default=False, metadata={"help": "tune quantized model with Neural Compressor"} + ) + int8: bool = field( + default=False, metadata={"help": "use int8 model to get accuracy or benchmark"} + ) + benchmark: bool = field( + default=False, metadata={"help": "get benchmark instead of accuracy"} + ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) def main(): @@ -327,14 +340,25 @@ def main(): revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, ) - model = AutoModelForSequenceClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) + if model_args.int8: + from neural_compressor.utils.load_huggingface import OptimizedModel + model = OptimizedModel.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + else: + model = AutoModelForSequenceClassification.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) # Preprocessing the raw_datasets if data_args.task_name is not None: @@ -370,9 +394,9 @@ def main(): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( - "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." 
- "\nIgnoring the model labels as a result.", + f"Your model seems to have been trained with labels, but they don't match the dataset: " + f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}.\n" + f"Ignoring the model labels as a result." ) elif data_args.task_name is None and not is_regression: label_to_id = {v: i for i, v in enumerate(label_list)} @@ -473,6 +497,87 @@ def compute_metrics(p: EvalPrediction): data_collator=data_collator, ) + early_stopping_patience = 2 + early_stopping_threshold = 0.001 # optional + trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \ + early_stopping_threshold)) + + eval_dataloader = trainer.get_eval_dataloader() + batch_size = eval_dataloader.batch_size + + def eval_func(model): + trainer.model = model + result = trainer.evaluate(eval_dataset=eval_dataset) + accu = result['eval_f1'] + print('Accuracy: %.3f ' % (accu), flush=True) + return accu + + def benchmark(model): + print(model) + trainer.model = model + result = trainer.evaluate(eval_dataset=eval_dataset) + throughput = result['eval_samples_per_second'] + print('Batch size = %d' % batch_size) + print('Latency: %.3f ms' % (1000 / throughput)) + print('Throughput: %.3f samples/sec' % result['eval_samples_per_second']) + + # optimize and quantize with Neural Compressor + if model_args.tune: + from neural_compressor.training import prepare_compression + from neural_compressor.config import QuantizationAwareTrainingConfig + conf = QuantizationAwareTrainingConfig() + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + trainer.train() + compression_manager.callbacks.on_train_end() + + from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream + save_for_huggingface_upstream(compression_manager.model, tokenizer, training_args.output_dir) + + if model_args.onnx: + it = iter(eval_dataloader) + input = next(it) + 
input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + compression_manager.export('fp32-model.onnx', fp32_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + compression_manager.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) + return + + if model_args.benchmark: + benchmark(model) + else: + eval_func(model) + return + # Training if training_args.do_train: checkpoint = None diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py deleted file mode 100644 index f5bc771e712..00000000000 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py +++ /dev/null @@ -1,631 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" -# You can also adapt this script on your own text classification task. Pointers for this are left as comments. - -import logging -import os -import random -import sys -from dataclasses import dataclass, field -from typing import Optional - -import datasets -import numpy as np -from datasets import load_dataset, load_metric - -import transformers -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer, - DataCollatorWithPadding, - EvalPrediction, - HfArgumentParser, - IntervalStrategy, - PretrainedConfig, - Trainer, - TrainingArguments, - default_data_collator, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version -from transformers.utils.versions import require_version - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
-check_min_version("4.10.0.dev0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") - -task_to_keys = { - "cola": ("sentence", None), - "mnli": ("premise", "hypothesis"), - "mrpc": ("sentence1", "sentence2"), - "qnli": ("question", "sentence"), - "qqp": ("question1", "question2"), - "rte": ("sentence1", "sentence2"), - "sst2": ("sentence", None), - "stsb": ("sentence1", "sentence2"), - "wnli": ("sentence1", "sentence2"), -} - -logger = logging.getLogger(__name__) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - - Using `HfArgumentParser` we can turn this class - into argparse arguments to be able to specify them on - the command line. - """ - - task_name: Optional[str] = field( - default=None, - metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())}, - ) - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - max_seq_length: int = field( - default=128, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." 
- }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the training data."} - ) - validation_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the validation data."} - ) - test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."}) - - def __post_init__(self): - if self.task_name is not None: - self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): - raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) - elif self.dataset_name is not None: - pass - elif self.train_file is None or self.validation_file is None: - raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.") - else: - train_extension = self.train_file.split(".")[-1] - assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." - validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - use_fast_tokenizer: bool = field( - default=True, - metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - tune: bool = field( - default=False, metadata={"help": "tune quantized model with Neural Compressor"} - ) - int8: bool = field( - default=False, metadata={"help": "use int8 model to get accuracy or benchmark"} - ) - benchmark: bool = field( - default=False, metadata={"help": "get benchmark instead of accuracy"} - ) - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. 
- set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) - # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the - # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named - # label if at least two columns are provided. - # - # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this - # single column. You can easily tweak this behavior (see below) - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.task_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir) - elif data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir - ) - else: - # Loading a dataset from your local files. - # CSV/JSON training and evaluation files are needed. - data_files = {"train": data_args.train_file, "validation": data_args.validation_file} - - # Get the test dataset: you can provide your own CSV/JSON test file (see below) - # when you use `do_predict` without specifying a GLUE benchmark task. - if training_args.do_predict: - if data_args.test_file is not None: - train_extension = data_args.train_file.split(".")[-1] - test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." 
- data_files["test"] = data_args.test_file - else: - raise ValueError("Need either a GLUE task or a test file for `do_predict`.") - - for key in data_files.keys(): - logger.info(f"load a local file for {key}: {data_files[key]}") - - if data_args.train_file.endswith(".csv"): - # Loading a dataset from local csv files - raw_datasets = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir) - else: - # Loading a dataset from local json files - raw_datasets = load_dataset("json", data_files=data_files, cache_dir=model_args.cache_dir) - # See more about loading any type of standard or custom dataset at - # https://huggingface.co/docs/datasets/loading_datasets.html. - - # Labels - if data_args.task_name is not None: - is_regression = data_args.task_name == "stsb" - if not is_regression: - label_list = raw_datasets["train"].features["label"].names - num_labels = len(label_list) - else: - num_labels = 1 - else: - # Trying to have good defaults here, don't hesitate to tweak to your needs. - is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] - if is_regression: - num_labels = 1 - else: - # A useful fast method: - # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique - label_list = raw_datasets["train"].unique("label") - label_list.sort() # Let's sort it for determinism - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=model_args.use_fast_tokenizer, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - if model_args.int8: - from neural_compressor.utils.load_huggingface import OptimizedModel - model = OptimizedModel.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - model = AutoModelForSequenceClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Preprocessing the raw_datasets - if data_args.task_name is not None: - sentence1_key, sentence2_key = task_to_keys[data_args.task_name] - else: - # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. 
- non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] - if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: - sentence1_key, sentence2_key = "sentence1", "sentence2" - else: - if len(non_label_column_names) >= 2: - sentence1_key, sentence2_key = non_label_column_names[:2] - else: - sentence1_key, sentence2_key = non_label_column_names[0], None - - # Padding strategy - if data_args.pad_to_max_length: - padding = "max_length" - else: - # We will pad later, dynamically at batch creation, to the max sequence length in each batch - padding = False - - # Some models have set the order of the labels to use, so let's make sure we do use it. - label_to_id = None - if ( - model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id - and data_args.task_name is not None - and not is_regression - ): - # Some have all caps in their config, some don't. - label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): - label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} - else: - logger.warning( - f"Your model seems to have been trained with labels, but they don't match the dataset: " - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}.\n" - f"Ignoring the model labels as a result." - ) - elif data_args.task_name is None and not is_regression: - label_to_id = {v: i for i, v in enumerate(label_list)} - - if label_to_id is not None: - model.config.label2id = label_to_id - model.config.id2label = {id: label for label, id in config.label2id.items()} - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" - f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." 
- ) - max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) - - def preprocess_function(examples): - # Tokenize the texts - args = ( - (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) - - # Map labels to IDs (not necessary for GLUE tasks) - if label_to_id is not None and "label" in examples: - result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] - return result - - with training_args.main_process_first(desc="dataset map pre-processing"): - raw_datasets = raw_datasets.map( - preprocess_function, - batched=True, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on dataset", - ) - if training_args.do_train: - if "train" not in raw_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = raw_datasets["train"] - if data_args.max_train_samples is not None: - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - - if training_args.do_eval: - if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] - if data_args.max_eval_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) - - if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: - if "test" not in raw_datasets and "test_matched" not in raw_datasets: - raise ValueError("--do_predict requires a test dataset") - predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] - if data_args.max_predict_samples is not None: - predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) - - # Log a few random samples from the training 
set: - if training_args.do_train: - for index in random.sample(range(len(train_dataset)), 3): - logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") - - # Get the metric function - if data_args.task_name is not None: - metric = load_metric("glue", data_args.task_name) - else: - metric = load_metric("accuracy") - - # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a - # predictions and label_ids field) and has to return a dictionary string to float. - def compute_metrics(p: EvalPrediction): - preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions - preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) - if data_args.task_name is not None: - result = metric.compute(predictions=preds, references=p.label_ids) - if len(result) > 1: - result["combined_score"] = np.mean(list(result.values())).item() - return result - elif is_regression: - return {"mse": ((preds - p.label_ids) ** 2).mean().item()} - else: - return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()} - - # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding. 
- if data_args.pad_to_max_length: - data_collator = default_data_collator - elif training_args.fp16: - data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) - else: - data_collator = None - - # Initialize our Trainer - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, - ) - - early_stopping_patience = 2 - early_stopping_threshold = 0.001 # optional - trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \ - early_stopping_threshold)) - - eval_dataloader = trainer.get_eval_dataloader() - batch_size = eval_dataloader.batch_size - - def eval_func(model): - trainer.model = model - result = trainer.evaluate(eval_dataset=eval_dataset) - accu = result['eval_f1'] - print('Accuracy: %.3f ' % (accu), flush=True) - return accu - - def benchmark(model): - print(model) - trainer.model = model - result = trainer.evaluate(eval_dataset=eval_dataset) - throughput = result['eval_samples_per_second'] - print('Batch size = %d' % batch_size) - print('Latency: %.3f ms' % (1000 / throughput)) - print('Throughput: %.3f samples/sec' % result['eval_samples_per_second']) - - # optimize and quantize with Neural Compressor - if model_args.tune: - from neural_compressor.training import prepare_compression - from neural_compressor.config import QuantizationAwareTrainingConfig - conf = QuantizationAwareTrainingConfig(backend="pytorch_fx") - compression_manager = prepare_compression(model, conf) - compression_manager.callbacks.on_train_begin() - model = compression_manager.model - trainer.model_wrapped = model - trainer.model = model - trainer.train() - compression_manager.callbacks.on_train_end() - - from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream - save_for_huggingface_upstream(model, 
tokenizer, training_args.output_dir) - return - - if model_args.benchmark: - benchmark(model) - else: - eval_func(model) - return - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.save_model() # Saves the tokenizer too for easy upload - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - eval_datasets = [eval_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - eval_datasets.append(raw_datasets["validation_mismatched"]) - - for eval_dataset, task in zip(eval_datasets, tasks): - metrics = trainer.evaluate(eval_dataset=eval_dataset) - - max_eval_samples = ( - data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - ) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - if training_args.do_predict: - logger.info("*** Predict ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - predict_datasets = [predict_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - predict_datasets.append(raw_datasets["test_mismatched"]) - - for predict_dataset, task in zip(predict_datasets, tasks): - # Removing the `label` columns because it contains 
-1 and Trainer won't like that. - predict_dataset = predict_dataset.remove_columns("label") - predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions - predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1) - - output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt") - if trainer.is_world_process_zero(): - with open(output_predict_file, "w") as writer: - logger.info(f"***** Predict results {task} *****") - writer.write("index\tprediction\n") - for index, item in enumerate(predictions): - if is_regression: - writer.write(f"{index}\t{item:3.3f}\n") - else: - item = label_list[item] - writer.write(f"{index}\t{item}\n") - - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} - if data_args.task_name is not None: - kwargs["language"] = "en" - kwargs["dataset_tags"] = "glue" - kwargs["dataset_args"] = data_args.task_name - kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" - - trainer.push_to_hub(**kwargs) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh index 888a8968d24..21dd5f0a558 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh @@ -41,7 +41,7 @@ function init_params { # run_tuning function run_tuning { - python run_glue_tune.py \ + python run_glue.py \ --model_name_or_path ${input_model} \ --task_name ${task_name} \ --do_train \ @@ -60,6 +60,7 @@ function run_tuning { --save_strategy steps \ --metric_for_best_model f1 \ --save_total_limit 1 \ + --onnx \ --tune } diff --git 
a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py index 5edbe2580ad..4e2db16cb99 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py @@ -24,6 +24,7 @@ from base_model import ResNet34 from typing import List +import intel_extension_for_pytorch Vector = List[torch.Tensor] diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/conf.yaml b/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/conf.yaml deleted file mode 100644 index 9889995c3d9..00000000000 --- a/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/conf.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: dlrm - framework: pytorch # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops. - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. 
- random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/dlrm_s_pytorch_tune.py b/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/dlrm_s_pytorch_tune.py index 1ae0a3544ce..d93f48c883f 100644 --- a/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/dlrm_s_pytorch_tune.py +++ b/examples/pytorch/recommendation/dlrm/quantization/ptq/eager/dlrm_s_pytorch_tune.py @@ -903,13 +903,13 @@ def eval_func(model): dlrm.bot_l.append(DeQuantStub()) dlrm.top_l.insert(0, QuantStub()) dlrm.top_l.insert(len(dlrm.top_l) - 1, DeQuantStub()) - from neural_compressor.experimental import Quantization, common - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(dlrm) - quantizer.calib_dataloader = eval_dataloader - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save(args.tuned_checkpoint) + from neural_compressor import PostTrainingQuantConfig, quantization + quant_conf = PostTrainingQuantConfig(approach="static", backend="pytorch") + q_model = quantization.fit( + dlrm, + quant_conf, + calib_dataloader=eval_dataloader + ) exit(0) if args.benchmark: diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml b/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml deleted file mode 100644 index 757eb044581..00000000000 --- a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: dlrm - framework: pytorch_fx # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops. - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py b/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py index d8a4169960e..56f78b3f361 100644 --- a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py +++ b/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py @@ -897,12 +897,13 @@ def eval_func(model): if args.tune: print('tune') dlrm.eval() - from neural_compressor.experimental import Quantization, common - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(dlrm) - quantizer.calib_dataloader = eval_dataloader - quantizer.eval_func = eval_func - q_model = quantizer.fit() + from neural_compressor import PostTrainingQuantConfig, quantization + conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx") + q_model = quantization.fit( + dlrm, + conf=conf, + calib_dataloader=eval_dataloader + ) q_model.save(args.tuned_checkpoint) exit(0) diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml b/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml deleted file mode 100644 index e0181e5aa88..00000000000 --- 
a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: dlrm - framework: pytorch_ipex # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 102400 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - weight: - granularity: per_channel - scheme: sym - dtype: int8 - algorithm: minmax - activation: - granularity: per_tensor - scheme: sym - dtype: int8 - algorithm: minmax -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py b/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py index d78e7ac209c..c577bd80db8 100644 --- a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py +++ b/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py @@ -844,11 +844,17 @@ def eval_func(model): ) assert args.inference_only, "Please set inference_only in arguments" - quantizer = Quantization("./conf_ipex.yaml") - quantizer.model = common.Model(dlrm) - quantizer.calib_dataloader = DLRM_DataLoader(train_ld) - quantizer.eval_func = eval_func - q_model = quantizer.fit() + eval_dataloader = DLRM_DataLoader(train_ld) + from neural_compressor import PostTrainingQuantConfig, quantization + conf = PostTrainingQuantConfig(approach="static", + backend="pytorch_ipex" + ) + q_model = quantization.fit( + dlrm, + conf=conf, + eval_func=eval_func, + calib_dataloader=eval_dataloader + ) q_model.save(args.save_model) exit(0) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/QSL.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/QSL.py deleted file mode 100644 index 9c0abe4e734..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/QSL.py +++ /dev/null @@ -1,68 +0,0 @@ -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -from parts.manifest import Manifest -from parts.segment import AudioSegment - -import numpy as np - -import mlperf_loadgen as lg - - -class AudioQSL: - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels), - normalize=True, max_duration=15.0) - self.sample_rate = sample_rate - self.count = len(self.manifest) - perf_count = self.count if perf_count is None else perf_count - 
self.sample_id_to_sample = {} - self.qsl = lg.ConstructQSL(self.count, perf_count, - self.load_query_samples, - self.unload_query_samples) - print( - "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600, - self.count)) - - def load_query_samples(self, sample_list): - for sample_id in sample_list: - self.sample_id_to_sample[sample_id] = self._load_sample(sample_id) - - def unload_query_samples(self, sample_list): - for sample_id in sample_list: - del self.sample_id_to_sample[sample_id] - - def _load_sample(self, index): - sample = self.manifest[index] - segment = AudioSegment.from_file(sample['audio_filepath'][0], - target_sr=self.sample_rate) - waveform = segment.samples - assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32 - return waveform - - def __getitem__(self, index): - return self.sample_id_to_sample[index] - - def __del__(self): - lg.DestroyQSL(self.qsl) - print("Finished destroying QSL.") - -# We have no problem fitting all data in memory, so we do that, in -# order to speed up execution of the benchmark. 
-class AudioQSLInMemory(AudioQSL): - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - super().__init__(dataset_dir, manifest_filepath, labels, - sample_rate, perf_count) - super().load_query_samples(range(self.count)) - - def load_query_samples(self, sample_list): - pass - - def unload_query_samples(self, sample_list): - pass diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/README.md deleted file mode 100644 index 6e29f4ab66e..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/README.md +++ /dev/null @@ -1,77 +0,0 @@ -Step-by-Step -============ - -This document list steps of reproducing Intel Optimized PyTorch RNNT models tuning results via Neural Compressor. - -Our example comes from MLPerf Inference Benchmark Suite - - -# Prerequisite - -### 1. Installation - Recommend python 3.6 or higher version. - - ```shell - cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager - pip install -r requirements.txt - ``` - Check your gcc version with command : **gcc -v** - - GCC5 or above is needed. - - ```shell - bash prepare_loadgen.sh - ``` - -### 2. Prepare Dataset - - ```shell - cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager - bash prepare_dataset.sh --download_dir=origin_dataset --convert_dir=convert_dataset - ``` - - Prepare_dataset.sh contains two stages: - - stage1: download LibriSpeech/dev-clean dataset and extract it. - - stage2: convert .flac file to .wav file - -### 3. Prepare pre-trained model - - ```shell - cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager - wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O rnnt.pt - ``` - -# Run - -### 1. 
Enable RNNT example with the auto dynamic quantization strategy of Neural Compressor. - - The changes made are as follows: - 1. add conf.yaml: - This file contains the configuration of quantization. - 2. run.py->run_tune.py: - we added neural_compressor support in it. - 3. edit pytorch_SUT.py: - remove jit script convertion - 4. edit pytorch/decoders.py: - remove assertion of torch.jit.ScriptModule - -### 2. To get the tuned model and its accuracy: - - bash run_tuning.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --output_model=saved_results - -### 3. To get the benchmark of tuned model, includes Batch_size and Throughput: - - bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=benchmark/accuracy --int8=true/false - -### 4. The following is the brief output information: - -Left part is accuracy/percentage, right part is time_usage/second. - - - FP32 baseline is: [92.5477, 796.7552]. - - Tune 1 result is: [91.5872, 1202.2529] - - Tune 2 result is: [91.5894, 1201.3231] - - Tune 3 result is: [91.5195, 1211.5965] - - Tune 4 result is: [91.6030, 1218.2211] - - Tune 5 result is: [91.4812, 1169.5080] - - ... 
- diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/accuracy_eval.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/accuracy_eval.py deleted file mode 100644 index ea81792855b..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/accuracy_eval.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python - -import argparse -import array -import json -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch")) - -from QSL import AudioQSL -from helpers import process_evaluation_epoch, __gather_predictions -from parts.manifest import Manifest - -dtype_map = { - "int8": 'b', - "int16": 'h', - "int32": 'l', - "int64": 'q', -} - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log_dir", required=True) - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type") - args = parser.parse_args() - return args - -def main(): - args = get_args() - labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] - qsl = AudioQSL(args.dataset_dir, args.manifest, labels) - manifest = qsl.manifest - with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh: - results = json.load(fh) - hypotheses = [] - references = [] - for result in results: - hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist()) - references.append(manifest[result["qsl_idx"]]["transcript"]) - - references = __gather_predictions([references], labels=labels) - hypotheses = __gather_predictions([hypotheses], labels=labels) - - d = dict(predictions=hypotheses, - transcripts=references) - wer = process_evaluation_epoch(d) - print("Word Error Rate: {:}%, 
accuracy={:}%".format(wer * 100, (1 - wer) * 100)) - -if __name__ == '__main__': - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/conf.yaml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/conf.yaml deleted file mode 100644 index 87357108792..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/conf.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: rnnt - framework: pytorch # mandatory. this model runs on pytorch framework - -quantization: - approach: post_training_dynamic_quant # mandatory. default value is post_training_dynamic_quant. - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - max_trials: 600 - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/dev-clean-wav.json b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/dev-clean-wav.json deleted file mode 100644 index 1e95c863c80..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/dev-clean-wav.json +++ /dev/null @@ -1,48656 +0,0 @@ -[ - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.59, - "num_samples": 105440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.59, - "original_num_samples": 105440, - "transcript": "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.145, - "num_samples": 114320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.145, - "original_num_samples": 114320, - "transcript": "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.835, - "original_num_samples": 77360, - "transcript": "hurstwood walked the floor mentally arranging the chief points of his situation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.84, - "num_samples": 45440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2277/149896/2277-149896-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.84, - "original_num_samples": 45440, - "transcript": "he also thought of his managerial position" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.955, - "num_samples": 31280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0004.wav", - "speed": 1 - } - ], - "original_duration": 1.955, - "original_num_samples": 31280, - "transcript": "how would the papers talk about it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.6, - "num_samples": 89600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.6, - "original_num_samples": 89600, - "transcript": "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.97, - "num_samples": 79520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.97, - "original_num_samples": 79520, - "transcript": "he could arrange that satisfactorily for carrie would be glad to wait if necessary" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.375, - "num_samples": 86000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.375, - "original_num_samples": 86000, - "transcript": "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 6.94, - "num_samples": 111040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.94, - "original_num_samples": 111040, - "transcript": "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious noticed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.815, - "num_samples": 93040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.815, - "original_num_samples": 93040, - "transcript": "while the danger had not lessened it had not as yet materialised and with him no news was good news" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.275, - "num_samples": 84400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0010.wav", - "speed": 1 - } - ], - "original_duration": 5.275, - "original_num_samples": 84400, - "transcript": "so little did he consider drouet that it never once occurred to him to worry about his finding out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.465, - "num_samples": 71440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.465, - "original_num_samples": 71440, - "transcript": "he grew restless as he ruminated and then decided that perhaps it was nothing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.495, - "num_samples": 39920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0012.wav", - "speed": 1 - 
} - ], - "original_duration": 2.495, - "original_num_samples": 39920, - "transcript": "she had not been able to get away this morning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.09, - "num_samples": 97440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.09, - "original_num_samples": 97440, - "transcript": "he would get one to day it would probably be on his desk when he got back he would look for it at once" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.07, - "num_samples": 65120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.07, - "original_num_samples": 65120, - "transcript": "after a time he gave up waiting and drearily headed for the madison car" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.675, - "num_samples": 58800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.675, - "original_num_samples": 58800, - "transcript": "he went in and examined his letters but there was nothing from carrie" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "fortunately there was nothing from his wife either" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.985, - "num_samples": 79760, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.985, - "original_num_samples": 79760, - "transcript": "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.545, - "num_samples": 72720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.545, - "original_num_samples": 72720, - "transcript": "his first impulse was to write but four words in reply go to the devil" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.56, - "num_samples": 56960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.56, - "original_num_samples": 56960, - "transcript": "but he compromised by telling the boy that there would be no reply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.285, - "num_samples": 84560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.285, - "original_num_samples": 84560, - "transcript": "then he sat down in his chair and gazed without seeing contemplating the result of his work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.37, - "num_samples": 53920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.37, - "original_num_samples": 53920, - "transcript": "what would she do about that the confounded wretch" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.38, - "num_samples": 54080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.38, - "original_num_samples": 54080, - "transcript": "later however his old discretion asserted itself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.89, - "num_samples": 62240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.89, - "original_num_samples": 62240, - "transcript": "something had to be done a climax was near and she would not sit idle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.5, - "num_samples": 72000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.5, - "original_num_samples": 72000, - "transcript": "he knew her well enough to know that when she had decided upon a plan she would follow it up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.4, - "num_samples": 54400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.4, - "original_num_samples": 54400, - "transcript": "he arose from his chair and went and looked out into the street" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.92, - "num_samples": 78720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.92, - "original_num_samples": 78720, - "transcript": "the long drizzle had begun 
pedestrians had turned up collars and trousers at the bottom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.595, - "num_samples": 57520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.595, - "original_num_samples": 57520, - "transcript": "hurstwood almost exclaimed out loud at the insistency of this thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.9, - "num_samples": 46400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.9, - "original_num_samples": 46400, - "transcript": "he put on his hat and looked around for his umbrella" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.6, - "num_samples": 41600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0029.wav", - "speed": 1 - } - ], - "original_duration": 2.6, - "original_num_samples": 41600, - "transcript": "he would have some arrangement of this thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.3, - "num_samples": 100800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0030.wav", - "speed": 1 - } - ], - "original_duration": 6.3, - "original_num_samples": 100800, - "transcript": "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.61, - "num_samples": 57760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0031.wav", - "speed": 1 - 
} - ], - "original_duration": 3.61, - "original_num_samples": 57760, - "transcript": "he would go in and see anyhow he would have no row" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.77, - "num_samples": 172320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0032.wav", - "speed": 1 - } - ], - "original_duration": 10.77, - "original_num_samples": 172320, - "transcript": "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.845, - "num_samples": 45520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0033.wav", - "speed": 1 - } - ], - "original_duration": 2.845, - "original_num_samples": 45520, - "transcript": "then he rang the bell no answer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.005, - "num_samples": 64080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149896/2277-149896-0034.wav", - "speed": 1 - } - ], - "original_duration": 4.005, - "original_num_samples": 64080, - "transcript": "he rang again this time harder still no answer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.395, - "num_samples": 70320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.395, - "original_num_samples": 70320, - "transcript": "when hurstwood got back to his office again he was in a greater quandary than ever" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.755, - 
"num_samples": 44080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.755, - "original_num_samples": 44080, - "transcript": "he could hardly realise how it had all come about" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.54, - "num_samples": 104640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.54, - "original_num_samples": 104640, - "transcript": "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.84, - "num_samples": 77440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.84, - "original_num_samples": 77440, - "transcript": "he saw that in the excitement of recent events he had not formulated a plan upon that score" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.735, - "num_samples": 91760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.735, - "original_num_samples": 91760, - "transcript": "he was getting some vague comfort out of a good cigar but it was no panacea for the ill which affected him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.465, - "num_samples": 183440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0005.wav", - "speed": 1 - } - ], - "original_duration": 11.465, - "original_num_samples": 183440, - 
"transcript": "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.385, - "num_samples": 54160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.385, - "original_num_samples": 54160, - "transcript": "then he called harry the boy of all work around the place" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.855, - "num_samples": 125680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0007.wav", - "speed": 1 - } - ], - "original_duration": 7.855, - "original_num_samples": 125680, - "transcript": "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.88, - "num_samples": 46080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.88, - "original_num_samples": 46080, - "transcript": "any answer i guess not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.725, - "num_samples": 59600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.725, - "original_num_samples": 59600, - "transcript": "the boy hastened away and the manager fell to his musings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.625, - "num_samples": 58000, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.625, - "original_num_samples": 58000, - "transcript": "he was beaten for to night and he might just as well make the best of it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.3, - "num_samples": 52800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.3, - "original_num_samples": 52800, - "transcript": "she would take the envelope and know that she had triumphed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.09, - "num_samples": 49440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.09, - "original_num_samples": 49440, - "transcript": "if he only had that letter back he wouldn't send it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.415, - "num_samples": 70640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.415, - "original_num_samples": 70640, - "transcript": "for relief he arose and joined in conversation with a few friends who were drinking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.62, - "num_samples": 73920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.62, - "original_num_samples": 73920, - "transcript": "all the time his thoughts would run out to his home and see the scene being therein enacted" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.04, - "num_samples": 48640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.04, - "original_num_samples": 48640, - "transcript": "in about an hour and three quarters the boy returned" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.540063, - "num_samples": 104641, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.540063, - "original_num_samples": 104641, - "transcript": "he fancied as he sat at his desk that nothing would be done for a week or two meanwhile he would have time to think" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.3, - "num_samples": 116800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.3, - "original_num_samples": 116800, - "transcript": "how about that now his pain at her failure to meet or write him rapidly increased as he devoted himself to this subject" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.745, - "num_samples": 107920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.745, - "original_num_samples": 107920, - "transcript": "he decided to write her care of the west side post office and ask for an explanation as well as to have her meet him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.85, - "num_samples": 77600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2277/149897/2277-149897-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.85, - "original_num_samples": 77600, - "transcript": "three o'clock came four five six and no letter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.62, - "num_samples": 73920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.62, - "original_num_samples": 73920, - "transcript": "the helpless manager paced the floor and grimly endured the gloom of defeat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.685, - "num_samples": 74960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.685, - "original_num_samples": 74960, - "transcript": "he saw a busy saturday ushered out the sabbath in and nothing done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.05, - "num_samples": 160800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.05, - "original_num_samples": 160800, - "transcript": "all day the bar being closed he brooded alone shut out from home from the excitement of his resort from carrie and without the ability to alter his condition one iota" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.31, - "num_samples": 52960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.31, - "original_num_samples": 52960, - "transcript": "it was the worst sunday he had spent in his life" - }, - { - "files": [ - { - "channels": 1, 
- "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.935, - "num_samples": 46960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.935, - "original_num_samples": 46960, - "transcript": "it seemed as if his family troubles were just beginning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.285, - "num_samples": 68560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.285, - "original_num_samples": 68560, - "transcript": "he was quite certain now that she knew he was married and was angered at his perfidy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.815, - "num_samples": 109040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0026.wav", - "speed": 1 - } - ], - "original_duration": 6.815, - "original_num_samples": 109040, - "transcript": "he had loved her earnestly enough but now that the possibility of losing her stared him in the face she seemed much more attractive" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.515, - "num_samples": 56240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.515, - "original_num_samples": 56240, - "transcript": "he would go to her and tell her all his family complications" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.625, - "num_samples": 58000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.625, - 
"original_num_samples": 58000, - "transcript": "he would explain to her just where he stood and how much he needed her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.645, - "num_samples": 122320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0029.wav", - "speed": 1 - } - ], - "original_duration": 7.645, - "original_num_samples": 122320, - "transcript": "he did manage to bring himself into the mood to go out to carrie but when he got in ogden place he thought he saw a man watching him and went away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.55, - "num_samples": 40800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.55, - "original_num_samples": 40800, - "transcript": "he did not go within a block of the house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.405, - "num_samples": 70480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0031.wav", - "speed": 1 - } - ], - "original_duration": 4.405, - "original_num_samples": 70480, - "transcript": "he troubled over many little details and talked perfunctorily to everybody" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.69, - "num_samples": 139040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0032.wav", - "speed": 1 - } - ], - "original_duration": 8.69, - "original_num_samples": 139040, - "transcript": "he stayed at his desk long after all others had gone and only quitted it when the night watchman on his round pulled at the front door to see if it was safely locked" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.065, - "num_samples": 81040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0033.wav", - "speed": 1 - } - ], - "original_duration": 5.065, - "original_num_samples": 81040, - "transcript": "on wednesday he received another polite note from mc gregor james and hay it read" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.16, - "num_samples": 194560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0034.wav", - "speed": 1 - } - ], - "original_duration": 12.16, - "original_num_samples": 194560, - "transcript": "dear sir we beg to inform you that we are instructed to wait until to morrow thursday at one o'clock before filing suit against you on behalf of missus julia hurstwood for divorce and alimony" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.45, - "num_samples": 55200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0035.wav", - "speed": 1 - } - ], - "original_duration": 3.45, - "original_num_samples": 55200, - "transcript": "very truly yours et cetera compromise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.42, - "num_samples": 70720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0036.wav", - "speed": 1 - } - ], - "original_duration": 4.42, - "original_num_samples": 70720, - "transcript": "so here it was spread out clear before him and now he knew what to expect" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.01, - "num_samples": 48160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149897/2277-149897-0037.wav", - "speed": 1 - 
} - ], - "original_duration": 3.01, - "original_num_samples": 48160, - "transcript": "if he didn't go and see them they would sue him promptly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.505, - "num_samples": 248080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0000.wav", - "speed": 1 - } - ], - "original_duration": 15.505, - "original_num_samples": 248080, - "transcript": "minnie's flat as the one floor resident apartments were then being called was in a part of west van buren street inhabited by families of labourers and clerks men who had come and were still coming with the rush of population pouring in at the rate of fifty thousand a year" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.04, - "num_samples": 112640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.04, - "original_num_samples": 112640, - "transcript": "to carrie the sound of the little bells upon the horse cars as they tinkled in and out of hearing was as pleasing as it was novel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.095, - "num_samples": 81520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.095, - "original_num_samples": 81520, - "transcript": "to him the presence or absence of his wife's sister was a matter of indifference" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.7, - "num_samples": 123200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.7, - 
"original_num_samples": 123200, - "transcript": "he was of a clean saving disposition and had already paid a number of monthly instalments on two lots far out on the west side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.215, - "num_samples": 51440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.215, - "original_num_samples": 51440, - "transcript": "his ambition was some day to build a house on them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.37, - "num_samples": 101920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.37, - "original_num_samples": 101920, - "transcript": "she had some slight gift of observation and that sense so rich in every woman intuition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.745, - "num_samples": 43920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.745, - "original_num_samples": 43920, - "transcript": "the walls of the rooms were discordantly papered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.4, - "num_samples": 70400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.4, - "original_num_samples": 70400, - "transcript": "the floors were covered with matting and the hall laid with a thin rag carpet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.385, - "num_samples": 86160, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.385, - "original_num_samples": 86160, - "transcript": "then she walked and sang to it until hanson disturbed in his reading came and took it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.35, - "num_samples": 53600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.35, - "original_num_samples": 53600, - "transcript": "one could see that he was very much wrapped up in his offspring" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.42, - "num_samples": 118720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.42, - "original_num_samples": 118720, - "transcript": "now now he said walking there there and there was a certain swedish accent noticeable in his voice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.465, - "num_samples": 39440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.465, - "original_num_samples": 39440, - "transcript": "he seemed to be thinking of something else" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.08, - "num_samples": 81280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.08, - "original_num_samples": 81280, - "transcript": "minnie began to explain but her husband took this part of the conversation to himself" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.16, - "num_samples": 50560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.16, - "original_num_samples": 50560, - "transcript": "you could get home easy too it isn't very far" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.6, - "num_samples": 137600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.6, - "original_num_samples": 137600, - "transcript": "she asked minnie for ink and paper which were upon the mantel in the dining room and when the latter had gone to bed at ten got out drouet's card and wrote him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.73, - "num_samples": 75680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.73, - "original_num_samples": 75680, - "transcript": "she wanted to make some reference to their relations upon the train but was too timid" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.325, - "num_samples": 85200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.325, - "original_num_samples": 85200, - "transcript": "anything was good enough so long as it paid say five dollars a week to begin with" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.36, - "num_samples": 53760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0017.wav", - "speed": 1 
- } - ], - "original_duration": 3.36, - "original_num_samples": 53760, - "transcript": "a shop girl was the destiny prefigured for the newcomer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.02, - "num_samples": 80320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.02, - "original_num_samples": 80320, - "transcript": "it was under such auspicious circumstances that she started out this morning to look for work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.45, - "num_samples": 135200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0019.wav", - "speed": 1 - } - ], - "original_duration": 8.45, - "original_num_samples": 135200, - "transcript": "narrow board walks extended out passing here a house and there a store at far intervals eventually ending on the open prairie" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.38, - "num_samples": 118080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0020.wav", - "speed": 1 - } - ], - "original_duration": 7.38, - "original_num_samples": 118080, - "transcript": "it gave an imposing appearance to most of the wholesale houses whose offices were upon the ground floor and in plain view of the street" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.985, - "num_samples": 47760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2277/149874/2277-149874-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.985, - "original_num_samples": 47760, - "transcript": "these vast buildings what were they" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 9.02, - "num_samples": 144320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.02, - "original_num_samples": 144320, - "transcript": "she was four years older than i to be sure and had seen more of the world but i was a boy and she was a girl and i resented her protecting manner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.925, - "num_samples": 62800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.925, - "original_num_samples": 62800, - "transcript": "this change came about from an adventure we had together" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.84, - "num_samples": 141440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.84, - "original_num_samples": 141440, - "transcript": "one day when i rode over to the shimerdas i found antonia starting off on foot for russian peter's house to borrow a spade ambrosch needed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.84, - "num_samples": 93440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.84, - "original_num_samples": 93440, - "transcript": "there had been another black frost the night before and the air was clear and heady as wine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.17, - "num_samples": 66720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2035/147960/2035-147960-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.17, - "original_num_samples": 66720, - "transcript": "it was on one of these gravel beds that i met my adventure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.585, - "num_samples": 105360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.585, - "original_num_samples": 105360, - "transcript": "i whirled round and there on one of those dry gravel beds was the biggest snake i had ever seen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.315, - "num_samples": 53040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.315, - "original_num_samples": 53040, - "transcript": "i know i am just awful jim i was so scared" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.6, - "num_samples": 73600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.6, - "original_num_samples": 73600, - "transcript": "i never know you was so brave jim she went on comfortingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.21, - "num_samples": 99360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.21, - "original_num_samples": 99360, - "transcript": "a faint fetid smell came from him and a thread of green liquid oozed from his crushed head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
3.5, - "num_samples": 56000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.5, - "original_num_samples": 56000, - "transcript": "look tony that's his poison i said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.585, - "num_samples": 153360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0010.wav", - "speed": 1 - } - ], - "original_duration": 9.585, - "original_num_samples": 153360, - "transcript": "i explained to antonia how this meant that he was twenty four years old that he must have been there when white men first came left on from buffalo and indian times" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.38, - "num_samples": 70080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.38, - "original_num_samples": 70080, - "transcript": "we decided that antonia should ride dude home and i would walk" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.255, - "num_samples": 68080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.255, - "original_num_samples": 68080, - "transcript": "i followed with the spade over my shoulder dragging my snake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.675, - "num_samples": 42800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.675, - "original_num_samples": 42800, - "transcript": "otto fuchs was the first one 
we met" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.465063, - "num_samples": 71441, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.465063, - "original_num_samples": 71441, - "transcript": "he could stand right up and talk to you he could did he fight hard" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.56, - "num_samples": 24960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0015.wav", - "speed": 1 - } - ], - "original_duration": 1.56, - "original_num_samples": 24960, - "transcript": "otto winked at me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.895, - "num_samples": 78320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147960/2035-147960-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.895, - "original_num_samples": 78320, - "transcript": "a snake of his size in fighting trim would be more than any boy could handle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.875, - "num_samples": 126000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0000.wav", - "speed": 1 - } - ], - "original_duration": 7.875, - "original_num_samples": 126000, - "transcript": "throughout this century the power of the church was constantly on the increase and is visible in many important changes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.67, - "num_samples": 266720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0001.wav", - "speed": 1 - } - ], - "original_duration": 16.67, - 
"original_num_samples": 266720, - "transcript": "the ancestors of the present pretender congal surnamed the squint eyed had twice received and cherished the licentious bards when under the ban of tara and his popularity with that still powerful order was one prop of his ambition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.34, - "num_samples": 149440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.34, - "original_num_samples": 149440, - "transcript": "it is pretty clear also that the last rally of druidism against christianity took place behind his banner on the plain of moira" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.415, - "num_samples": 102640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.415, - "original_num_samples": 102640, - "transcript": "the poets of succeeding ages have dwelt much in detail on the occurrences of this memorable day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.34, - "num_samples": 117440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.34, - "original_num_samples": 117440, - "transcript": "like the two kings of sparta they reigned jointly dividing between them the labours and cares of state" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.03, - "num_samples": 384480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0005.wav", - "speed": 1 - } - ], - "original_duration": 24.03, - "original_num_samples": 384480, - 
"transcript": "it was the season when the ancient sun god had been accustomed to receive his annual oblations and we can well believe that those whose hearts still trembled at the name of bel must have connected the eclipse and the plague with the revolution in the national worship and the overthrow of the ancient gods on that plain of prostration where they had so long received the homage of an entire people" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.71, - "num_samples": 123360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.71, - "original_num_samples": 123360, - "transcript": "lastly the royal brothers fell themselves victims to the epidemic which so sadly signalizes their reign" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.94, - "num_samples": 207040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0007.wav", - "speed": 1 - } - ], - "original_duration": 12.94, - "original_num_samples": 207040, - "transcript": "the only conflicts that occurred on irish soil with a pictish or an anglo saxon force if we except those who formed a contingent of congal's army at moira occurred in the time of the hospitable finnacta" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.665, - "num_samples": 106640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.665, - "original_num_samples": 106640, - "transcript": "as leading to the mention of other interesting events we must set this inroad clearly before the reader" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.32, - 
"num_samples": 277120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0009.wav", - "speed": 1 - } - ], - "original_duration": 17.32, - "original_num_samples": 277120, - "transcript": "the saxons of kent and the southern kingdoms generally were converted by missionaries from france or rome or native preachers of the first or second christian generation those of northumbria recognise as their apostles saint aidan and saint cuthbert two fathers from iona" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.6, - "num_samples": 121600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.6, - "original_num_samples": 121600, - "transcript": "the kingdom of northumbria as the name implies embraced nearly all the country from the humber to the pictish border" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.465, - "num_samples": 167440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.465, - "original_num_samples": 167440, - "transcript": "the barren rock about three miles in length was covered with monastic buildings and its cemetery was already adorned with the tombs of saints and kings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.23, - "num_samples": 163680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0012.wav", - "speed": 1 - } - ], - "original_duration": 10.23, - "original_num_samples": 163680, - "transcript": "now every missionary that ever went out from iona had taught that to reduce christians to slavery was wholly inconsistent with a belief in the doctrines of the 
gospel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.47, - "num_samples": 327520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0013.wav", - "speed": 1 - } - ], - "original_duration": 20.47, - "original_num_samples": 327520, - "transcript": "while the liberated exiles rejoiced on the plain of meath the tent of the abbot of iona was pitched on the rath of tara a fact which would seem to indicate that already in little more than a century since the interdict had fallen on it the edifices which made so fine a show in the days of patrick were ruined and uninhabitable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.35, - "num_samples": 117600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.35, - "original_num_samples": 117600, - "transcript": "so slow and patient is the process by which christianity infuses itself into the social life of a converted people" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.51, - "num_samples": 136160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.51, - "original_num_samples": 136160, - "transcript": "here the holy prelate of ferns met him and related a vision in which he had been instructed to demand the abolition of the impost" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.59, - "num_samples": 105440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.59, - "original_num_samples": 105440, - "transcript": "the tribute was 
at this period enormous fifteen thousand head of cattle annually" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.855, - "num_samples": 125680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.855, - "original_num_samples": 125680, - "transcript": "saint moling survived him three years and saint adamnan so intimately connected with his reign ten years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.015, - "num_samples": 96240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/152373/2035-152373-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.015, - "original_num_samples": 96240, - "transcript": "nothing could be more natural than such an assembly in such a place at such a period" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.07, - "num_samples": 241120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0000.wav", - "speed": 1 - } - ], - "original_duration": 15.07, - "original_num_samples": 241120, - "transcript": "peter told his troubles to mister shimerda he was unable to meet a note which fell due on the first of november had to pay an exorbitant bonus on renewing it and to give a mortgage on his pigs and horses and even his milk cow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.415, - "num_samples": 70640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.415, - "original_num_samples": 70640, - "transcript": "peter could give no very clear account of his transactions with cutter" - }, - { - "files": [ - { - "channels": 1, 
- "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.995, - "num_samples": 111920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.995, - "original_num_samples": 111920, - "transcript": "she asked peter to wait a moment and when she came back from the kitchen she brought a bag of sandwiches and doughnuts for us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.695, - "num_samples": 43120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.695, - "original_num_samples": 43120, - "transcript": "we lay still and did not talk" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.965, - "num_samples": 95440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.965, - "original_num_samples": 95440, - "transcript": "the little house on the hillside was so much the color of the night that we could not see it as we came up the draw" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.42, - "num_samples": 150720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.42, - "original_num_samples": 150720, - "transcript": "they made me think of defeated armies retreating or of ghosts who were trying desperately to get in for shelter and then went moaning on" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.795, - "num_samples": 44720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2035/147961/2035-147961-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.795, - "original_num_samples": 44720, - "transcript": "i could not take my eyes off the man in the bed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.1, - "num_samples": 49600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.1, - "original_num_samples": 49600, - "transcript": "the sharp smell of spirits went through the room" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.25, - "num_samples": 68000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.25, - "original_num_samples": 68000, - "transcript": "it seemed to me that he despised him for being so simple and docile" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.875, - "num_samples": 46000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.875, - "original_num_samples": 46000, - "transcript": "the sick man raged and shook his fist" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.91, - "num_samples": 46560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.91, - "original_num_samples": 46560, - "transcript": "he seemed to be cursing people who had wronged him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.72, - "num_samples": 91520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2035/147961/2035-147961-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.72, - "original_num_samples": 91520, - "transcript": "quickly it was covered with bright red spots i thought i had never seen any blood so bright" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.025, - "num_samples": 64400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.025, - "original_num_samples": 64400, - "transcript": "he lay patiently fighting for breath like a child with croup" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.98, - "num_samples": 79680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.98, - "original_num_samples": 79680, - "transcript": "antonia's father uncovered one of his long bony legs and rubbed it rhythmically" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.92, - "num_samples": 62720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.92, - "original_num_samples": 62720, - "transcript": "from our bench we could see what a hollow case his body was" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "gradually relief came to all of us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.265, - "num_samples": 
52240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.265, - "original_num_samples": 52240, - "transcript": "without a word peter got up and lit his lantern" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.01, - "num_samples": 32160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.01, - "original_num_samples": 32160, - "transcript": "mister shimerda went with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.59, - "num_samples": 89440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.59, - "original_num_samples": 89440, - "transcript": "after the ceremony at the church the party went to a dinner given by the parents of the bride" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.77, - "num_samples": 76320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.77, - "original_num_samples": 76320, - "transcript": "the first howls were taken up and echoed and with quickening repetitions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.63, - "num_samples": 58080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0020.wav", - "speed": 1 - } - ], - "original_duration": 3.63, - "original_num_samples": 58080, - "transcript": "a black drove came up over the hill behind the wedding party" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 10.895, - "num_samples": 174320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0021.wav", - "speed": 1 - } - ], - "original_duration": 10.895, - "original_num_samples": 174320, - "transcript": "something happened to the hindmost sledge the driver lost control he was probably very drunk the horses left the road the sledge was caught in a clump of trees and overturned" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.415, - "num_samples": 54640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.415, - "original_num_samples": 54640, - "transcript": "the shrieks that followed made everybody sober" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.31, - "num_samples": 68960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.31, - "original_num_samples": 68960, - "transcript": "the road was clear and white and the groom's three blacks went like the wind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.81, - "num_samples": 60960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.81, - "original_num_samples": 60960, - "transcript": "there are only three sledges left he whispered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.525, - "num_samples": 88400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.525, - "original_num_samples": 
88400, - "transcript": "and the wolves pavel asked enough enough for all of us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.875, - "num_samples": 46000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.875, - "original_num_samples": 46000, - "transcript": "they were within a few miles of their village now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.875, - "num_samples": 46000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.875, - "original_num_samples": 46000, - "transcript": "yes how many" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.705, - "num_samples": 43280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.705, - "original_num_samples": 43280, - "transcript": "twenty thirty enough" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.73, - "num_samples": 59680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.73, - "original_num_samples": 59680, - "transcript": "now his middle horse was being almost dragged by the other two" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.85, - "num_samples": 61600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.85, - "original_num_samples": 61600, - "transcript": "pavel knocked him over the 
side of the sledge and threw the girl after him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.55, - "num_samples": 56800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0031.wav", - "speed": 1 - } - ], - "original_duration": 3.55, - "original_num_samples": 56800, - "transcript": "peter crouching in the front seat saw nothing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.86, - "num_samples": 221760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0032.wav", - "speed": 1 - } - ], - "original_duration": 13.86, - "original_num_samples": 221760, - "transcript": "the first thing either of them noticed was a new sound that broke into the clear air louder than they had ever heard it before the bell of the monastery of their own village ringing for early prayers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.945, - "num_samples": 31120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0033.wav", - "speed": 1 - } - ], - "original_duration": 1.945, - "original_num_samples": 31120, - "transcript": "they were run out of their village" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.805, - "num_samples": 44880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0034.wav", - "speed": 1 - } - ], - "original_duration": 2.805, - "original_num_samples": 44880, - "transcript": "wherever they went the story followed them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.445, - "num_samples": 87120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2035/147961/2035-147961-0035.wav", - "speed": 1 - } - ], - "original_duration": 5.445, - "original_num_samples": 87120, - "transcript": "they worked in chicago des moines fort wayne but they were always unfortunate" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.48, - "num_samples": 71680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0036.wav", - "speed": 1 - } - ], - "original_duration": 4.48, - "original_num_samples": 71680, - "transcript": "during the auction he went about with his head down and never lifted his eyes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0037.wav", - "speed": 1 - } - ], - "original_duration": 4.835, - "original_num_samples": 77360, - "transcript": "every one said peter kissed the cow before she was led away by her new owner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.715, - "num_samples": 75440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0038.wav", - "speed": 1 - } - ], - "original_duration": 4.715, - "original_num_samples": 75440, - "transcript": "the loss of his two friends had a depressing effect upon old mister shimerda" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.35, - "num_samples": 85600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0039.wav", - "speed": 1 - } - ], - "original_duration": 5.35, - "original_num_samples": 85600, - "transcript": "when he was out hunting he used to go into the empty log house and sit there brooding" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.94, - "num_samples": 79040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2035/147961/2035-147961-0040.wav", - "speed": 1 - } - ], - "original_duration": 4.94, - "original_num_samples": 79040, - "transcript": "this cabin was his hermitage until the winter snows penned him in his cave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.81, - "num_samples": 156960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149214/2086-149214-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.81, - "original_num_samples": 156960, - "transcript": "the narrative it may be is woven of so humble a texture as to require this advantage and at the same time to render it the more difficult of attainment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.97, - "num_samples": 111520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149214/2086-149214-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.97, - "original_num_samples": 111520, - "transcript": "in good faith however he is not sufficiently imaginative to flatter himself with the slightest hope of this kind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.745, - "num_samples": 267920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149214/2086-149214-0002.wav", - "speed": 1 - } - ], - "original_duration": 16.745, - "original_num_samples": 267920, - "transcript": "the author has considered it hardly worth his while therefore relentlessly to impale the story with its moral as with an iron rod or rather as by sticking a pin through a butterfly thus at once depriving it of life and causing it to stiffen in an ungainly and unnatural attitude" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.994938, - "num_samples": 143919, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149214/2086-149214-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.994938, - "original_num_samples": 143919, - "transcript": "if permitted by the historical connection which though slight was essential to his plan the author would very willingly have avoided anything of this nature" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.22, - "num_samples": 243520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149214/2086-149214-0004.wav", - "speed": 1 - } - ], - "original_duration": 15.22, - "original_num_samples": 243520, - "transcript": "he trusts not to be considered as unpardonably offending by laying out a street that infringes upon nobody's private rights and appropriating a lot of land which had no visible owner and building a house of materials long in use for constructing castles in the air" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.095, - "num_samples": 193520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0000.wav", - "speed": 1 - } - ], - "original_duration": 12.095, - "original_num_samples": 193520, - "transcript": "the enclosure had formerly been very extensive but was now contracted within small compass and hemmed about partly by high wooden fences and partly by the outbuildings of houses that stood on another street" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.78, - "num_samples": 316480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0001.wav", - "speed": 1 - } - ], - "original_duration": 19.78, - "original_num_samples": 
316480, - "transcript": "the white double rosebush had evidently been propped up anew against the house since the commencement of the season and a pear tree and three damson trees which except a row of currant bushes constituted the only varieties of fruit bore marks of the recent amputation of several superfluous or defective limbs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.01, - "num_samples": 256160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0002.wav", - "speed": 1 - } - ], - "original_duration": 16.01, - "original_num_samples": 256160, - "transcript": "there were also a few species of antique and hereditary flowers in no very flourishing condition but scrupulously weeded as if some person either out of love or curiosity had been anxious to bring them to such perfection as they were capable of attaining" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.81, - "num_samples": 364960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0003.wav", - "speed": 1 - } - ], - "original_duration": 22.81, - "original_num_samples": 364960, - "transcript": "summer squashes almost in their golden blossom cucumbers now evincing a tendency to spread away from the main stock and ramble far and wide two or three rows of string beans and as many more that were about to festoon themselves on poles tomatoes occupying a site so sheltered and sunny that the plants were already gigantic and promised an early and abundant harvest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.22, - "num_samples": 115520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.22, - "original_num_samples": 115520, - 
"transcript": "phoebe wondered whose care and toil it could have been that had planted these vegetables and kept the soil so clean and orderly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.09, - "num_samples": 145440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.09, - "original_num_samples": 145440, - "transcript": "bees too strange to say had thought it worth their while to come hither possibly from the range of hives beside some farm house miles away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.35, - "num_samples": 165600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.35, - "original_num_samples": 165600, - "transcript": "this was a fountain set round with a rim of old mossy stones and paved in its bed with what appeared to be a sort of mosaic work of variously colored pebbles" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.24, - "num_samples": 83840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.24, - "original_num_samples": 83840, - "transcript": "it now contained only chanticleer his two wives and a solitary chicken" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.89, - "num_samples": 142240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.89, - "original_num_samples": 142240, - "transcript": "it was evident that the race had degenerated like many a noble race besides in 
consequence of too strict a watchfulness to keep it pure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.6, - "num_samples": 169600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0009.wav", - "speed": 1 - } - ], - "original_duration": 10.6, - "original_num_samples": 169600, - "transcript": "these feathered people had existed too long in their distinct variety a fact of which the present representatives judging by their lugubrious deportment seemed to be aware" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.95, - "num_samples": 223200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0010.wav", - "speed": 1 - } - ], - "original_duration": 13.95, - "original_num_samples": 223200, - "transcript": "they kept themselves alive unquestionably and laid now and then an egg and hatched a chicken not for any pleasure of their own but that the world might not absolutely lose what had once been so admirable a breed of fowls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.43, - "num_samples": 310880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0011.wav", - "speed": 1 - } - ], - "original_duration": 19.43, - "original_num_samples": 310880, - "transcript": "the distinguishing mark of the hens was a crest of lamentably scanty growth in these latter days but so oddly and wickedly analogous to hepzibah's turban that phoebe to the poignant distress of her conscience but inevitably was led to fancy a general resemblance betwixt these forlorn bipeds and her respectable relative" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.995, - "num_samples": 239920, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0012.wav", - "speed": 1 - } - ], - "original_duration": 14.995, - "original_num_samples": 239920, - "transcript": "the chicken crept through the pales of the coop and ran with some show of liveliness to her feet while chanticleer and the ladies of his household regarded her with queer sidelong glances and then croaked one to another as if communicating their sage opinions of her character" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.57, - "num_samples": 281120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0013.wav", - "speed": 1 - } - ], - "original_duration": 17.57, - "original_num_samples": 281120, - "transcript": "so wise as well as antique was their aspect as to give color to the idea not merely that they were the descendants of a time honored race but that they had existed in their individual capacity ever since the house of the seven gables was founded and were somehow mixed up with its destiny" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.58, - "num_samples": 153280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0014.wav", - "speed": 1 - } - ], - "original_duration": 9.58, - "original_num_samples": 153280, - "transcript": "he held a hoe in his hand and while phoebe was gone in quest of the crumbs had begun to busy himself with drawing up fresh earth about the roots of the tomatoes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.19, - "num_samples": 131040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.19, - "original_num_samples": 131040, - "transcript": "they have known me much longer but 
never honor me with any familiarity though hardly a day passes without my bringing them food" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.52, - "num_samples": 120320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0016.wav", - "speed": 1 - } - ], - "original_duration": 7.52, - "original_num_samples": 120320, - "transcript": "miss hepzibah i suppose will interweave the fact with her other traditions and set it down that the fowls know you to be a pyncheon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.7, - "num_samples": 139200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.7, - "original_num_samples": 139200, - "transcript": "ah but these hens answered the young man these hens of aristocratic lineage would scorn to understand the vulgar language of a barn yard fowl" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.83, - "num_samples": 109280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.83, - "original_num_samples": 109280, - "transcript": "i prefer to think and so would miss hepzibah that they recognize the family tone for you are a pyncheon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.18, - "num_samples": 210880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0019.wav", - "speed": 1 - } - ], - "original_duration": 13.18, - "original_num_samples": 210880, - "transcript": "my name is phoebe pyncheon said the girl with a manner of some reserve for she was aware that her new acquaintance could be no 
other than the daguerreotypist of whose lawless propensities the old maid had given her a disagreeable idea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.93, - "num_samples": 46880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.93, - "original_num_samples": 46880, - "transcript": "i turn up the earth by way of pastime" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.335, - "num_samples": 53360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.335, - "original_num_samples": 53360, - "transcript": "it is like a bandage over one's eyes to come into it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.265, - "num_samples": 148240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0022.wav", - "speed": 1 - } - ], - "original_duration": 9.265, - "original_num_samples": 148240, - "transcript": "if you would permit me said the artist looking at phoebe i should like to try whether the daguerreotype can bring out disagreeable traits on a perfectly amiable face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.365, - "num_samples": 117840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.365, - "original_num_samples": 117840, - "transcript": "most of my likenesses do look unamiable but the very sufficient reason i fancy is because the originals are so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.805, - 
"num_samples": 60880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.805, - "original_num_samples": 60880, - "transcript": "there is a wonderful insight in heaven's broad and simple sunshine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.255, - "num_samples": 164080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0025.wav", - "speed": 1 - } - ], - "original_duration": 10.255, - "original_num_samples": 164080, - "transcript": "while we give it credit only for depicting the merest surface it actually brings out the secret character with a truth that no painter would ever venture upon even could he detect it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.81, - "num_samples": 76960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.81, - "original_num_samples": 76960, - "transcript": "yet the original wears to common eyes a very different expression" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.245, - "num_samples": 67920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.245, - "original_num_samples": 67920, - "transcript": "he exhibited a daguerreotype miniature in a morocco case" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.855, - "num_samples": 45680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.855, - "original_num_samples": 45680, - 
"transcript": "phoebe merely glanced at it and gave it back" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.58, - "num_samples": 89280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0029.wav", - "speed": 1 - } - ], - "original_duration": 5.58, - "original_num_samples": 89280, - "transcript": "i can assure you that this is a modern face and one which you will very probably meet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.1, - "num_samples": 129600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0030.wav", - "speed": 1 - } - ], - "original_duration": 8.1, - "original_num_samples": 129600, - "transcript": "the sun as you see tells quite another story and will not be coaxed out of it after half a dozen patient attempts on my part" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.58, - "num_samples": 137280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0031.wav", - "speed": 1 - } - ], - "original_duration": 8.58, - "original_num_samples": 137280, - "transcript": "here we have the man sly subtle hard imperious and withal cold as ice look at that eye" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "and yet if you could only see the benign smile of the original" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.435, - "num_samples": 118960, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0033.wav", - "speed": 1 - } - ], - "original_duration": 7.435, - "original_num_samples": 118960, - "transcript": "well i don't wish to see it any more observed phoebe turning away her eyes it is certainly very like the old portrait" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.91, - "num_samples": 94560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0034.wav", - "speed": 1 - } - ], - "original_duration": 5.91, - "original_num_samples": 94560, - "transcript": "if the original is still in the world i think he might defy the sun to make him look stern and hard" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.549938, - "num_samples": 168799, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0035.wav", - "speed": 1 - } - ], - "original_duration": 10.549938, - "original_num_samples": 168799, - "transcript": "is there nothing wild in the eye continued holgrave so earnestly that it embarrassed phoebe as did also the quiet freedom with which he presumed on their so recent acquaintance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.32, - "num_samples": 101120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0036.wav", - "speed": 1 - } - ], - "original_duration": 6.32, - "original_num_samples": 101120, - "transcript": "it is nonsense said phoebe a little impatiently for us to talk about a picture which you have never seen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0037.wav", - "speed": 1 - } - ], 
- "original_duration": 4.835, - "original_num_samples": 77360, - "transcript": "since you are a friend of my cousin hepzibah's you should ask her to show you the picture" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.35, - "num_samples": 69600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0038.wav", - "speed": 1 - } - ], - "original_duration": 4.35, - "original_num_samples": 69600, - "transcript": "so we will be fellow laborers somewhat on the community system" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.48, - "num_samples": 39680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0039.wav", - "speed": 1 - } - ], - "original_duration": 2.48, - "original_num_samples": 39680, - "transcript": "she did not altogether like him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.7, - "num_samples": 107200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0040.wav", - "speed": 1 - } - ], - "original_duration": 6.7, - "original_num_samples": 107200, - "transcript": "oh rejoined the daguerreotypist because like an old lady's cup of tea it is water bewitched" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.805, - "num_samples": 236880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0041.wav", - "speed": 1 - } - ], - "original_duration": 14.805, - "original_num_samples": 236880, - "transcript": "she was indistinctly aware however that the gaunt figure of the old gentlewoman was sitting in one of the straight backed chairs a little withdrawn from the window the faint gleam of which showed the blanched paleness of her cheek turned 
sideways towards a corner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.84, - "num_samples": 45440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0042.wav", - "speed": 1 - } - ], - "original_duration": 2.84, - "original_num_samples": 45440, - "transcript": "but put it on the table in the corner of the passage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.77, - "num_samples": 44320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0043.wav", - "speed": 1 - } - ], - "original_duration": 2.77, - "original_num_samples": 44320, - "transcript": "what an instrument is the human voice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.95, - "num_samples": 63200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0044.wav", - "speed": 1 - } - ], - "original_duration": 3.95, - "original_num_samples": 63200, - "transcript": "how wonderfully responsive to every emotion of the human soul" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.545, - "num_samples": 72720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0045.wav", - "speed": 1 - } - ], - "original_duration": 4.545, - "original_num_samples": 72720, - "transcript": "fewer words than before but with the same mysterious music in them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.415, - "num_samples": 54640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0046.wav", - "speed": 1 - } - ], - "original_duration": 3.415, - "original_num_samples": 54640, - "transcript": "pray go 
to bed for i am sure you must need rest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.92, - "num_samples": 62720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0047.wav", - "speed": 1 - } - ], - "original_duration": 3.92, - "original_num_samples": 62720, - "transcript": "i will sit in the parlor awhile and collect my thoughts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.31, - "num_samples": 180960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0048.wav", - "speed": 1 - } - ], - "original_duration": 11.31, - "original_num_samples": 180960, - "transcript": "while thus dismissing her the maiden lady stept forward kissed phoebe and pressed her to her heart which beat against the girl's bosom with a strong high and tumultuous swell" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.605, - "num_samples": 185680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2086/149220/2086-149220-0049.wav", - "speed": 1 - } - ], - "original_duration": 11.605, - "original_num_samples": 185680, - "transcript": "at some uncertain period in the depths of night and as it were through the thin veil of a dream she was conscious of a footstep mounting the stairs heavily but not with force and decision" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.549937, - "num_samples": 56799, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.549937, - "original_num_samples": 56799, - "transcript": "long ago there lived a merchant who had three daughters" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 7.350062, - "num_samples": 117601, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.350062, - "original_num_samples": 117601, - "transcript": "every year at a certain day of a certain month he went away to a distant city to collect money on an account" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.845, - "num_samples": 125520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.845, - "original_num_samples": 125520, - "transcript": "how do you know asked their father i am older and wiser than you are and i know that there are many evils which might come upon you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.23, - "num_samples": 147680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.23, - "original_num_samples": 147680, - "transcript": "when it was evening he led his band into a nearby street and in his disguise approached the merchant's house he knocked at the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.735, - "num_samples": 75760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.735, - "original_num_samples": 75760, - "transcript": "have pity upon a poor unfortunate one he called out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.21, - "num_samples": 83360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/7976/110124/7976-110124-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.21, - "original_num_samples": 83360, - "transcript": "let me enter i pray you to pass the night under your roof" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.62, - "num_samples": 169920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.62, - "original_num_samples": 169920, - "transcript": "it's surely a terrible storm outside said the merchant's eldest daughter as the wind rattled the tiles of the roof and the rain beat in torrents against the doors and windows" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.74, - "num_samples": 43840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.74, - "original_num_samples": 43840, - "transcript": "he is old as well as poor she said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.295, - "num_samples": 84720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.295, - "original_num_samples": 84720, - "transcript": "if we decide to show mercy to this poor beggar it is not for you to oppose it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.81, - "num_samples": 76960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.81, - "original_num_samples": 76960, - "transcript": "bui we should not forget our promise to our father cried the youngest daughter" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.61, - "num_samples": 105760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.61, - "original_num_samples": 105760, - "transcript": "however in spite of all she could say the elder sisters opened the door and admitted the beggar" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.47, - "num_samples": 87520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.47, - "original_num_samples": 87520, - "transcript": "it is a fearful night to send away a beggar said the eldest sister while they were eating" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.295, - "num_samples": 132720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.295, - "original_num_samples": 132720, - "transcript": "while they were talking the beggar had taken the apples which the girls were to eat for dessert and had sprinkled a sleeping powder over them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.555, - "num_samples": 104880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.555, - "original_num_samples": 104880, - "transcript": "the two eldest ate their apples but the youngest could not eat that night she threw the apple away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.21, - "num_samples": 83360, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/7976/110124/7976-110124-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.21, - "original_num_samples": 83360, - "transcript": "she did not stir and he knew that the sleeping powder had thoroughly done its work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.35, - "num_samples": 85600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.35, - "original_num_samples": 85600, - "transcript": "then she heard him go down the stairway and unbolt the heavy doors which led into the store" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.18, - "num_samples": 66880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.18, - "original_num_samples": 66880, - "transcript": "it was the youngest one who deceived me cried the robber chieftain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.685, - "num_samples": 42960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.685, - "original_num_samples": 42960, - "transcript": "perhaps you can outwit her yet cried another" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.075, - "num_samples": 129200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0018.wav", - "speed": 1 - } - ], - "original_duration": 8.075, - "original_num_samples": 129200, - "transcript": "the merchant's daughter at first did not answer but as he kept on calling to her she finally asked him what it was that he wanted" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.745, - "num_samples": 43920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.745, - "original_num_samples": 43920, - "transcript": "i promise you i will do you no harm" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.54, - "num_samples": 40640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.54, - "original_num_samples": 40640, - "transcript": "you shall not come into my father's house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.365, - "num_samples": 53840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.365, - "original_num_samples": 53840, - "transcript": "pass the charm out to me then said the robber" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.02, - "num_samples": 64320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.02, - "original_num_samples": 64320, - "transcript": "when she returned his hand was sticking through the hole in the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.44, - "num_samples": 55040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.44, - "original_num_samples": 55040, - "transcript": "the cries and curses of the robbers filled the air" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.15, - "num_samples": 50400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.15, - "original_num_samples": 50400, - "transcript": "they tried in vain to break down the great doors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.59, - "num_samples": 41440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110124/7976-110124-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.59, - "original_num_samples": 41440, - "transcript": "all my worries about you were foolish" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.16, - "num_samples": 146560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.16, - "original_num_samples": 146560, - "transcript": "grant was only a few miles away but although commander in chief he knew nothing of the hardest fought battle of the civil war until it was over" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.8, - "num_samples": 44800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.8, - "original_num_samples": 44800, - "transcript": "my own regiment was in the advance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.045, - "num_samples": 48720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.045, - "original_num_samples": 48720, - "transcript": "our brigade was 
fearfully outnumbered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.175, - "num_samples": 146800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.175, - "original_num_samples": 146800, - "transcript": "there were no breastworks yet that one little brigade of hamilton's division stood there in the open and repulsed assault after assault" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.015, - "num_samples": 96240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.015, - "original_num_samples": 96240, - "transcript": "not balaklava nor the alma saw such fighting it was a duel to the death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.56, - "num_samples": 88960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.56, - "original_num_samples": 88960, - "transcript": "no battery in the whole four years war lost so many men in so short a time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.4, - "num_samples": 150400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0006.wav", - "speed": 1 - } - ], - "original_duration": 9.4, - "original_num_samples": 150400, - "transcript": "one daring rebel was shot down and bayoneted clear behind the line of company b where he had broken through to seize the flag of my regiment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.48, - "num_samples": 103680, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.48, - "original_num_samples": 103680, - "transcript": "that night the enemy slipped away leaving hundreds and hundreds of his dead and wounded on the field" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.19, - "num_samples": 115040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0008.wav", - "speed": 1 - } - ], - "original_duration": 7.19, - "original_num_samples": 115040, - "transcript": "with a few lanterns our men then went about and tried to gather up the wounded the dead were left till morning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.045, - "num_samples": 64720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.045, - "original_num_samples": 64720, - "transcript": "it was not a question who was dead or wounded but who was not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.525, - "num_samples": 72400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.525, - "original_num_samples": 72400, - "transcript": "fifteen officers of our little half regiment were dead or wounded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.425, - "num_samples": 102800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.425, - "original_num_samples": 102800, - "transcript": "i remained awake all night talking with a comrade who shared 
my blanket with me poor jimmy king" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.155, - "num_samples": 82480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.155, - "original_num_samples": 82480, - "transcript": "he survived the war only to be murdered later on a plantation in mississippi" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.025, - "num_samples": 160400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0013.wav", - "speed": 1 - } - ], - "original_duration": 10.025, - "original_num_samples": 160400, - "transcript": "when morning came the firing opened and for all that day the battle raged fiercely at the left and center left we getting the worst of it too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.49, - "num_samples": 119840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.49, - "original_num_samples": 119840, - "transcript": "that evening an order came for us hamilton's division to assault the enemy's left flank at midnight" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.355, - "num_samples": 117680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.355, - "original_num_samples": 117680, - "transcript": "under the same quiet moonlight and only six hundred yards away from us also lay the victorious rebel army" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.205, - "num_samples": 163280, 
- "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0016.wav", - "speed": 1 - } - ], - "original_duration": 10.205, - "original_num_samples": 163280, - "transcript": "once in the night i slipped away from the bivouac and hurried to the old tishimingo hotel to see a lieutenant of my company who had been shot through the breast" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.935, - "num_samples": 30960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0017.wav", - "speed": 1 - } - ], - "original_duration": 1.935, - "original_num_samples": 30960, - "transcript": "i could not help my friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "go back to the regiment he said smiling all will be needed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.225, - "num_samples": 83600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.225, - "original_num_samples": 83600, - "transcript": "my friend with many others was being carried out to die elsewhere" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.09, - "num_samples": 33440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.09, - "original_num_samples": 33440, - "transcript": "i hastened back to the lines" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.655, - "num_samples": 74480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.655, - "original_num_samples": 74480, - "transcript": "the cloud of rebels we had seen divided itself into three columns" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.38, - "num_samples": 86080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0022.wav", - "speed": 1 - } - ], - "original_duration": 5.38, - "original_num_samples": 86080, - "transcript": "a perfect blaze of close range musketry too mowed them down like grass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.78, - "num_samples": 76480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.78, - "original_num_samples": 76480, - "transcript": "they lay in heaps of dozens even close up to the works" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.135, - "num_samples": 98160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.135, - "original_num_samples": 98160, - "transcript": "that night i stood guard under an oak tree on the battlefield among the unburied dead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.235, - "num_samples": 83760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.235, - "original_num_samples": 83760, - "transcript": "indeed we of the 
rank and file had little confidence in grant in those days" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.55, - "num_samples": 56800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0026.wav", - "speed": 1 - } - ], - "original_duration": 3.55, - "original_num_samples": 56800, - "transcript": "rosecrans protested it was in vain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.675, - "num_samples": 106800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0027.wav", - "speed": 1 - } - ], - "original_duration": 6.675, - "original_num_samples": 106800, - "transcript": "it required months and great events to make grant the hero of the army which he afterward became" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.87, - "num_samples": 61920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.87, - "original_num_samples": 61920, - "transcript": "for some reason the dead at hatchie bridge were not buried" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.79, - "num_samples": 188640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/105575/7976-105575-0029.wav", - "speed": 1 - } - ], - "original_duration": 11.79, - "original_num_samples": 188640, - "transcript": "a week after the battle my brother rode by there on a cavalry expedition and made the horrible discovery that hogs were eating up the bodies of our dead heroes that too was war" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.22, - "num_samples": 243520, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0000.wav", - "speed": 1 - } - ], - "original_duration": 15.22, - "original_num_samples": 243520, - "transcript": "he had little enough to break or bite and once when there was a great famine in the land he could hardly procure even his daily bread and as he lay thinking in his bed one night he sighed and said to his wife what will become of us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.72, - "num_samples": 75520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.72, - "original_num_samples": 75520, - "transcript": "how can we feed our children when we have no more than we can eat ourselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.12, - "num_samples": 113920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.12, - "original_num_samples": 113920, - "transcript": "oh you simpleton said she then we must all four die of hunger you had better plane the coffins for us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.1, - "num_samples": 97600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.1, - "original_num_samples": 97600, - "transcript": "but she left him no peace till he consented saying ah but i shall miss the poor children" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.26, - "num_samples": 116160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0004.wav", - "speed": 1 - } - ], - 
"original_duration": 7.26, - "original_num_samples": 116160, - "transcript": "and as soon as their parents had gone to sleep he got up put on his coat and unbarring the back door went out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.795, - "num_samples": 124720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.795, - "original_num_samples": 124720, - "transcript": "ah father said hansel i am looking at my white cat sitting upon the roof of the house and trying to say good bye" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.815, - "num_samples": 125040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.815, - "original_num_samples": 125040, - "transcript": "but in reality hansel was not looking at a cat but every time he stopped he dropped a pebble out of his pocket upon the path" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.705, - "num_samples": 107280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.705, - "original_num_samples": 107280, - "transcript": "but her husband felt heavy at heart and thought it were better to share the last crust with the children" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.555, - "num_samples": 136880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.555, - "original_num_samples": 136880, - "transcript": "early in the morning the stepmother came and pulled them out 
of bed and gave them each a slice of bread which was still smaller than the former piece" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.725, - "num_samples": 107600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.725, - "original_num_samples": 107600, - "transcript": "we are going into the forest to hew wood and in the evening when we are ready we will come and fetch you again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.86, - "num_samples": 173760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.86, - "original_num_samples": 173760, - "transcript": "hansel thought the roof tasted very nice and so he tore off a great piece while grethel broke a large round pane out of the window and sat down quite contentedly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.82, - "num_samples": 141120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0011.wav", - "speed": 1 - } - ], - "original_duration": 8.82, - "original_num_samples": 141120, - "transcript": "come in and stop with me and no harm shall come to you and so saying she took them both by the hand and led them into her cottage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.28, - "num_samples": 260480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0012.wav", - "speed": 1 - } - ], - "original_duration": 16.28, - "original_num_samples": 260480, - "transcript": "the old woman behaved very kindly to them but in reality she was a wicked old witch who way laid 
children and built the breadhouse in order to entice them in but as soon as they were in her power she killed them cooked and ate them and made a great festival of the day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.32, - "num_samples": 149120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0013.wav", - "speed": 1 - } - ], - "original_duration": 9.32, - "original_num_samples": 149120, - "transcript": "then she took up hansel with her rough hand and shut him up in a little cage with a lattice door and although he screamed loudly it was of no use" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.295, - "num_samples": 100720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0014.wav", - "speed": 1 - } - ], - "original_duration": 6.295, - "original_num_samples": 100720, - "transcript": "grethel began to cry but it was all useless for the old witch made her do as she wanted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.75, - "num_samples": 124000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.75, - "original_num_samples": 124000, - "transcript": "grethel she cried in a passion get some water quickly be hansel fat or lean this morning i will kill and cook him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.135, - "num_samples": 50160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.135, - "original_num_samples": 50160, - "transcript": "dear good god help us now she prayed" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.26, - "num_samples": 212160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0017.wav", - "speed": 1 - } - ], - "original_duration": 13.26, - "original_num_samples": 212160, - "transcript": "creep in said the witch and see if it is hot enough and then we will put in the bread but she intended when grethel got in to shut up the oven and let her bake so that she might eat her as well as hansel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.46, - "num_samples": 39360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.46, - "original_num_samples": 39360, - "transcript": "see i could even get in myself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.825, - "num_samples": 45200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.825, - "original_num_samples": 45200, - "transcript": "and she got up and put her head into the oven" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.535, - "num_samples": 136560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7976/110523/7976-110523-0020.wav", - "speed": 1 - } - ], - "original_duration": 8.535, - "original_num_samples": 136560, - "transcript": "and now as there was nothing to fear they went back to the witch's house where in every corner were caskets full of pearls and precious stones" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.035, - "num_samples": 80560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/7976/110523/7976-110523-0021.wav", - "speed": 1 - } - ], - "original_duration": 5.035, - "original_num_samples": 80560, - "transcript": "then they began to run and rushing into the house they fell upon their father's neck" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.95, - "num_samples": 239200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0000.wav", - "speed": 1 - } - ], - "original_duration": 14.95, - "original_num_samples": 239200, - "transcript": "fuchs brought up a sack of potatoes and a piece of cured pork from the cellar and grandmother packed some loaves of saturday's bread a jar of butter and several pumpkin pies in the straw of the wagon box" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.21, - "num_samples": 227360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0001.wav", - "speed": 1 - } - ], - "original_duration": 14.21, - "original_num_samples": 227360, - "transcript": "occasionally one of the horses would tear off with his teeth a plant full of blossoms and walk along munching it the flowers nodding in time to his bites as he ate down toward them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.395, - "num_samples": 70320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.395, - "original_num_samples": 70320, - "transcript": "it's no better than a badger hole no proper dugout at all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.495, - "num_samples": 39920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0003.wav", - "speed": 1 - } 
- ], - "original_duration": 2.495, - "original_num_samples": 39920, - "transcript": "now why is that otto" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.025, - "num_samples": 144400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0004.wav", - "speed": 1 - } - ], - "original_duration": 9.025, - "original_num_samples": 144400, - "transcript": "presently against one of those banks i saw a sort of shed thatched with the same wine colored grass that grew everywhere" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.47, - "num_samples": 55520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.47, - "original_num_samples": 55520, - "transcript": "very glad very glad she ejaculated" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.38, - "num_samples": 86080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.38, - "original_num_samples": 86080, - "transcript": "you'll get fixed up comfortable after while missus shimerda make good house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.865, - "num_samples": 93840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.865, - "original_num_samples": 93840, - "transcript": "my grandmother always spoke in a very loud tone to foreigners as if they were deaf" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.92, - "num_samples": 238720, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0008.wav", - "speed": 1 - } - ], - "original_duration": 14.92, - "original_num_samples": 238720, - "transcript": "she made missus shimerda understand the friendly intention of our visit and the bohemian woman handled the loaves of bread and even smelled them and examined the pies with lively curiosity exclaiming much good much thank" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.16, - "num_samples": 82560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.16, - "original_num_samples": 82560, - "transcript": "the family had been living on corncakes and sorghum molasses for three days" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.855, - "num_samples": 61680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.855, - "original_num_samples": 61680, - "transcript": "i remembered what the conductor had said about her eyes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.23, - "num_samples": 83680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.23, - "original_num_samples": 83680, - "transcript": "her skin was brown too and in her cheeks she had a glow of rich dark color" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.57, - "num_samples": 73120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.57, - "original_num_samples": 73120, - "transcript": "even 
from a distance one could see that there was something strange about this boy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.89, - "num_samples": 46240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.89, - "original_num_samples": 46240, - "transcript": "he was born like that the others are smart" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.185, - "num_samples": 34960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.185, - "original_num_samples": 34960, - "transcript": "ambrosch he make good farmer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.175, - "num_samples": 66800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.175, - "original_num_samples": 66800, - "transcript": "he struck ambrosch on the back and the boy smiled knowingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.17, - "num_samples": 50720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.17, - "original_num_samples": 50720, - "transcript": "at that moment the father came out of the hole in the bank" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.3, - "num_samples": 100800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.3, - "original_num_samples": 100800, - 
"transcript": "it was so long that it bushed out behind his ears and made him look like the old portraits i remembered in virginia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.535, - "num_samples": 56560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0018.wav", - "speed": 1 - } - ], - "original_duration": 3.535, - "original_num_samples": 56560, - "transcript": "i noticed how white and well shaped his own hands were" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.135, - "num_samples": 98160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.135, - "original_num_samples": 98160, - "transcript": "we stood panting on the edge of the ravine looking down at the trees and bushes that grew below us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.78, - "num_samples": 108480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.78, - "original_num_samples": 108480, - "transcript": "the wind was so strong that i had to hold my hat on and the girls skirts were blown out before them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.935, - "num_samples": 78960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.935, - "original_num_samples": 78960, - "transcript": "she looked at me her eyes fairly blazing with things she could not say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.54, - "num_samples": 120640, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.54, - "original_num_samples": 120640, - "transcript": "she pointed into the gold cottonwood tree behind whose top we stood and said again what name" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.14, - "num_samples": 66240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.14, - "original_num_samples": 66240, - "transcript": "antonia pointed up to the sky and questioned me with her glance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.085, - "num_samples": 49360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.085, - "original_num_samples": 49360, - "transcript": "she got up on her knees and wrung her hands" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.41, - "num_samples": 38560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.41, - "original_num_samples": 38560, - "transcript": "she was quick and very eager" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.915, - "num_samples": 110640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0026.wav", - "speed": 1 - } - ], - "original_duration": 6.915, - "original_num_samples": 110640, - "transcript": "we were so deep in the grass that we could see nothing but the blue sky over us and the gold tree in front of us" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.73, - "num_samples": 123680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0027.wav", - "speed": 1 - } - ], - "original_duration": 7.73, - "original_num_samples": 123680, - "transcript": "after antonia had said the new words over and over she wanted to give me a little chased silver ring she wore on her middle finger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.88, - "num_samples": 94080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0028.wav", - "speed": 1 - } - ], - "original_duration": 5.88, - "original_num_samples": 94080, - "transcript": "when i came up he touched my shoulder and looked searchingly down into my face for several seconds" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.65, - "num_samples": 90400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/147956/1988-147956-0029.wav", - "speed": 1 - } - ], - "original_duration": 5.65, - "original_num_samples": 90400, - "transcript": "i became somewhat embarrassed for i was used to being taken for granted by my elders" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.825, - "num_samples": 269200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0000.wav", - "speed": 1 - } - ], - "original_duration": 16.825, - "original_num_samples": 269200, - "transcript": "in aristocratic communities the people readily give themselves up to bursts of tumultuous and boisterous gayety which shake off at once the recollection of their privations the natives of democracies are not fond of being thus violently broken in upon and they never lose sight of their own selves without regret" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.02, - "num_samples": 176320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0001.wav", - "speed": 1 - } - ], - "original_duration": 11.02, - "original_num_samples": 176320, - "transcript": "an american instead of going in a leisure hour to dance merrily at some place of public resort as the fellows of his calling continue to do throughout the greater part of europe shuts himself up at home to drink" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.45, - "num_samples": 71200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.45, - "original_num_samples": 71200, - "transcript": "i believe the seriousness of the americans arises partly from their pride" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.58, - "num_samples": 89280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.58, - "original_num_samples": 89280, - "transcript": "this is more especially the case amongst those free nations which form democratic communities" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.7, - "num_samples": 219200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0004.wav", - "speed": 1 - } - ], - "original_duration": 13.7, - "original_num_samples": 219200, - "transcript": "then there are in all classes a very large number of men constantly occupied with the serious affairs of the government and those whose thoughts are not engaged in the direction of the commonwealth are wholly engrossed by the 
acquisition of a private fortune" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.35, - "num_samples": 261600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0005.wav", - "speed": 1 - } - ], - "original_duration": 16.35, - "original_num_samples": 261600, - "transcript": "i do not believe in such republics any more than in that of plato or if the things we read of really happened i do not hesitate to affirm that these supposed democracies were composed of very different elements from ours and that they had nothing in common with the latter except their name" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.865, - "num_samples": 365840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0006.wav", - "speed": 1 - } - ], - "original_duration": 22.865, - "original_num_samples": 365840, - "transcript": "in aristocracies every man has one sole object which he unceasingly pursues but amongst democratic nations the existence of man is more complex the same mind will almost always embrace several objects at the same time and these objects are frequently wholly foreign to each other as it cannot know them all well the mind is readily satisfied with imperfect notions of each" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.44, - "num_samples": 119040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0007.wav", - "speed": 1 - } - ], - "original_duration": 7.44, - "original_num_samples": 119040, - "transcript": "chapter sixteen why the national vanity of the americans is more restless and captious than that of the english" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.815, - "num_samples": 109040, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.815, - "original_num_samples": 109040, - "transcript": "the americans in their intercourse with strangers appear impatient of the smallest censure and insatiable of praise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.4, - "num_samples": 118400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0009.wav", - "speed": 1 - } - ], - "original_duration": 7.4, - "original_num_samples": 118400, - "transcript": "if i say to an american that the country he lives in is a fine one ay he replies there is not its fellow in the world" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.685, - "num_samples": 138960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.685, - "original_num_samples": 138960, - "transcript": "if i applaud the freedom which its inhabitants enjoy he answers freedom is a fine thing but few nations are worthy to enjoy it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.55, - "num_samples": 152800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.55, - "original_num_samples": 152800, - "transcript": "in aristocratic countries the great possess immense privileges upon which their pride rests without seeking to rely upon the lesser advantages which accrue to them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.655, - "num_samples": 218480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1988/148538/1988-148538-0012.wav", - "speed": 1 - } - ], - "original_duration": 13.655, - "original_num_samples": 218480, - "transcript": "they therefore entertain a calm sense of their superiority they do not dream of vaunting privileges which everyone perceives and no one contests and these things are not sufficiently new to them to be made topics of conversation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.405, - "num_samples": 166480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0013.wav", - "speed": 1 - } - ], - "original_duration": 10.405, - "original_num_samples": 166480, - "transcript": "they stand unmoved in their solitary greatness well assured that they are seen of all the world without any effort to show themselves off and that no one will attempt to drive them from that position" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.615, - "num_samples": 169840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0014.wav", - "speed": 1 - } - ], - "original_duration": 10.615, - "original_num_samples": 169840, - "transcript": "when an aristocracy carries on the public affairs its national pride naturally assumes this reserved indifferent and haughty form which is imitated by all the other classes of the nation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.335, - "num_samples": 277360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/148538/1988-148538-0015.wav", - "speed": 1 - } - ], - "original_duration": 17.335, - "original_num_samples": 277360, - "transcript": "these persons then displayed towards each other precisely the same puerile jealousies which animate the men of democracies the same eagerness to snatch the smallest 
advantages which their equals contested and the same desire to parade ostentatiously those of which they were in possession" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.32, - "num_samples": 53120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.32, - "original_num_samples": 53120, - "transcript": "the two stray kittens gradually make themselves at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.895, - "num_samples": 126320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.895, - "original_num_samples": 126320, - "transcript": "somehow or other cat has taught them that he's in charge here and he just chases them for fun now and again when he's not busy sleeping" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.555, - "num_samples": 72880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.555, - "original_num_samples": 72880, - "transcript": "she doesn't pick them up but just having them in the room sure doesn't give her asthma" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.16, - "num_samples": 82560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.16, - "original_num_samples": 82560, - "transcript": "when are you getting rid of these cats i'm not fixing to start an annex to kate's cat home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.5, - 
"num_samples": 88000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.5, - "original_num_samples": 88000, - "transcript": "right away when i bring home my new program he says how come you're taking one less course this half" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.455, - "num_samples": 103280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.455, - "original_num_samples": 103280, - "transcript": "i explain that i'm taking music and also biology algebra english and french music he snorts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.98, - "num_samples": 31680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0006.wav", - "speed": 1 - } - ], - "original_duration": 1.98, - "original_num_samples": 31680, - "transcript": "pop it's a course" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.28, - "num_samples": 84480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.28, - "original_num_samples": 84480, - "transcript": "he does and for once i win a round i keep music for this semester" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.47, - "num_samples": 39520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.47, - "original_num_samples": 39520, - "transcript": "i'll be lucky if i have time to breathe" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.52, - "num_samples": 104320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.52, - "original_num_samples": 104320, - "transcript": "sometimes schools do let kids take a lot of soft courses and then they're out on a limb later huh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.03, - "num_samples": 48480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.03, - "original_num_samples": 48480, - "transcript": "so he cares huh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.69, - "num_samples": 155040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.69, - "original_num_samples": 155040, - "transcript": "besides says tom half the reason you and your father are always bickering is that you're so much alike me like him sure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.64, - "num_samples": 58240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.64, - "original_num_samples": 58240, - "transcript": "as long as there's a bone on the floor the two of you worry it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.535, - "num_samples": 88560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.535, - "original_num_samples": 88560, - "transcript": "i get the 
pillows comfortably arranged on the floor with a big bottle of soda and a bag of popcorn within easy reach" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.53, - "num_samples": 40480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.53, - "original_num_samples": 40480, - "transcript": "pop goes right on tuning his channel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.745, - "num_samples": 75920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.745, - "original_num_samples": 75920, - "transcript": "you're getting altogether too upset about these programs stop it and behave yourself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.455, - "num_samples": 39280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.455, - "original_num_samples": 39280, - "transcript": "it's your fault mop it up yourself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.12, - "num_samples": 97920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.12, - "original_num_samples": 97920, - "transcript": "i hear the t v going for a few minutes then pop turns it off and goes in the kitchen to talk to mom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.35, - "num_samples": 85600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0018.wav", - 
"speed": 1 - } - ], - "original_duration": 5.35, - "original_num_samples": 85600, - "transcript": "well i don't think you should turn a guy's t v program off in the middle without even finding out about it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.715, - "num_samples": 43440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.715, - "original_num_samples": 43440, - "transcript": "i look at my watch it's a quarter to eleven" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.4, - "num_samples": 102400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.4, - "original_num_samples": 102400, - "transcript": "i turn off the television set i've lost track of what's happening and it doesn't seem to be the grandfather who's the spook after all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.28, - "num_samples": 68480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.28, - "original_num_samples": 68480, - "transcript": "it's the first time hilda has been to our house and tom introduces her around" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "i told tom we shouldn't come so late says hilda" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
4.32, - "num_samples": 69120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.32, - "original_num_samples": 69120, - "transcript": "tom says thanks and looks at hilda and she blushes really" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.005, - "num_samples": 96080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.005, - "original_num_samples": 96080, - "transcript": "tom drinks a little more coffee and then he goes on the trouble is i can't get married on this flower shop job" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.91, - "num_samples": 46560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.91, - "original_num_samples": 46560, - "transcript": "you know i'd get drafted in a year or two anyway" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.595, - "num_samples": 41520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.595, - "original_num_samples": 41520, - "transcript": "i've decided to enlist in the army" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.545, - "num_samples": 40720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.545, - "original_num_samples": 40720, - "transcript": "i'll have to check some more says tom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 2.95, - "num_samples": 47200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1988/24833/1988-24833-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.95, - "original_num_samples": 47200, - "transcript": "here's to you a long happy life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.74, - "num_samples": 43840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.74, - "original_num_samples": 43840, - "transcript": "all idealisation makes life poorer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.3, - "num_samples": 36800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.3, - "original_num_samples": 36800, - "transcript": "then why indulge in prophetic phantasies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.06, - "num_samples": 80960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.06, - "original_num_samples": 80960, - "transcript": "at best they can only interpret the mind of the prophet and can have no objective value" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.45, - "num_samples": 151200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.45, - "original_num_samples": 151200, - "transcript": "michaelis the ticket of leave apostle was speaking in an even voice a voice that wheezed as if deadened and oppressed by the layer of fat on his 
chest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.255, - "num_samples": 68080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.255, - "original_num_samples": 68080, - "transcript": "and ever since he had never managed to get his weight down as much as an ounce" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.12, - "num_samples": 177920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0005.wav", - "speed": 1 - } - ], - "original_duration": 11.12, - "original_num_samples": 177920, - "transcript": "with his elbow presenting no appearance of a joint but more like a bend in a dummy's limb thrown over the back of a chair he leaned forward slightly over his short and enormous thighs to spit into the grate" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.575, - "num_samples": 73200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.575, - "original_num_samples": 73200, - "transcript": "yes i had the time to think things out a little he added without emphasis" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.255, - "num_samples": 196080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0007.wav", - "speed": 1 - } - ], - "original_duration": 12.255, - "original_num_samples": 196080, - "transcript": "when he rose painfully the thrusting forward of a skinny groping hand deformed by gouty swellings suggested the effort of a moribund murderer summoning all his remaining strength for a last stab" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.425, - "num_samples": 70800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.425, - "original_num_samples": 70800, - "transcript": "his enunciation would have been almost totally unintelligible to a stranger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.815, - "num_samples": 77040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.815, - "original_num_samples": 77040, - "transcript": "the old terrorist turned slowly his head on his skinny neck from side to side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.42, - "num_samples": 54720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.42, - "original_num_samples": 54720, - "transcript": "and i could never get as many as three such men together" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.45, - "num_samples": 119200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.45, - "original_num_samples": 119200, - "transcript": "the possessors of property had not only to face the awakened proletariat but they had also to fight amongst themselves yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.835, - 
"original_num_samples": 77360, - "transcript": "struggle warfare was the condition of private ownership it was fatal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.15, - "num_samples": 242400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0013.wav", - "speed": 1 - } - ], - "original_duration": 15.15, - "original_num_samples": 242400, - "transcript": "ah he did not depend upon emotional excitement to keep up his belief no declamations no anger no visions of blood red flags waving or metaphorical lurid suns of vengeance rising above the horizon of a doomed society not he" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.42, - "num_samples": 118720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.42, - "original_num_samples": 118720, - "transcript": "don't you think that if i had not been the optimist i am i could not have found in fifteen years some means to cut my throat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.51, - "num_samples": 152160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0015.wav", - "speed": 1 - } - ], - "original_duration": 9.51, - "original_num_samples": 152160, - "transcript": "a harsh laugh from comrade ossipon cut the tirade dead short in a sudden faltering of the tongue and a bewildered unsteadiness of the apostle's mildly exalted eyes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.535, - "num_samples": 200560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0016.wav", - "speed": 1 - } - ], - "original_duration": 12.535, - 
"original_num_samples": 200560, - "transcript": "alexander ossipon got up tall in his threadbare blue serge suit under the low ceiling shook off the stiffness of long immobility and strolled away into the kitchen down two steps to look over stevie's shoulder" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.73, - "num_samples": 43680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.73, - "original_num_samples": 43680, - "transcript": "very characteristic perfectly typical" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.05, - "num_samples": 64800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.05, - "original_num_samples": 64800, - "transcript": "you would call that lad a degenerate would you mumbled mister verloc" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.94, - "num_samples": 79040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.94, - "original_num_samples": 79040, - "transcript": "it was karl yundt who was heard implacable to his last breath" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.205, - "num_samples": 35280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.205, - "original_num_samples": 35280, - "transcript": "lombroso is an ass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.635, - "num_samples": 58160, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.635, - "original_num_samples": 58160, - "transcript": "for him the criminal is the prisoner simple is it not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.02, - "num_samples": 48320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.02, - "original_num_samples": 48320, - "transcript": "there was an extraordinary force of suggestion in this posturing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.02, - "num_samples": 112320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.02, - "original_num_samples": 112320, - "transcript": "the famous terrorist had never in his life raised personally as much as his little finger against the social edifice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.6, - "num_samples": 217600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0024.wav", - "speed": 1 - } - ], - "original_duration": 13.6, - "original_num_samples": 217600, - "transcript": "the shadow of his evil gift clung to him yet like the smell of a deadly drug in an old vial of poison emptied now useless ready to be thrown away upon the rubbish heap of things that had served their time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.18, - "num_samples": 146880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0025.wav", - "speed": 1 - } - ], - "original_duration": 9.18, - "original_num_samples": 146880, 
- "transcript": "michaelis the ticket of leave apostle smiled vaguely with his glued lips his pasty moon face drooped under the weight of melancholy assent" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.97, - "num_samples": 31520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0026.wav", - "speed": 1 - } - ], - "original_duration": 1.97, - "original_num_samples": 31520, - "transcript": "he had been a prisoner himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.33, - "num_samples": 69280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.33, - "original_num_samples": 69280, - "transcript": "his own skin had sizzled under the red hot brand he murmured softly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.77, - "num_samples": 204320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0028.wav", - "speed": 1 - } - ], - "original_duration": 12.77, - "original_num_samples": 204320, - "transcript": "you don't understand he began disdainfully but stopped short intimidated by the dead blackness of the cavernous eyes in the face turned slowly towards him with a blind stare as if guided only by the sound" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.585, - "num_samples": 57360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.585, - "original_num_samples": 57360, - "transcript": "he gave the discussion up with a slight shrug of the shoulders" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, 
- "duration": 6.465, - "num_samples": 103440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0030.wav", - "speed": 1 - } - ], - "original_duration": 6.465, - "original_num_samples": 103440, - "transcript": "stevie accustomed to move about disregarded had got up from the kitchen table carrying off his drawing to bed with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.065, - "num_samples": 177040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0031.wav", - "speed": 1 - } - ], - "original_duration": 11.065, - "original_num_samples": 177040, - "transcript": "the sheet of paper covered with circles dropped out of his fingers and he remained staring at the old terrorist as if rooted suddenly to the spot by his morbid horror and dread of physical pain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.85, - "num_samples": 93600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0032.wav", - "speed": 1 - } - ], - "original_duration": 5.85, - "original_num_samples": 93600, - "transcript": "his scared eyes blazed with indignation it would hurt terribly his mouth dropped open" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.655, - "num_samples": 58480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0033.wav", - "speed": 1 - } - ], - "original_duration": 3.655, - "original_num_samples": 58480, - "transcript": "comrade ossipon's face twitched with exasperation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.48, - "num_samples": 55680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/777/126732/777-126732-0034.wav", - "speed": 1 - } - ], - "original_duration": 3.48, - "original_num_samples": 55680, - "transcript": "i don't say that protested michaelis gently" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.585, - "num_samples": 89360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0035.wav", - "speed": 1 - } - ], - "original_duration": 5.585, - "original_num_samples": 89360, - "transcript": "his vision of truth had grown so intense that the sound of a strange voice failed to rout it this time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.07, - "num_samples": 145120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0036.wav", - "speed": 1 - } - ], - "original_duration": 9.07, - "original_num_samples": 145120, - "transcript": "the coals in the grate settled down with a slight crash and michaelis the hermit of visions in the desert of a penitentiary got up impetuously" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.44, - "num_samples": 87040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0037.wav", - "speed": 1 - } - ], - "original_duration": 5.44, - "original_num_samples": 87040, - "transcript": "the disdainful pout of comrade ossipon's thick lips accentuated the negro type of his face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.1, - "num_samples": 49600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0038.wav", - "speed": 1 - } - ], - "original_duration": 3.1, - "original_num_samples": 49600, - "transcript": "he paused then added with modest firmness" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.835, - "num_samples": 45360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0039.wav", - "speed": 1 - } - ], - "original_duration": 2.835, - "original_num_samples": 45360, - "transcript": "i would call it cannibalistic that's what it is" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.985, - "num_samples": 79760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0040.wav", - "speed": 1 - } - ], - "original_duration": 4.985, - "original_num_samples": 79760, - "transcript": "they are nourishing their greed on the quivering flesh and the warm blood of the people nothing else" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.09, - "num_samples": 161440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0041.wav", - "speed": 1 - } - ], - "original_duration": 10.09, - "original_num_samples": 161440, - "transcript": "stevie swallowed the terrifying statement with an audible gulp and at once as though it had been swift poison sank limply in a sitting posture on the steps of the kitchen door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.57, - "num_samples": 89120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0042.wav", - "speed": 1 - } - ], - "original_duration": 5.57, - "original_num_samples": 89120, - "transcript": "he closed the door behind their backs with restrained violence turned the key shot the bolt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.535, - "num_samples": 40560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/777/126732/777-126732-0043.wav", - "speed": 1 - } - ], - "original_duration": 2.535, - "original_num_samples": 40560, - "transcript": "he was not satisfied with his friends" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.025, - "num_samples": 64400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0044.wav", - "speed": 1 - } - ], - "original_duration": 4.025, - "original_num_samples": 64400, - "transcript": "with the insight of a kindred temperament he pronounced his verdict" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.975, - "num_samples": 191600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0045.wav", - "speed": 1 - } - ], - "original_duration": 11.975, - "original_num_samples": 191600, - "transcript": "there are natures too to whose sense of justice the price exacted looms up monstrously enormous odious oppressive worrying humiliating extortionate intolerable those are the fanatics" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.245, - "num_samples": 35920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0046.wav", - "speed": 1 - } - ], - "original_duration": 2.245, - "original_num_samples": 35920, - "transcript": "in any case he had not the time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.125, - "num_samples": 130000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0047.wav", - "speed": 1 - } - ], - "original_duration": 8.125, - "original_num_samples": 130000, - "transcript": "loafing was all very well for these fellows who knew not mister vladimir and had women to fall back upon whereas he had a woman to 
provide for" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.975, - "num_samples": 95600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0048.wav", - "speed": 1 - } - ], - "original_duration": 5.975, - "original_num_samples": 95600, - "transcript": "these were but few and for the first time since he opened his shop he took a commercial survey of its value" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.25, - "num_samples": 36000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0049.wav", - "speed": 1 - } - ], - "original_duration": 2.25, - "original_num_samples": 36000, - "transcript": "this survey was unfavourable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.81, - "num_samples": 108960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0050.wav", - "speed": 1 - } - ], - "original_duration": 6.81, - "original_num_samples": 108960, - "transcript": "he took the cash box out of the drawer and turning to leave the shop became aware that stevie was still downstairs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0051.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "he looked dubiously at his brother in law but he did not ask him for information" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.95, - "num_samples": 79200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0052.wav", - "speed": 1 - } - 
], - "original_duration": 4.95, - "original_num_samples": 79200, - "transcript": "mister verloc perceived with some surprise that he did not know really what to say to stevie" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.66, - "num_samples": 74560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0053.wav", - "speed": 1 - } - ], - "original_duration": 4.66, - "original_num_samples": 74560, - "transcript": "he stood still in the middle of the parlour and looked into the kitchen in silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.03, - "num_samples": 48480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0054.wav", - "speed": 1 - } - ], - "original_duration": 3.03, - "original_num_samples": 48480, - "transcript": "he watched him gesticulating and murmuring in the kitchen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.855, - "num_samples": 61680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0055.wav", - "speed": 1 - } - ], - "original_duration": 3.855, - "original_num_samples": 61680, - "transcript": "stevie prowled round the table like an excited animal in a cage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.26, - "num_samples": 148160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0056.wav", - "speed": 1 - } - ], - "original_duration": 9.26, - "original_num_samples": 148160, - "transcript": "the light thrown down by the shade fell dazzlingly on the white pillow sunk by the weight of her head reposing with closed eyes and dark hair done up in several plaits for the night" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.975, - "num_samples": 175600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0057.wav", - "speed": 1 - } - ], - "original_duration": 10.975, - "original_num_samples": 175600, - "transcript": "her bare feet as if poked through the bottom of an unadorned sleeved calico sack buttoned tightly at neck and wrists felt over the rug for the slippers while she looked upward into her husband's face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.225, - "num_samples": 83600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0058.wav", - "speed": 1 - } - ], - "original_duration": 5.225, - "original_num_samples": 83600, - "transcript": "there is no occupation that fails a man more completely than that of a secret agent of police" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.16, - "num_samples": 98560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0059.wav", - "speed": 1 - } - ], - "original_duration": 6.16, - "original_num_samples": 98560, - "transcript": "it's like your horse suddenly falling dead under you in the midst of an uninhabited and thirsty plain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.895, - "num_samples": 126320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0060.wav", - "speed": 1 - } - ], - "original_duration": 7.895, - "original_num_samples": 126320, - "transcript": "the comparison occurred to mister verloc because he had sat astride various army horses in his time and had now the sensation of an incipient fall" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
4.565, - "num_samples": 73040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0061.wav", - "speed": 1 - } - ], - "original_duration": 4.565, - "original_num_samples": 73040, - "transcript": "the prospect was as black as the window pane against which he was leaning his forehead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.375, - "num_samples": 38000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0062.wav", - "speed": 1 - } - ], - "original_duration": 2.375, - "original_num_samples": 38000, - "transcript": "yes not at all well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.17, - "num_samples": 290720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0063.wav", - "speed": 1 - } - ], - "original_duration": 18.17, - "original_num_samples": 290720, - "transcript": "down below in the quiet narrow street measured footsteps approached the house then died away unhurried and firm as if the passer by had started to pace out all eternity from gas lamp to gas lamp in a night without end and the drowsy ticking of the old clock on the landing became distinctly audible in the bedroom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.859938, - "num_samples": 93759, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0064.wav", - "speed": 1 - } - ], - "original_duration": 5.859938, - "original_num_samples": 93759, - "transcript": "that poor boy is in a very excited state to night she murmured after a pause which lasted for three ticks of the clock" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.225, - "num_samples": 83600, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0065.wav", - "speed": 1 - } - ], - "original_duration": 5.225, - "original_num_samples": 83600, - "transcript": "this dread led him to make the remark that stevie had disregarded his suggestion to go to bed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.595, - "num_samples": 185520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0066.wav", - "speed": 1 - } - ], - "original_duration": 11.595, - "original_num_samples": 185520, - "transcript": "there was no young man of his age in london more willing and docile than stephen she affirmed none more affectionate and ready to please and even useful as long as people did not upset his poor head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.245, - "num_samples": 83920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0067.wav", - "speed": 1 - } - ], - "original_duration": 5.245, - "original_num_samples": 83920, - "transcript": "mister verloc's anxieties had prevented him from attaching any sense to what his wife was saying" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0068.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "that boy hears too much of what is talked about here" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.945, - "num_samples": 79120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0069.wav", - "speed": 1 - } - ], - "original_duration": 4.945, - 
"original_num_samples": 79120, - "transcript": "if i had known they were coming to night i would have seen to it that he went to bed at the same time i did" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.315, - "num_samples": 117040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0070.wav", - "speed": 1 - } - ], - "original_duration": 7.315, - "original_num_samples": 117040, - "transcript": "he was out of his mind with something he overheard about eating people's flesh and drinking blood what's the good of talking like that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.305, - "num_samples": 36880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0071.wav", - "speed": 1 - } - ], - "original_duration": 2.305, - "original_num_samples": 36880, - "transcript": "mister verloc was fully responsive now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.71, - "num_samples": 43360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0072.wav", - "speed": 1 - } - ], - "original_duration": 2.71, - "original_num_samples": 43360, - "transcript": "ask karl yundt he growled savagely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.8, - "num_samples": 60800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0073.wav", - "speed": 1 - } - ], - "original_duration": 3.8, - "original_num_samples": 60800, - "transcript": "he isn't fit to hear what's said here he believes it's all true" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.48, - "num_samples": 55680, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/777/126732/777-126732-0074.wav", - "speed": 1 - } - ], - "original_duration": 3.48, - "original_num_samples": 55680, - "transcript": "he knows no better he gets into his passions over it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.495, - "num_samples": 71920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0075.wav", - "speed": 1 - } - ], - "original_duration": 4.495, - "original_num_samples": 71920, - "transcript": "he glared at me as if he didn't know who i was when i went downstairs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.26, - "num_samples": 68160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0076.wav", - "speed": 1 - } - ], - "original_duration": 4.26, - "original_num_samples": 68160, - "transcript": "i wish he had never been to school missus verloc began again brusquely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.405, - "num_samples": 38480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0077.wav", - "speed": 1 - } - ], - "original_duration": 2.405, - "original_num_samples": 38480, - "transcript": "he gets a red face poring over them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0078.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "i wouldn't give a halfpenny for the whole lot" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.04, - "num_samples": 160640, - "encoding": "Signed Integer PCM", 
- "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0079.wav", - "speed": 1 - } - ], - "original_duration": 10.04, - "original_num_samples": 160640, - "transcript": "the other day stevie got hold of one and there was a story in it of a german soldier officer tearing half off the ear of a recruit and nothing was done to him for it the brute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.535, - "num_samples": 40560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0080.wav", - "speed": 1 - } - ], - "original_duration": 2.535, - "original_num_samples": 40560, - "transcript": "he can't stand the notion of any cruelty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.505, - "num_samples": 24080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/777/126732/777-126732-0081.wav", - "speed": 1 - } - ], - "original_duration": 1.505, - "original_num_samples": 24080, - "transcript": "comfortable dear" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.09, - "num_samples": 33440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.09, - "original_num_samples": 33440, - "transcript": "go do you hear" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.99, - "num_samples": 63840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.99, - "original_num_samples": 63840, - "transcript": "but in less than five minutes the staircase groaned beneath an extraordinary weight" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
13.69, - "num_samples": 219040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0002.wav", - "speed": 1 - } - ], - "original_duration": 13.69, - "original_num_samples": 219040, - "transcript": "at this moment the whole soul of the old man seemed centred in his eyes which became bloodshot the veins of the throat swelled his cheeks and temples became purple as though he was struck with epilepsy nothing was wanting to complete this but the utterance of a cry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.8, - "num_samples": 108800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.8, - "original_num_samples": 108800, - "transcript": "and the cry issued from his pores if we may thus speak a cry frightful in its silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.4, - "num_samples": 70400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.4, - "original_num_samples": 70400, - "transcript": "d'avrigny rushed towards the old man and made him inhale a powerful restorative" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.96, - "num_samples": 255360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0005.wav", - "speed": 1 - } - ], - "original_duration": 15.96, - "original_num_samples": 255360, - "transcript": "d'avrigny unable to bear the sight of this touching emotion turned away and villefort without seeking any further explanation and attracted towards him by the irresistible magnetism which draws us towards those who have loved the people for whom we mourn extended his hand towards the 
young man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.62, - "num_samples": 89920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.62, - "original_num_samples": 89920, - "transcript": "for some time nothing was heard in that chamber but sobs exclamations and prayers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.0, - "num_samples": 32000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.0, - "original_num_samples": 32000, - "transcript": "what do you mean sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.015, - "num_samples": 112240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0008.wav", - "speed": 1 - } - ], - "original_duration": 7.015, - "original_num_samples": 112240, - "transcript": "oh you rave sir exclaimed villefort in vain endeavoring to escape the net in which he was taken i rave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.695063, - "num_samples": 43121, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.695063, - "original_num_samples": 43121, - "transcript": "do you know the assassin asked morrel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.19, - "num_samples": 115040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.19, - "original_num_samples": 115040, - "transcript": "noirtier 
looked upon morrel with one of those melancholy smiles which had so often made valentine happy and thus fixed his attention" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.25, - "num_samples": 52000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.25, - "original_num_samples": 52000, - "transcript": "said morrel sadly yes replied noirtier" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.56, - "num_samples": 40960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.56, - "original_num_samples": 40960, - "transcript": "the old man's eyes remained fixed on the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.4, - "num_samples": 38400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.4, - "original_num_samples": 38400, - "transcript": "asked morrel yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.885, - "num_samples": 46160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.885, - "original_num_samples": 46160, - "transcript": "must i leave alone no" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.035, - "num_samples": 48560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.035, - "original_num_samples": 48560, - "transcript": "but can he understand you 
yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.41, - "num_samples": 134560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.41, - "original_num_samples": 134560, - "transcript": "gentlemen he said in a hoarse voice give me your word of honor that this horrible secret shall forever remain buried amongst ourselves the two men drew back" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.375, - "num_samples": 150000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0017.wav", - "speed": 1 - } - ], - "original_duration": 9.375, - "original_num_samples": 150000, - "transcript": "my father has revealed the culprit's name my father thirsts for revenge as much as you do yet even he conjures you as i do to keep this secret do you not father" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.55, - "num_samples": 56800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0018.wav", - "speed": 1 - } - ], - "original_duration": 3.55, - "original_num_samples": 56800, - "transcript": "morrel suffered an exclamation of horror and surprise to escape him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.435, - "num_samples": 38960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.435, - "original_num_samples": 38960, - "transcript": "the old man made a sign in the affirmative" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.92, - "num_samples": 110720, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/84/121123/84-121123-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.92, - "original_num_samples": 110720, - "transcript": "it was something terrible to witness the silent agony the mute despair of noirtier whose tears silently rolled down his cheeks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.815, - "num_samples": 77040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.815, - "original_num_samples": 77040, - "transcript": "but he stopped on the landing he had not the courage to again visit the death chamber" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.61, - "num_samples": 41760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.61, - "original_num_samples": 41760, - "transcript": "the two doctors therefore entered the room alone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.685, - "num_samples": 74960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.685, - "original_num_samples": 74960, - "transcript": "noirtier was near the bed pale motionless and silent as the corpse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.38, - "num_samples": 166080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0024.wav", - "speed": 1 - } - ], - "original_duration": 10.38, - "original_num_samples": 166080, - "transcript": "the district doctor approached with the indifference of a man accustomed to spend half his time amongst the dead he then 
lifted the sheet which was placed over the face and just unclosed the lips" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.255, - "num_samples": 100080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0025.wav", - "speed": 1 - } - ], - "original_duration": 6.255, - "original_num_samples": 100080, - "transcript": "the nearest said the district doctor is a good italian abbe who lives next door to you shall i call on him as i pass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.94, - "num_samples": 223040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0026.wav", - "speed": 1 - } - ], - "original_duration": 13.94, - "original_num_samples": 223040, - "transcript": "d'avrigny said villefort be so kind i beseech you as to accompany this gentleman here is the key of the door so that you can go in and out as you please you will bring the priest with you and will oblige me by introducing him into my child's room do you wish to see him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.07, - "num_samples": 49120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.07, - "original_num_samples": 49120, - "transcript": "i only wish to be alone you will excuse me will you not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.76, - "num_samples": 76160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121123/84-121123-0028.wav", - "speed": 1 - } - ], - "original_duration": 4.76, - "original_num_samples": 76160, - "transcript": "i am going sir and i do not hesitate to say that no prayers will be more fervent than mine" - }, 
- { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.435, - "num_samples": 134960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0000.wav", - "speed": 1 - } - ], - "original_duration": 8.435, - "original_num_samples": 134960, - "transcript": "but with full ravishment the hours of prime singing received they in the midst of leaves that ever bore a burden to their rhymes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.975, - "num_samples": 127600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.975, - "original_num_samples": 127600, - "transcript": "all waters that on earth most limpid are would seem to have within themselves some mixture compared with that which nothing doth conceal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.265, - "num_samples": 132240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.265, - "original_num_samples": 132240, - "transcript": "thou makest me remember where and what proserpina that moment was when lost her mother her and she herself the spring" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.16, - "num_samples": 130560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.16, - "original_num_samples": 130560, - "transcript": "i do not think there shone so great a light under the lids of venus when transfixed by her own son beyond his usual custom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.965, 
- "num_samples": 127440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.965, - "original_num_samples": 127440, - "transcript": "ye are new comers and because i smile began she peradventure in this place elect to human nature for its nest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.714937, - "num_samples": 139439, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.714937, - "original_num_samples": 139439, - "transcript": "some apprehension keeps you marvelling but the psalm delectasti giveth light which has the power to uncloud your intellect" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.83, - "num_samples": 141280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.83, - "original_num_samples": 141280, - "transcript": "the good supreme sole in itself delighting created man good and this goodly place gave him as hansel of eternal peace" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.525, - "num_samples": 152400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.525, - "original_num_samples": 152400, - "transcript": "by his default short while he sojourned here by his default to weeping and to toil he changed his innocent laughter and sweet play" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.445, - "num_samples": 135120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/84/121550/84-121550-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.445, - "original_num_samples": 135120, - "transcript": "this every other savour doth transcend and notwithstanding slaked so far may be thy thirst that i reveal to thee no more" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.615, - "num_samples": 137840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.615, - "original_num_samples": 137840, - "transcript": "between her steps and mine were not a hundred when equally the margins gave a turn in such a way that to the east i faced" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.33, - "num_samples": 133280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.33, - "original_num_samples": 133280, - "transcript": "nor even thus our way continued far before the lady wholly turned herself unto me saying brother look and listen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.355, - "num_samples": 149680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.355, - "original_num_samples": 149680, - "transcript": "now helicon must needs pour forth for me and with her choir urania must assist me to put in verse things difficult to think" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.24, - "num_samples": 147840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.24, - "original_num_samples": 147840, 
- "transcript": "then back i turned my face to those high things which moved themselves towards us so sedately they had been distanced by new wedded brides" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.11, - "num_samples": 305760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0013.wav", - "speed": 1 - } - ], - "original_duration": 19.11, - "original_num_samples": 305760, - "transcript": "and i beheld the flamelets onward go leaving behind themselves the air depicted and they of trailing pennons had the semblance so that it overhead remained distinct with sevenfold lists all of them of the colours whence the sun's bow is made and delia's girdle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.185, - "num_samples": 130960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.185, - "original_num_samples": 130960, - "transcript": "these standards to the rearward longer were than was my sight and as it seemed to me ten paces were the outermost apart" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.155, - "num_samples": 130480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.155, - "original_num_samples": 130480, - "transcript": "the interval between these four contained a chariot triumphal on two wheels which by a griffin's neck came drawn along" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.955, - "num_samples": 143280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.955, - 
"original_num_samples": 143280, - "transcript": "not only rome with no such splendid car e'er gladdened africanus or augustus but poor to it that of the sun would be" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.645, - "num_samples": 138320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.645, - "original_num_samples": 138320, - "transcript": "three maidens at the right wheel in a circle came onward dancing one so very red that in the fire she hardly had been noted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.46, - "num_samples": 151360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0018.wav", - "speed": 1 - } - ], - "original_duration": 9.46, - "original_num_samples": 151360, - "transcript": "the second was as if her flesh and bones had all been fashioned out of emerald the third appeared as snow but newly fallen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.279938, - "num_samples": 148479, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0019.wav", - "speed": 1 - } - ], - "original_duration": 9.279938, - "original_num_samples": 148479, - "transcript": "in rear of all the group here treated of two old men i beheld unlike in habit but like in gait each dignified and grave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.39, - "num_samples": 134240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0020.wav", - "speed": 1 - } - ], - "original_duration": 8.39, - "original_num_samples": 134240, - "transcript": "and when the car was opposite to me thunder was heard and all that folk 
august seemed to have further progress interdicted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.71, - "num_samples": 139360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0021.wav", - "speed": 1 - } - ], - "original_duration": 8.71, - "original_num_samples": 139360, - "transcript": "and one of them as if by heaven commissioned singing veni sponsa de libano shouted three times and all the others after" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.65, - "num_samples": 122400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.65, - "original_num_samples": 122400, - "transcript": "as soon as on my vision smote the power sublime that had already pierced me through ere from my boyhood i had yet come forth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.215, - "num_samples": 115440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.215, - "original_num_samples": 115440, - "transcript": "to the left hand i turned with that reliance with which the little child runs to his mother when he has fear or when he is afflicted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.63, - "num_samples": 138080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0024.wav", - "speed": 1 - } - ], - "original_duration": 8.63, - "original_num_samples": 138080, - "transcript": "to say unto virgilius not a drachm of blood remains in me that does not tremble i know the traces of the ancient flame" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 9.84, - "num_samples": 157440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0025.wav", - "speed": 1 - } - ], - "original_duration": 9.84, - "original_num_samples": 157440, - "transcript": "dante because virgilius has departed do not weep yet do not weep yet awhile for by another sword thou need'st must weep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.35, - "num_samples": 149600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0026.wav", - "speed": 1 - } - ], - "original_duration": 9.35, - "original_num_samples": 149600, - "transcript": "i saw the lady who erewhile appeared veiled underneath the angelic festival direct her eyes to me across the river" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.995, - "num_samples": 63920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.995, - "original_num_samples": 63920, - "transcript": "look at me well in sooth i'm beatrice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.64, - "num_samples": 122240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0028.wav", - "speed": 1 - } - ], - "original_duration": 7.64, - "original_num_samples": 122240, - "transcript": "ye keep your watch in the eternal day so that nor night nor sleep can steal from you one step the ages make upon their path" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.385, - "num_samples": 134160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0029.wav", - "speed": 1 - } - ], - 
"original_duration": 8.385, - "original_num_samples": 134160, - "transcript": "therefore my answer is with greater care that he may hear me who is weeping yonder so that the sin and dole be of one measure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.925, - "num_samples": 142800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0030.wav", - "speed": 1 - } - ], - "original_duration": 8.925, - "original_num_samples": 142800, - "transcript": "but by the largess of celestial graces which have such lofty vapours for their rain that near to them our sight approaches not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.655, - "num_samples": 138480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0031.wav", - "speed": 1 - } - ], - "original_duration": 8.655, - "original_num_samples": 138480, - "transcript": "as soon as ever of my second age i was upon the threshold and changed life himself from me he took and gave to others" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.25, - "num_samples": 132000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0032.wav", - "speed": 1 - } - ], - "original_duration": 8.25, - "original_num_samples": 132000, - "transcript": "so low he fell that all appliances for his salvation were already short save showing him the people of perdition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.02, - "num_samples": 128320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0033.wav", - "speed": 1 - } - ], - "original_duration": 8.02, - "original_num_samples": 128320, - "transcript": "confusion and dismay together mingled forced 
such a yes from out my mouth that sight was needful to the understanding of it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.945, - "num_samples": 143120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0034.wav", - "speed": 1 - } - ], - "original_duration": 8.945, - "original_num_samples": 143120, - "transcript": "whence she to me in those desires of mine which led thee to the loving of that good beyond which there is nothing to aspire to" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.22, - "num_samples": 131520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/84/121550/84-121550-0035.wav", - "speed": 1 - } - ], - "original_duration": 8.22, - "original_num_samples": 131520, - "transcript": "and what allurements or what vantages upon the forehead of the others showed that thou shouldst turn thy footsteps unto them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.675, - "num_samples": 234800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0000.wav", - "speed": 1 - } - ], - "original_duration": 14.675, - "original_num_samples": 234800, - "transcript": "a laudable regard for the honor of the first proselyte has countenanced the belief the hope the wish that the ebionites or at least the nazarenes were distinguished only by their obstinate perseverance in the practice of the mosaic rites" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.05, - "num_samples": 224800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0001.wav", - "speed": 1 - } - ], - "original_duration": 14.05, - "original_num_samples": 224800, - "transcript": "their churches have 
disappeared their books are obliterated their obscure freedom might allow a latitude of faith and the softness of their infant creed would be variously moulded by the zeal or prudence of three hundred years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.85, - "num_samples": 125600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.85, - "original_num_samples": 125600, - "transcript": "yet the most charitable criticism must refuse these sectaries any knowledge of the pure and proper divinity of christ" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.355, - "num_samples": 181680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.355, - "original_num_samples": 181680, - "transcript": "his progress from infancy to youth and manhood was marked by a regular increase in stature and wisdom and after a painful agony of mind and body he expired on the cross" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.915, - "num_samples": 318640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0004.wav", - "speed": 1 - } - ], - "original_duration": 19.915, - "original_num_samples": 318640, - "transcript": "he lived and died for the service of mankind but the life and death of socrates had likewise been devoted to the cause of religion and justice and although the stoic or the hero may disdain the humble virtues of jesus the tears which he shed over his friend and country may be esteemed the purest evidence of his humanity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.740063, - 
"num_samples": 187841, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0005.wav", - "speed": 1 - } - ], - "original_duration": 11.740063, - "original_num_samples": 187841, - "transcript": "the son of a virgin generated by the ineffable operation of the holy spirit was a creature without example or resemblance superior in every attribute of mind and body to the children of adam" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.92, - "num_samples": 254720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0006.wav", - "speed": 1 - } - ], - "original_duration": 15.92, - "original_num_samples": 254720, - "transcript": "nor could it seem strange or incredible that the first of these aeons the logos or word of god of the same substance with the father should descend upon earth to deliver the human race from vice and error and to conduct them in the paths of life and immortality" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.095, - "num_samples": 113520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0007.wav", - "speed": 1 - } - ], - "original_duration": 7.095, - "original_num_samples": 113520, - "transcript": "but the prevailing doctrine of the eternity and inherent pravity of matter infected the primitive churches of the east" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.65, - "num_samples": 282400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0008.wav", - "speed": 1 - } - ], - "original_duration": 17.65, - "original_num_samples": 282400, - "transcript": "many among the gentile proselytes refused to believe that a celestial spirit an undivided portion of the first essence had 
been personally united with a mass of impure and contaminated flesh and in their zeal for the divinity they piously abjured the humanity of christ" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.46, - "num_samples": 263360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0009.wav", - "speed": 1 - } - ], - "original_duration": 16.46, - "original_num_samples": 263360, - "transcript": "he first appeared on the banks of the jordan in the form of perfect manhood but it was a form only and not a substance a human figure created by the hand of omnipotence to imitate the faculties and actions of a man and to impose a perpetual illusion on the senses of his friends and enemies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.995, - "num_samples": 191920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0010.wav", - "speed": 1 - } - ], - "original_duration": 11.995, - "original_num_samples": 191920, - "transcript": "but the rashness of these concessions has encouraged a milder sentiment of those of the docetes who taught not that christ was a phantom but that he was clothed with an impassible and incorruptible body" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.415, - "num_samples": 262640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0011.wav", - "speed": 1 - } - ], - "original_duration": 16.415, - "original_num_samples": 262640, - "transcript": "a foetus that could increase from an invisible point to its full maturity a child that could attain the stature of perfect manhood without deriving any nourishment from the ordinary sources might continue to exist without repairing a daily waste by a daily supply of external matter" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.47, - "num_samples": 231520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0012.wav", - "speed": 1 - } - ], - "original_duration": 14.47, - "original_num_samples": 231520, - "transcript": "in their eyes jesus of nazareth was a mere mortal the legitimate son of joseph and mary but he was the best and wisest of the human race selected as the worthy instrument to restore upon earth the worship of the true and supreme deity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.52, - "num_samples": 248320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0013.wav", - "speed": 1 - } - ], - "original_duration": 15.52, - "original_num_samples": 248320, - "transcript": "when the messiah was delivered into the hands of the jews the christ an immortal and impassible being forsook his earthly tabernacle flew back to the pleroma or world of spirits and left the solitary jesus to suffer to complain and to expire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.985, - "num_samples": 223760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0014.wav", - "speed": 1 - } - ], - "original_duration": 13.985, - "original_num_samples": 223760, - "transcript": "but the justice and generosity of such a desertion are strongly questionable and the fate of an innocent martyr at first impelled and at length abandoned by his divine companion might provoke the pity and indignation of the profane" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.305, - "num_samples": 100880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1673/143396/1673-143396-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.305, - "original_num_samples": 100880, - "transcript": "their murmurs were variously silenced by the sectaries who espoused and modified the double system of cerinthus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.455, - "num_samples": 231280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0016.wav", - "speed": 1 - } - ], - "original_duration": 14.455, - "original_num_samples": 231280, - "transcript": "the worthy friend of athanasius the worthy antagonist of julian he bravely wrestled with the arians and polytheists and though he affected the rigor of geometrical demonstration his commentaries revealed the literal and allegorical sense of the scriptures" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.43, - "num_samples": 134880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.43, - "original_num_samples": 134880, - "transcript": "yet as the profound doctor had been terrified at his own rashness apollinaris was heard to mutter some faint accents of excuse and explanation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.655, - "num_samples": 202480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0018.wav", - "speed": 1 - } - ], - "original_duration": 12.655, - "original_num_samples": 202480, - "transcript": "he acquiesced in the old distinction of the greek philosophers between the rational and sensitive soul of man that he might reserve the logos for intellectual functions and employ the subordinate human principle in the meaner actions of animal life" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.485, - "num_samples": 247760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0019.wav", - "speed": 1 - } - ], - "original_duration": 15.485, - "original_num_samples": 247760, - "transcript": "but instead of a temporary and occasional alliance they established and we still embrace the substantial indissoluble and everlasting union of a perfect god with a perfect man of the second person of the trinity with a reasonable soul and human flesh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.02, - "num_samples": 240320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143396/1673-143396-0020.wav", - "speed": 1 - } - ], - "original_duration": 15.02, - "original_num_samples": 240320, - "transcript": "under the tuition of the abbot serapion he applied himself to ecclesiastical studies with such indefatigable ardor that in the course of one sleepless night he has perused the four gospels the catholic epistles and the epistle to the romans" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.35, - "num_samples": 149600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.35, - "original_num_samples": 149600, - "transcript": "ardent in the prosecution of heresy cyril auspiciously opened his reign by oppressing the novatians the most innocent and harmless of the sectaries" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.895, - "num_samples": 142320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.895, - "original_num_samples": 
142320, - "transcript": "without any legal sentence without any royal mandate the patriarch at the dawn of day led a seditious multitude to the attack of the synagogues" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.03, - "num_samples": 208480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0002.wav", - "speed": 1 - } - ], - "original_duration": 13.03, - "original_num_samples": 208480, - "transcript": "such crimes would have deserved the animadversion of the magistrate but in this promiscuous outrage the innocent were confounded with the guilty and alexandria was impoverished by the loss of a wealthy and industrious colony" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.18, - "num_samples": 178880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.18, - "original_num_samples": 178880, - "transcript": "the zeal of cyril exposed him to the penalties of the julian law but in a feeble government and a superstitious age he was secure of impunity and even of praise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.07, - "num_samples": 193120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0004.wav", - "speed": 1 - } - ], - "original_duration": 12.07, - "original_num_samples": 193120, - "transcript": "orestes complained but his just complaints were too quickly forgotten by the ministers of theodosius and too deeply remembered by a priest who affected to pardon and continued to hate the praefect of egypt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.895, - "num_samples": 158320, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.895, - "original_num_samples": 158320, - "transcript": "a rumor was spread among the christians that the daughter of theon was the only obstacle to the reconciliation of the praefect and the archbishop and that obstacle was speedily removed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.535, - "num_samples": 136560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.535, - "original_num_samples": 136560, - "transcript": "which oppressed the metropolitans of europe and asia invaded the provinces of antioch and alexandria and measured their diocese by the limits of the empire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.16, - "num_samples": 82560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.16, - "original_num_samples": 82560, - "transcript": "exterminate with me the heretics and with you i will exterminate the persians" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.275, - "num_samples": 68400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.275, - "original_num_samples": 68400, - "transcript": "at these blasphemous sounds the pillars of the sanctuary were shaken" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.6, - "num_samples": 57600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0009.wav", - "speed": 1 - } - ], - "original_duration": 
3.6, - "original_num_samples": 57600, - "transcript": "but the vatican received with open arms the messengers of egypt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.0, - "num_samples": 192000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0010.wav", - "speed": 1 - } - ], - "original_duration": 12.0, - "original_num_samples": 192000, - "transcript": "the vanity of celestine was flattered by the appeal and the partial version of a monk decided the faith of the pope who with his latin clergy was ignorant of the language the arts and the theology of the greeks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.195, - "num_samples": 227120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0011.wav", - "speed": 1 - } - ], - "original_duration": 14.195, - "original_num_samples": 227120, - "transcript": "nestorius who depended on the near approach of his eastern friends persisted like his predecessor chrysostom to disclaim the jurisdiction and to disobey the summons of his enemies they hastened his trial and his accuser presided in the seat of judgment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.33, - "num_samples": 149280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.33, - "original_num_samples": 149280, - "transcript": "sixty eight bishops twenty two of metropolitan rank defended his cause by a modest and temperate protest they were excluded from the councils of their brethren" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.675, - "num_samples": 106800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1673/143397/1673-143397-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.675, - "original_num_samples": 106800, - "transcript": "by the vigilance of memnon the churches were shut against them and a strong garrison was thrown into the cathedral" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.395, - "num_samples": 150320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0014.wav", - "speed": 1 - } - ], - "original_duration": 9.395, - "original_num_samples": 150320, - "transcript": "during a busy period of three months the emperor tried every method except the most effectual means of indifference and contempt to reconcile this theological quarrel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.345, - "num_samples": 101520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.345, - "original_num_samples": 101520, - "transcript": "return to your provinces and may your private virtues repair the mischief and scandal of your meeting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.285, - "num_samples": 244560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0016.wav", - "speed": 1 - } - ], - "original_duration": 15.285, - "original_num_samples": 244560, - "transcript": "the feeble son of arcadius was alternately swayed by his wife and sister by the eunuchs and women of the palace superstition and avarice were their ruling passions and the orthodox chiefs were assiduous in their endeavors to alarm the former and to gratify the latter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.055, - "num_samples": 112880, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.055, - "original_num_samples": 112880, - "transcript": "but in this awful moment of the danger of the church their vow was superseded by a more sublime and indispensable duty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.265, - "num_samples": 68240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.265, - "original_num_samples": 68240, - "transcript": "at the same time every avenue of the throne was assaulted with gold" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.695, - "num_samples": 251120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0019.wav", - "speed": 1 - } - ], - "original_duration": 15.695, - "original_num_samples": 251120, - "transcript": "the past he regretted he was discontented with the present and the future he had reason to dread the oriental bishops successively disengaged their cause from his unpopular name and each day decreased the number of the schismatics who revered nestorius as the confessor of the faith" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.32, - "num_samples": 261120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1673/143397/1673-143397-0020.wav", - "speed": 1 - } - ], - "original_duration": 16.32, - "original_num_samples": 261120, - "transcript": "a wandering tribe of the blemmyes or nubians invaded his solitary prison in their retreat they dismissed a crowd of useless captives but no sooner had nestorius reached the banks of the nile than he would gladly have escaped from a roman and orthodox city 
to the milder servitude of the savages" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.58, - "num_samples": 57280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.58, - "original_num_samples": 57280, - "transcript": "what are you doing here he asked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.64, - "num_samples": 74240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.64, - "original_num_samples": 74240, - "transcript": "you have been to the hotel he burst out you have seen catherine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.94, - "num_samples": 127040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.94, - "original_num_samples": 127040, - "transcript": "we have both seen the same newspaper of course and you have been the first to clear the thing up that's it isn't it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.245, - "num_samples": 179920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.245, - "original_num_samples": 179920, - "transcript": "not satisfied with gossip in private the greedy public appetite devours gossip in print and wants more of it than any one editor can supply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/8297/275156/8297-275156-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "supposing the report had been true" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.18, - "num_samples": 242880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0005.wav", - "speed": 1 - } - ], - "original_duration": 15.18, - "original_num_samples": 242880, - "transcript": "in the meantime after what missus presty had confessed the cruel falsehood which had checked poor kitty's natural inquiries raised an insuperable obstacle to a meeting between father and child" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.68, - "num_samples": 106880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.68, - "original_num_samples": 106880, - "transcript": "he had promised to do his best toward persuading catherine to grant sydney an interview" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.82, - "num_samples": 157120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.82, - "original_num_samples": 157120, - "transcript": "even the chance of successfully confiding her to bennydeck's protection had lost something of its fair promise since randal's visit to sydenham" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.84, - "num_samples": 125440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0008.wav", - "speed": 1 - } - ], - "original_duration": 7.84, - "original_num_samples": 125440, - 
"transcript": "be the results however what they might randal could see but one plain course before him now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.25, - "num_samples": 132000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.25, - "original_num_samples": 132000, - "transcript": "considerations of delicacy seemed to forbid taking this liberty even with an intimate friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.68, - "num_samples": 218880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0010.wav", - "speed": 1 - } - ], - "original_duration": 13.68, - "original_num_samples": 218880, - "transcript": "what happier future could await her especially if she justified randal's past experience of all that was candid and truthful in her character than to become his friend's wife" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.17, - "num_samples": 82720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.17, - "original_num_samples": 82720, - "transcript": "shall i say that she may expect an early visit from you when i see her to morrow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.7, - "num_samples": 91200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.7, - "original_num_samples": 91200, - "transcript": "he added sydney's address in a postscript and dispatched his letter that evening" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 12.19, - "num_samples": 195040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275156/8297-275156-0013.wav", - "speed": 1 - } - ], - "original_duration": 12.19, - "original_num_samples": 195040, - "transcript": "but it might perhaps be excusable to infer that the marriage had not yet been decided on and that the captain's proposals were still waiting for catherine's reply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.22, - "num_samples": 163520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.22, - "original_num_samples": 163520, - "transcript": "not having heard from captain bennydeck for some little time randal thought it desirable in sydney's interests to make inquiries at his club" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.34, - "num_samples": 165440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.34, - "original_num_samples": 165440, - "transcript": "the sailing master announced that he had orders to take the vessel back to her port with no other explanation than that the cruise was over" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.76, - "num_samples": 156160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.76, - "original_num_samples": 156160, - "transcript": "this alternative in the captain's plans terminating the voyage a month earlier than his arrangements had contemplated puzzled randal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 5.24, - "num_samples": 83840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.24, - "original_num_samples": 83840, - "transcript": "randal waited a while in london on the chance that bennydeck might pay him a visit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.655, - "num_samples": 106480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.655, - "original_num_samples": 106480, - "transcript": "he is staying at this hotel to try the air of sydenham and he finds that it agrees with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.78, - "num_samples": 44480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.78, - "original_num_samples": 44480, - "transcript": "had her beauty fascinated him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.0, - "num_samples": 224000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0006.wav", - "speed": 1 - } - ], - "original_duration": 14.0, - "original_num_samples": 224000, - "transcript": "randal wrote to accept the invitation determining to present himself before the appointed hour and to question catherine privately without giving her the advantage over him of preparing herself for the interview" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.49, - "num_samples": 71840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0007.wav", - 
"speed": 1 - } - ], - "original_duration": 4.49, - "original_num_samples": 71840, - "transcript": "after months of separation he received a visit from herbert" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.04, - "num_samples": 64640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.04, - "original_num_samples": 64640, - "transcript": "you distress me herbert more than words can say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.48, - "num_samples": 39680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.48, - "original_num_samples": 39680, - "transcript": "he drank the wine greedily" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.61, - "num_samples": 121760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.61, - "original_num_samples": 121760, - "transcript": "whatever reviving effect it might otherwise have produced on him it made no change in the threatening gloom of his manner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.76, - "num_samples": 76160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.76, - "original_num_samples": 76160, - "transcript": "he put down the empty glass taking no notice of his brother's question" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.97, - "num_samples": 79520, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.97, - "original_num_samples": 79520, - "transcript": "randal he said you know where sydney is" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.135, - "num_samples": 50160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.135, - "original_num_samples": 50160, - "transcript": "i will do neither the one nor the other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.085, - "num_samples": 289360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0014.wav", - "speed": 1 - } - ], - "original_duration": 18.085, - "original_num_samples": 289360, - "transcript": "you don't know what it is to be used to seeing a pretty creature always nicely dressed always about the room thinking so much of you and so little of herself and then to be left alone as i am left out in the dark" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.22, - "num_samples": 51520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.22, - "original_num_samples": 51520, - "transcript": "i'm alone do you hear that alone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.87, - "num_samples": 45920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.87, - "original_num_samples": 45920, - "transcript": "i haven't courage enough to do it for myself" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "oh why did i engage that governess" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.205, - "num_samples": 35280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.205, - "original_num_samples": 35280, - "transcript": "i'm afraid he said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.88, - "num_samples": 126080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0019.wav", - "speed": 1 - } - ], - "original_duration": 7.88, - "original_num_samples": 126080, - "transcript": "i tried it yesterday it set my brains on fire i'm feeling that glass i took just now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.470063, - "num_samples": 71521, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.470063, - "original_num_samples": 71521, - "transcript": "he paused and put his hand to his fevered head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.810062, - "num_samples": 76961, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.810062, - "original_num_samples": 76961, - "transcript": "was his mind wandering into some other train of thought" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.255, - "num_samples": 36080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.255, - "original_num_samples": 36080, - "transcript": "you can't do it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.6, - "num_samples": 41600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.6, - "original_num_samples": 41600, - "transcript": "let me hear what it is first" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.665, - "num_samples": 58640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.665, - "original_num_samples": 58640, - "transcript": "i feel for you herbert he said warmly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.88, - "num_samples": 78080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.88, - "original_num_samples": 78080, - "transcript": "she shall have your message all that i can do to persuade her shall be done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.89, - "num_samples": 78240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.89, - "original_num_samples": 78240, - "transcript": "let me rest a little he pleaded if i'm not in the way" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 8.75, - "num_samples": 140000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275154/8297-275154-0027.wav", - "speed": 1 - } - ], - "original_duration": 8.75, - "original_num_samples": 140000, - "transcript": "he mentioned the name of one of the old servants at mount morven who had attached himself to randal after the breakup of the family" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.47, - "num_samples": 167520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.47, - "original_num_samples": 167520, - "transcript": "on the next day but one randal arranged his departure for sydenham so as to arrive at the hotel an hour before the time appointed for the dinner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.625, - "num_samples": 106000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.625, - "original_num_samples": 106000, - "transcript": "the new number of a popular weekly journal had that day been published randal bought it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.85, - "num_samples": 141600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.85, - "original_num_samples": 141600, - "transcript": "after reading one or two of the political articles he arrived at the columns specially devoted to fashionable intelligence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.55, - "num_samples": 56800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/8297/275155/8297-275155-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.55, - "original_num_samples": 56800, - "transcript": "randal looked again at the first words in the paragraph" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.65, - "num_samples": 186400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0004.wav", - "speed": 1 - } - ], - "original_duration": 11.65, - "original_num_samples": 186400, - "transcript": "with his own suspicions steadily contradicting him he arrived at the hotel obstinately believing that the charming widow would prove to be a stranger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.14, - "num_samples": 130240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.14, - "original_num_samples": 130240, - "transcript": "missus norman and her little daughter were out driving with a friend and were expected to return in good time for dinner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.915, - "num_samples": 110640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.915, - "original_num_samples": 110640, - "transcript": "missus presty was at home she was reported to be in the garden of the hotel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.795, - "num_samples": 60720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.795, - "original_num_samples": 60720, - "transcript": "how nice of you to come so soon she 
began" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.22, - "num_samples": 51520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.22, - "original_num_samples": 51520, - "transcript": "she added looking at him suspiciously" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.145, - "num_samples": 98320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.145, - "original_num_samples": 98320, - "transcript": "but you ought to have known that we are only half an hour behind you at sydenham in the matter of news" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.105, - "num_samples": 49680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.105, - "original_num_samples": 49680, - "transcript": "the report is premature my good friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.33, - "num_samples": 149280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.33, - "original_num_samples": 149280, - "transcript": "but if these newspaper people waited to find out whether a report is true or false how much gossip would society get in its favorite newspapers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.925, - "num_samples": 46800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0012.wav", - "speed": 1 - } - ], - 
"original_duration": 2.925, - "original_num_samples": 46800, - "transcript": "you are to understand that catherine is a widow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.335, - "num_samples": 53360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.335, - "original_num_samples": 53360, - "transcript": "sit down said missus presty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.44, - "num_samples": 135040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.44, - "original_num_samples": 135040, - "transcript": "you shall hear how my divorced daughter and my poor little grandchild were treated at sandyseal after you left us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.81, - "num_samples": 108960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.81, - "original_num_samples": 108960, - "transcript": "you would have seen her pining for the company of other children and would have had no mercy on her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.52, - "num_samples": 136320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.52, - "original_num_samples": 136320, - "transcript": "he was introduced to missus norman and to missus norman's little girl and we were all charmed with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.91, - "num_samples": 
142560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.91, - "original_num_samples": 142560, - "transcript": "when he and i happened to be left together he naturally wondered after having seen the beautiful wife where the lucky husband might be" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.845, - "num_samples": 125520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.845, - "original_num_samples": 125520, - "transcript": "and the captain of course concluded after having been introduced to kitty that missus norman was a widow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.2, - "num_samples": 99200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.2, - "original_num_samples": 99200, - "transcript": "worse stories have been printed i do assure you worse stories have been printed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.93, - "num_samples": 110880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.93, - "original_num_samples": 110880, - "transcript": "before i consented to answer the child's inquiries i came to an understanding with her mother" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.775, - "num_samples": 28400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0021.wav", - "speed": 1 - } - ], - "original_duration": 1.775, - 
"original_num_samples": 28400, - "transcript": "honestly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.69, - "num_samples": 107040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.69, - "original_num_samples": 107040, - "transcript": "after that i had her mother's authority for telling kitty that she would never see her father again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "she asked directly if her father was dead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.78, - "num_samples": 124480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0024.wav", - "speed": 1 - } - ], - "original_duration": 7.78, - "original_num_samples": 124480, - "transcript": "that will do missus presty your defense is thoroughly worthy of your conduct in all other respects" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.18, - "num_samples": 50880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.18, - "original_num_samples": 50880, - "transcript": "randal passed this over without notice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.455, - "num_samples": 55280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0026.wav", - "speed": 1 - } - 
], - "original_duration": 3.455, - "original_num_samples": 55280, - "transcript": "a very wise decision she remarked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.15, - "num_samples": 50400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.15, - "original_num_samples": 50400, - "transcript": "have you any message for captain bennydeck" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.65, - "num_samples": 58400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.65, - "original_num_samples": 58400, - "transcript": "not at the hotel just now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.46, - "num_samples": 167360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0029.wav", - "speed": 1 - } - ], - "original_duration": 10.46, - "original_num_samples": 167360, - "transcript": "it was a relief to randal in the present state of catherine's relations toward bennydeck to return to london without having seen his friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.34, - "num_samples": 37440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.34, - "original_num_samples": 37440, - "transcript": "good by dear randal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0031.wav", - 
"speed": 1 - } - ], - "original_duration": 4.835, - "original_num_samples": 77360, - "transcript": "arrived at the station randal found that he must wait for the train" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.305, - "num_samples": 164880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8297/275155/8297-275155-0032.wav", - "speed": 1 - } - ], - "original_duration": 10.305, - "original_num_samples": 164880, - "transcript": "while he was walking up and down the platform with a mind doubly distressed by anxiety about his brother and anxiety about sydney the train from london came in" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.175, - "num_samples": 50800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.175, - "original_num_samples": 50800, - "transcript": "the savage philosopher the dual mind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.875, - "num_samples": 158000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.875, - "original_num_samples": 158000, - "transcript": "the original attitude of the american indian toward the eternal the great mystery that surrounds and embraces us was as simple as it was exalted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.11, - "num_samples": 161760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.11, - "original_num_samples": 161760, - "transcript": "it was silent because all speech is of necessity feeble and imperfect therefore the 
souls of my ancestors ascended to god in wordless adoration" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.04, - "num_samples": 96640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.04, - "original_num_samples": 96640, - "transcript": "none might exhort or confess or in any way meddle with the religious experience of another" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.825, - "num_samples": 93200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.825, - "original_num_samples": 93200, - "transcript": "among us all men were created sons of god and stood erect as conscious of their divinity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.065, - "num_samples": 225040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0005.wav", - "speed": 1 - } - ], - "original_duration": 14.065, - "original_num_samples": 225040, - "transcript": "that solitary communion with the unseen which was the highest expression of our religious life is partly described in the word bambeday literally mysterious feeling which has been variously translated fasting and dreaming" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.41, - "num_samples": 166560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.41, - "original_num_samples": 166560, - "transcript": "the first bambeday or religious retreat marked an epoch in the life of the youth which may be compared to that of confirmation or conversion 
in christian experience" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.42, - "num_samples": 150720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.42, - "original_num_samples": 150720, - "transcript": "knowing that god sets no value upon material things he took with him no offerings or sacrifices other than symbolic objects such as paints and tobacco" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.39, - "num_samples": 326240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0008.wav", - "speed": 1 - } - ], - "original_duration": 20.39, - "original_num_samples": 326240, - "transcript": "at the solemn hour of sunrise or sunset he took up his position overlooking the glories of earth and facing the great mystery and there he remained naked erect silent and motionless exposed to the elements and forces of his arming for a night and a day to two days and nights but rarely longer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.065, - "num_samples": 129040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.065, - "original_num_samples": 129040, - "transcript": "when he returned to the camp he must remain at a distance until he had again entered the vapor bath and prepared himself for intercourse with his fellows" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.14, - "num_samples": 130240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.14, - "original_num_samples": 
130240, - "transcript": "it was not then wholly from ignorance or improvidence that he failed to establish permanent towns and to develop a material civilization" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.59, - "num_samples": 121440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.59, - "original_num_samples": 121440, - "transcript": "to the untutored sage the concentration of population was the prolific mother of all evils moral no less than physical" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.385, - "num_samples": 54160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.385, - "original_num_samples": 54160, - "transcript": "in this type of prayer there was no beseeching of favor or help" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.4, - "num_samples": 166400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0013.wav", - "speed": 1 - } - ], - "original_duration": 10.4, - "original_num_samples": 166400, - "transcript": "from the sun as the universal father proceeds the quickening principle in nature and in the patient and fruitful womb of our mother the earth are hidden embryos of plants and men" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.83, - "num_samples": 45280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.83, - "original_num_samples": 45280, - "transcript": "this is the material or physical prayer" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.96, - "num_samples": 95360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.96, - "original_num_samples": 95360, - "transcript": "nothing of the marvelous could astonish him as that a beast should speak or the sun stand still" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.425, - "num_samples": 38800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.425, - "original_num_samples": 38800, - "transcript": "who may condemn his superstition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.805, - "num_samples": 220880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0017.wav", - "speed": 1 - } - ], - "original_duration": 13.805, - "original_num_samples": 220880, - "transcript": "here is the supreme mystery that is the essence of worship without which there can be no religion and in the presence of this mystery our attitude cannot be very unlike that of the natural philosopher who beholds with awe the divine in all creation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.065, - "num_samples": 193040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0018.wav", - "speed": 1 - } - ], - "original_duration": 12.065, - "original_num_samples": 193040, - "transcript": "in his own thought he rose superior to them he scorned them even as a lofty spirit absorbed in its stern task rejects the soft beds the luxurious food the pleasure worshiping dalliance of a rich neighbor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 5.26, - "num_samples": 84160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43358/5536-43358-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.26, - "original_num_samples": 84160, - "transcript": "the historians of the white race admit that the indian was never the first to repudiate his oath" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.35, - "num_samples": 53600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.35, - "original_num_samples": 53600, - "transcript": "he had neither a national army nor an organized church" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.36, - "num_samples": 165760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.36, - "original_num_samples": 165760, - "transcript": "her attitude and secret meditations must be such as to instill into the receptive soul of the unborn child the love of the great mystery and a sense of brotherhood with all creation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.04, - "num_samples": 176640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0002.wav", - "speed": 1 - } - ], - "original_duration": 11.04, - "original_num_samples": 176640, - "transcript": "the ordeal is best met alone where no curious or pitying eyes embarrass her where all nature says to her spirit tis love tis love the fulfilling of life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.265, - "num_samples": 196240, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0003.wav", - "speed": 1 - } - ], - "original_duration": 12.265, - "original_num_samples": 196240, - "transcript": "this wild mother has not only the experience of her mother and grandmother and the accepted rules of her people for a guide but she humbly seeks to learn a lesson from ants bees spiders beavers and badgers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.32, - "num_samples": 101120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.32, - "original_num_samples": 101120, - "transcript": "in due time the child takes of his own accord the attitude of prayer and speaks reverently of the powers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.755, - "num_samples": 108080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.755, - "original_num_samples": 108080, - "transcript": "indeed the distinctive work of both grandparents is that of acquainting the youth with the national traditions and beliefs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.34, - "num_samples": 69440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.34, - "original_num_samples": 69440, - "transcript": "the family was not only the social unit but also the unit of government" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.175, - "num_samples": 274800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0007.wav", - "speed": 1 - } - ], - 
"original_duration": 17.175, - "original_num_samples": 274800, - "transcript": "the remoter degrees of kinship were fully recognized and that not as a matter of form only first cousins were known as brothers and sisters the name of cousin constituted a binding claim and our rigid morality forbade marriage between cousins in any known degree or in other words within the clan" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.69, - "num_samples": 43040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.69, - "original_num_samples": 43040, - "transcript": "when she fell the whole race fell with her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.645, - "num_samples": 106320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.645, - "original_num_samples": 106320, - "transcript": "before this calamity came upon us you could not find anywhere a happier home than that created by the indian woman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.775, - "num_samples": 60400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.775, - "original_num_samples": 60400, - "transcript": "his daily devotions were more necessary to him than daily food" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.275, - "num_samples": 308400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0011.wav", - "speed": 1 - } - ], - "original_duration": 19.275, - "original_num_samples": 308400, - "transcript": 
"whenever in the course of the daily hunt the red hunter comes upon a scene that is strikingly beautiful or sublime a black thundercloud with the rainbow's glowing arch above the mountain a white waterfall in the heart of a green gorge a vast prairie tinged with the blood red of sunset he pauses for an instant in the attitude of worship" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.815, - "num_samples": 77040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.815, - "original_num_samples": 77040, - "transcript": "when he becomes an old man he loves to make a notable effort to prove his gratitude" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.645, - "num_samples": 106320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.645, - "original_num_samples": 106320, - "transcript": "he cuts off the choicest morsel of the meat and casts it into the fire the purest and most ethereal element" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.545, - "num_samples": 72720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.545, - "original_num_samples": 72720, - "transcript": "the hospitality of the wigwam is only limited by the institution of war" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.625, - "num_samples": 74000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.625, - "original_num_samples": 74000, - "transcript": 
"our honor is the guarantee for his safety so long as he is within the camp" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.05, - "num_samples": 96800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.05, - "original_num_samples": 96800, - "transcript": "love between man and woman is founded on the mating instinct and is not free from desire and self seeking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.51, - "num_samples": 88160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.51, - "original_num_samples": 88160, - "transcript": "but to have a friend and to be true under any and all trials is the mark of a man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.98, - "num_samples": 95680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43359/5536-43359-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.98, - "original_num_samples": 95680, - "transcript": "this bond is between man and man is usually formed in early youth and can only be broken by death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.82, - "num_samples": 45120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.82, - "original_num_samples": 45120, - "transcript": "reincarnation and the converse of spirits" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.14, - "num_samples": 114240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/5536/43363/5536-43363-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.14, - "original_num_samples": 114240, - "transcript": "therefore he courts death in battle on the other hand he would regard it as disgraceful to be killed in a private quarrel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.02, - "num_samples": 112320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.02, - "original_num_samples": 112320, - "transcript": "the men blacken their faces and widows or bereaved parents sometimes gash their arms and legs till they are covered with blood" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.21, - "num_samples": 163360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0003.wav", - "speed": 1 - } - ], - "original_duration": 10.21, - "original_num_samples": 163360, - "transcript": "giving themselves up wholly to their grief they are no longer concerned about any earthly possession and often give away all that they have to the first comers even to their beds and their home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.375, - "num_samples": 150000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0004.wav", - "speed": 1 - } - ], - "original_duration": 9.375, - "original_num_samples": 150000, - "transcript": "it was prepared by dressing in the finest clothes together with some personal possessions and ornaments wrapped in several robes and finally in a secure covering of raw hide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.56, - "num_samples": 264960, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0005.wav", - "speed": 1 - } - ], - "original_duration": 16.56, - "original_num_samples": 264960, - "transcript": "as a special mark of respect the body of a young woman or a warrior was sometimes laid out in state in a new teepee with the usual household articles and even with a dish of food left beside it not that they supposed the spirit could use the implements or eat the food but merely as a last tribute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.985, - "num_samples": 191760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0006.wav", - "speed": 1 - } - ], - "original_duration": 11.985, - "original_num_samples": 191760, - "transcript": "if a man were slain in battle it was an old custom to place his body against a tree or rock in a sitting position always facing the enemy to indicate his undaunted defiance and bravery even in death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.9, - "num_samples": 158400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.9, - "original_num_samples": 158400, - "transcript": "at every meal time a dish of food was placed under it and some person of the same sex and age as the one who was gone must afterward be invited in to partake of the food" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.29, - "num_samples": 164640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.29, - "original_num_samples": 164640, - "transcript": "at the end of a year from the time of death the relatives made a public feast and gave away the clothing and other 
gifts while the lock of hair was interred with appropriate ceremonies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.88, - "num_samples": 334080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0009.wav", - "speed": 1 - } - ], - "original_duration": 20.88, - "original_num_samples": 334080, - "transcript": "it is well known that the american indian had somehow developed occult power and although in the latter days there have been many impostors and allowing for the vanity and weakness of human nature it is fair to assume that there must have been some even in the old days yet there are well attested instances of remarkable prophecies and other mystic practice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.26, - "num_samples": 164160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.26, - "original_num_samples": 164160, - "transcript": "no doubt many predictions have been colored to suit the new age and unquestionably false prophets fakirs and conjurers have become the pest of the tribes during the transition period" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.09, - "num_samples": 33440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.09, - "original_num_samples": 33440, - "transcript": "this was carried out to the letter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.265, - "num_samples": 52240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.265, - 
"original_num_samples": 52240, - "transcript": "this was only one of his remarkable prophecies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.425, - "num_samples": 118800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0013.wav", - "speed": 1 - } - ], - "original_duration": 7.425, - "original_num_samples": 118800, - "transcript": "another famous medicine man was born on the rum river about one hundred and fifty years ago and lived to be over a century old" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.345, - "num_samples": 181520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0014.wav", - "speed": 1 - } - ], - "original_duration": 11.345, - "original_num_samples": 181520, - "transcript": "at the age of about seventy five years he saved his band from utter destruction at the hands of their ancestral enemies by suddenly giving warning received in a dream of the approach of a large war party" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.74, - "num_samples": 75840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.74, - "original_num_samples": 75840, - "transcript": "five years later he repeated the service and again saved his people from awful slaughter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.825, - "num_samples": 109200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.825, - "original_num_samples": 109200, - "transcript": "there are many trustworthy men and men of christian faith to vouch for these and 
similar events occurring as foretold" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.195, - "num_samples": 147120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0017.wav", - "speed": 1 - } - ], - "original_duration": 9.195, - "original_num_samples": 147120, - "transcript": "at another time when i was fourteen years old we had just left fort ellis on the assiniboine river and my youngest uncle had selected a fine spot for our night camp" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.89, - "num_samples": 126240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.89, - "original_num_samples": 126240, - "transcript": "many of the indians believed that one may be born more than once and there were some who claimed to have full knowledge of a former incarnation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.25, - "num_samples": 116000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5536/43363/5536-43363-0019.wav", - "speed": 1 - } - ], - "original_duration": 7.25, - "original_num_samples": 116000, - "transcript": "there was a well known sioux war prophet who lived in the middle of the last century so that he is still remembered by the old men of his band" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.88, - "num_samples": 158080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.88, - "original_num_samples": 158080, - "transcript": "she herself should have been a poem a lyric in a white gown and green scarf coming to him through the long grass 
under the blossomed boughs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.625, - "num_samples": 138000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.625, - "original_num_samples": 138000, - "transcript": "her hands should have been full of bluebells and she should have held them up to his face in maidenly defence as he sprang forward to take her in his arms" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.55, - "num_samples": 120800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.55, - "original_num_samples": 120800, - "transcript": "you see that she knew exactly how a tryst is conducted in the pages of the standard poets and of the cheaper weekly journals" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.535, - "num_samples": 88560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.535, - "original_num_samples": 88560, - "transcript": "she had to the full limit allowed of her reading and her environment the literary sense" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.415, - "num_samples": 70640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.415, - "original_num_samples": 70640, - "transcript": "and curiously enough she was hardly curious at all about what he might have to say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.36, - "num_samples": 
165760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.36, - "original_num_samples": 165760, - "transcript": "she only wished for may and the orchard instead of january and the dingy dusty waiting room the plain faced preoccupied travellers the dim desolate weather" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.065, - "num_samples": 49040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.065, - "original_num_samples": 49040, - "transcript": "the setting of the scene seemed to her all important" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.37, - "num_samples": 69920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.37, - "original_num_samples": 69920, - "transcript": "it was bitterly cold but the embankment was more romantic than a railway carriage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.03, - "num_samples": 64480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.03, - "original_num_samples": 64480, - "transcript": "he had been late he had offered no excuse no explanation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.125, - "num_samples": 98000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.125, - "original_num_samples": 98000, - "transcript": "but here the only thing that occurred 
to her was to stop and look in one of the shops till he should ask her what she was looking at" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.45, - "num_samples": 71200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.45, - "original_num_samples": 71200, - "transcript": "the keen wind thrust itself even inside the high collar of her jacket" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.13, - "num_samples": 34080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.13, - "original_num_samples": 34080, - "transcript": "her hands and feet were aching with cold" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.925, - "num_samples": 94800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.925, - "original_num_samples": 94800, - "transcript": "she would have shared his sorrow and shown herself half wife half angel from heaven in this dark hour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.575, - "num_samples": 41200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.575, - "original_num_samples": 41200, - "transcript": "she said how frightfully cold it is" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.095, - "num_samples": 97520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0014.wav", - "speed": 1 - } - ], 
- "original_duration": 6.095, - "original_num_samples": 97520, - "transcript": "and yesterday i had a letter from her and she seems to expect to think and i thought i ought to tell you darling" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.025, - "num_samples": 96400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.025, - "original_num_samples": 96400, - "transcript": "a shock of unbelievable relief tingled through her so that was all what was it compared with her fears" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.72, - "num_samples": 107520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.72, - "original_num_samples": 107520, - "transcript": "what opinion would he form of the purity of her mind the innocence of her soul if an incident like this failed to shock her deeply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.585, - "num_samples": 105360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.585, - "original_num_samples": 105360, - "transcript": "following the tingle of relief came a sharp sickening pinch of jealousy and mortification these inspired her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.2, - "num_samples": 99200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.2, - "original_num_samples": 99200, - "transcript": "i don't wonder you were afraid to tell me she began you don't love me 
you've never loved me i was an idiot to believe you did" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.815, - "num_samples": 61040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.815, - "original_num_samples": 61040, - "transcript": "those four true words wounded her more than all the rest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.16, - "num_samples": 50560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0020.wav", - "speed": 1 - } - ], - "original_duration": 3.16, - "original_num_samples": 50560, - "transcript": "couldn't help it then how can i ever trust you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.165, - "num_samples": 34640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.165, - "original_num_samples": 34640, - "transcript": "do you think i'm not sorry now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.57, - "num_samples": 41120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.57, - "original_num_samples": 41120, - "transcript": "no it's only painful for both of us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.53, - "num_samples": 88480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0023.wav", - "speed": 1 - } - ], - "original_duration": 5.53, - "original_num_samples": 88480, - "transcript": "i didn't think a decent man could do 
such things she was pulling on her gloves go home and gloat over it all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.535, - "num_samples": 40560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.535, - "original_num_samples": 40560, - "transcript": "he stood up suddenly do you mean it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.935, - "num_samples": 46960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.935, - "original_num_samples": 46960, - "transcript": "are you really going to throw me over for a thing like this" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.19, - "num_samples": 83040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0026.wav", - "speed": 1 - } - ], - "original_duration": 5.19, - "original_num_samples": 83040, - "transcript": "and he strode down between the marble tables and out by the swing door it was a very good exit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.425, - "num_samples": 102800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0027.wav", - "speed": 1 - } - ], - "original_duration": 6.425, - "original_num_samples": 102800, - "transcript": "at the corner he remembered that he had gone away without paying for the tea and his natural impulse was to go back and remedy that error" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.94, - "num_samples": 31040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6345/93302/6345-93302-0028.wav", - "speed": 1 - } - ], - "original_duration": 1.94, - "original_num_samples": 31040, - "transcript": "he checked the silly impulse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.135, - "num_samples": 210160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93302/6345-93302-0029.wav", - "speed": 1 - } - ], - "original_duration": 13.135, - "original_num_samples": 210160, - "transcript": "so he enlisted and went to south africa and he never came home covered with medals and glory which was rather his idea to the few simple words of explanation that would have made all straight and repaid her and him for all the past" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.995, - "num_samples": 335920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0000.wav", - "speed": 1 - } - ], - "original_duration": 20.995, - "original_num_samples": 335920, - "transcript": "the last strains of the ill treated ill fated intermezzo had died away and after them had died away also the rumbling of the wheels of the murderous barrel organ that had so gaily executed that along with the nine other tunes of its repertory to the admiration of the housemaid at the window of the house opposite and the crowing delight of the two babies next door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.46, - "num_samples": 231360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0001.wav", - "speed": 1 - } - ], - "original_duration": 14.46, - "original_num_samples": 231360, - "transcript": "the young man drew a deep breath of relief and lighted the wax candles in the solid silver candlesticks on his writing table for now the late summer dusk was falling and 
that organ please heaven made full the measure of the day's appointed torture" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.13, - "num_samples": 82080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.13, - "original_num_samples": 82080, - "transcript": "then there was silence then a sigh and the sound of light moving feet on the gravel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.49, - "num_samples": 39840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.49, - "original_num_samples": 39840, - "transcript": "and again he listened with a quiet pleasure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.17, - "num_samples": 50720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.17, - "original_num_samples": 50720, - "transcript": "never had any act seemed so impossible" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.515, - "num_samples": 72240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.515, - "original_num_samples": 72240, - "transcript": "there is a seat in the garden at the side of the house again she hesitated" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.6, - "num_samples": 73600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.6, 
- "original_num_samples": 73600, - "transcript": "then she turned towards the quarter indicated and disappeared round the laurel bushes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.125, - "num_samples": 82000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.125, - "original_num_samples": 82000, - "transcript": "look here he said this is all nonsense you know you are tired out and there's something wrong what is it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.96, - "num_samples": 47360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.96, - "original_num_samples": 47360, - "transcript": "do drink this and then tell me perhaps i can help you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.57, - "num_samples": 41120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.57, - "original_num_samples": 41120, - "transcript": "he hurriedly cut cake and pressed it upon her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.535, - "num_samples": 88560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0010.wav", - "speed": 1 - } - ], - "original_duration": 5.535, - "original_num_samples": 88560, - "transcript": "he had no time to think but he was aware that this was the most exciting adventure that had ever happened to him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.325, - "num_samples": 69200, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.325, - "original_num_samples": 69200, - "transcript": "is it only that you're poor why that's nothing i'm poor too she laughed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.565, - "num_samples": 41040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.565, - "original_num_samples": 41040, - "transcript": "her little foot tapped the gravel impatiently" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.815, - "num_samples": 173040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0013.wav", - "speed": 1 - } - ], - "original_duration": 10.815, - "original_num_samples": 173040, - "transcript": "he told me to stay on at the hotel and i did and then one night when i was at the theatre my maid a horrid french thing we got in paris packed up all my trunks and took all my money and paid the bill and went" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.24, - "num_samples": 35840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.24, - "original_num_samples": 35840, - "transcript": "she said again you are kind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.32, - "num_samples": 133120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.32, - "original_num_samples": 133120, - "transcript": "well then i went into lodgings that wicked woman had 
left me one street suit and to day they turned me out because my money was all gone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.215, - "num_samples": 67440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.215, - "original_num_samples": 67440, - "transcript": "let me think he said oh how glad i am that you happened to come this way" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.475, - "num_samples": 119600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.475, - "original_num_samples": 119600, - "transcript": "i shall lock up all the doors and windows in the house and then i shall give you my latch key and you can let yourself in and stay the night here there is no one in the house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.405, - "num_samples": 70480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.405, - "original_num_samples": 70480, - "transcript": "i will catch the night train and bring my mother up to morrow then we will see what can be done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.67, - "num_samples": 122720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0019.wav", - "speed": 1 - } - ], - "original_duration": 7.67, - "original_num_samples": 122720, - "transcript": "you see papa's so very rich and at home they expect me to to get acquainted with dukes and things and she stopped" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 6.735, - "num_samples": 107760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.735, - "original_num_samples": 107760, - "transcript": "it wasn't i who said that said the girl smiling but that's so anyhow and then she sighed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.785, - "num_samples": 60560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.785, - "original_num_samples": 60560, - "transcript": "all the same he added irrelevantly you shall have the latch key" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.11, - "num_samples": 161760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.11, - "original_num_samples": 161760, - "transcript": "you are kind she said for the third time and reached her hand out to him he did not kiss it then only took it in his and felt how small and cold it was then it was taken away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.85, - "num_samples": 189600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0023.wav", - "speed": 1 - } - ], - "original_duration": 11.85, - "original_num_samples": 189600, - "transcript": "the lady and the guitar certainly passed the night at hill view villa but when his mother very angry and very frightened came up with him at about noon the house looked just as usual and no one was there but the charwoman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.365, - 
"num_samples": 149840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0024.wav", - "speed": 1 - } - ], - "original_duration": 9.365, - "original_num_samples": 149840, - "transcript": "the silver is all right thank goodness she said but your banjo girl has taken a pair of your sister's silk stockings and those new shoes of hers with the silver buckles and she's left these" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.55, - "num_samples": 184800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/93306/6345-93306-0025.wav", - "speed": 1 - } - ], - "original_duration": 11.55, - "original_num_samples": 184800, - "transcript": "it was plain that his castanet girl his mother and sister took a pleasure in crediting her daily with some fresh and unpleasing instrument could have had neither taste money nor honesty to such a point as this" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.055, - "num_samples": 176880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.055, - "original_num_samples": 176880, - "transcript": "when she said good night to beenie and went to her chamber over that where the loved parent and friend would fall asleep no more she felt as if she went walking along to her tomb" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 21.93, - "num_samples": 350880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0001.wav", - "speed": 1 - } - ], - "original_duration": 21.93, - "original_num_samples": 350880, - "transcript": "at the time mary had noted nothing of these things now she saw them all as for the first time in minute detail while slowly she 
went up the stair and through the narrowed ways and heard the same wind that raved alike about the new grave and the old house into which latter for all the bales banked against the walls it found many a chink of entrance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.045, - "num_samples": 160720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.045, - "original_num_samples": 160720, - "transcript": "when she opened the door of it the bright fire which beenie undesired had kindled there startled her the room looked unnatural uncanny because it was cheerful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.78, - "num_samples": 140480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.78, - "original_num_samples": 140480, - "transcript": "she stood for a moment on the hearth and in sad dreamy mood listened to the howling swoops of the wind making the house quiver and shake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.05, - "num_samples": 96800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.05, - "original_num_samples": 96800, - "transcript": "this was her dream as nearly as she could recall it when she came to herself after waking from it with a cry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.665, - "num_samples": 122640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.665, - "original_num_samples": 
122640, - "transcript": "she was one of a large company at a house where she had never been before a beautiful house with a large garden behind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.125, - "num_samples": 146000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0006.wav", - "speed": 1 - } - ], - "original_duration": 9.125, - "original_num_samples": 146000, - "transcript": "it was a summer night and the guests were wandering in and out at will and through house and garden amid lovely things of all colors and odors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.84, - "num_samples": 93440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.84, - "original_num_samples": 93440, - "transcript": "but she knew nobody and wandered alone in the garden oppressed with something she did not understand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.205, - "num_samples": 51280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.205, - "original_num_samples": 51280, - "transcript": "at the end of it she was in a place of tombs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.385, - "num_samples": 198160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0009.wav", - "speed": 1 - } - ], - "original_duration": 12.385, - "original_num_samples": 198160, - "transcript": "she entered and the servants soft footed and silent were busy carrying away the vessels of hospitality and restoring order as if already they prepared for another company 
on the morrow no one heeded her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.985, - "num_samples": 63760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.985, - "original_num_samples": 63760, - "transcript": "she was lost lost utterly with an eternal loss" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.83, - "num_samples": 141280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0011.wav", - "speed": 1 - } - ], - "original_duration": 8.83, - "original_num_samples": 141280, - "transcript": "she knew nothing of the place had nowhere to go nowhere she wanted to go had not a thought to tell her what question to ask if she met a living soul" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.815, - "num_samples": 45040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.815, - "original_num_samples": 45040, - "transcript": "but living soul there could be none to meet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.295, - "num_samples": 100720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.295, - "original_num_samples": 100720, - "transcript": "she had lost him years and years before and now she saw him he was there and she knew him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.085, - "num_samples": 49360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6345/64257/6345-64257-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.085, - "original_num_samples": 49360, - "transcript": "he came to her side and she gave him no greeting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.29, - "num_samples": 36640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.29, - "original_num_samples": 36640, - "transcript": "i know it and there is no waking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.78, - "num_samples": 92480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.78, - "original_num_samples": 92480, - "transcript": "the old time was but a thicker dream and this is truer because more shadowy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.83, - "num_samples": 45280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.83, - "original_num_samples": 45280, - "transcript": "her only life was that she was lost" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.785, - "num_samples": 76560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.785, - "original_num_samples": 76560, - "transcript": "shall i pour out my soul into the ear of a mist a fume from my own brain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.72, - "num_samples": 123520, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/6345/64257/6345-64257-0019.wav", - "speed": 1 - } - ], - "original_duration": 7.72, - "original_num_samples": 123520, - "transcript": "thus was she borne away captive of her dead neither willing nor unwilling of life and death equally careless" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.86, - "num_samples": 45760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6345/64257/6345-64257-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.86, - "original_num_samples": 45760, - "transcript": "with that came a pang of intense pain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.105, - "num_samples": 49680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.105, - "original_num_samples": 49680, - "transcript": "chapter seven the homecoming" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.885, - "num_samples": 126160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.885, - "original_num_samples": 126160, - "transcript": "colonel leonidas talbot regarded the white flag with feelings in which triumph and sadness were mingled strangely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.105, - "num_samples": 97680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.105, - "original_num_samples": 97680, - "transcript": "but the emotions of harry and his comrades were for the moment those of victory only" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 3.245, - "num_samples": 51920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.245, - "original_num_samples": 51920, - "transcript": "boats put out both from the fort and the shore" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.315, - "num_samples": 197040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0004.wav", - "speed": 1 - } - ], - "original_duration": 12.315, - "original_num_samples": 197040, - "transcript": "the smoke itself which had formed a vast cloud over harbor forts and city was now drifting out to sea leaving all things etched sharply in the dazzling sunlight of a southern spring day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.4, - "num_samples": 70400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.4, - "original_num_samples": 70400, - "transcript": "that white flag and those boats going out mean that sumter is ours" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.84, - "num_samples": 45440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.84, - "original_num_samples": 45440, - "transcript": "but the negotiations were soon completed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.59, - "num_samples": 73440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.59, - 
"original_num_samples": 73440, - "transcript": "all the amenities were preserved between the captured garrison and their captors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.4, - "num_samples": 214400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0008.wav", - "speed": 1 - } - ], - "original_duration": 13.4, - "original_num_samples": 214400, - "transcript": "the great state of virginia mother of presidents went out of the union at last and north carolina tennessee and arkansas followed her but maryland kentucky and missouri still hung in the balance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.265, - "num_samples": 132240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.265, - "original_num_samples": 132240, - "transcript": "lincoln had called for volunteers to put down a rebellion but harry heard everywhere in charleston that the confederacy was now secure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.09, - "num_samples": 161440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.09, - "original_num_samples": 161440, - "transcript": "the progress of president davis to the new capital set in the very face of the foe was to be one huge triumph of faith and loyalty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.525, - "num_samples": 40400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.525, - "original_num_samples": 40400, - "transcript": "there 
was not a single note of gloom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.58, - "num_samples": 73280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.58, - "original_num_samples": 73280, - "transcript": "europe which must have its cotton would favor the success of the south" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.71, - "num_samples": 107360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.71, - "original_num_samples": 107360, - "transcript": "an extraordinary wave of emotion swept over the south carrying everybody with it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.02, - "num_samples": 32320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.02, - "original_num_samples": 32320, - "transcript": "beauregard at once wrote an order" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.235, - "num_samples": 35760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.235, - "original_num_samples": 35760, - "transcript": "colonel kenton writes wisely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.51, - "num_samples": 184160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0016.wav", - "speed": 1 - } - ], - "original_duration": 11.51, - "original_num_samples": 184160, - "transcript": "we 
need kentucky and i understand that a very little more may bring the state to us go with your father i understand that you have been a brave young soldier here and may you do as well up there" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.55, - "num_samples": 136800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.55, - "original_num_samples": 136800, - "transcript": "harry feeling pride but not showing it saluted and left the room going at once to madame delaunay's where he had left his baggage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.325, - "num_samples": 85200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.325, - "original_num_samples": 85200, - "transcript": "he intended to leave early in the morning but first he sought his friends and told them good bye" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.015, - "num_samples": 64240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.015, - "original_num_samples": 64240, - "transcript": "harry gave his farewells with deep and genuine regret" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.44, - "num_samples": 119040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0020.wav", - "speed": 1 - } - ], - "original_duration": 7.44, - "original_num_samples": 119040, - "transcript": "whether their manner was grave or frivolous he knew that these were good friends of his and he sincerely hoped that he would 
meet them again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.97, - "num_samples": 47520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.97, - "original_num_samples": 47520, - "transcript": "it was a different harry who started home late in april" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.675, - "num_samples": 122800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.675, - "original_num_samples": 122800, - "transcript": "four months had made great changes he bore himself more like a man his manner was much more considered and grave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.41, - "num_samples": 54560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.41, - "original_num_samples": 54560, - "transcript": "he had seen great things and he had done his share of them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.195, - "num_samples": 67120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.195, - "original_num_samples": 67120, - "transcript": "he gazed upon a world full of responsibilities and perils" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.99, - "num_samples": 95840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.99, - 
"original_num_samples": 95840, - "transcript": "but he looked back at charleston the gay the volatile and the beautiful with real affection" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.34, - "num_samples": 53440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0026.wav", - "speed": 1 - } - ], - "original_duration": 3.34, - "original_num_samples": 53440, - "transcript": "it was almost buried now in flowers and foliage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.42, - "num_samples": 38720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.42, - "original_num_samples": 38720, - "transcript": "he was going home after victory" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.56, - "num_samples": 40960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.56, - "original_num_samples": 40960, - "transcript": "he soon left charleston out of sight" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.71, - "num_samples": 59360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.71, - "original_num_samples": 59360, - "transcript": "he felt the difference as soon as he reached the hills of his native state" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.545, - "num_samples": 88720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0030.wav", - "speed": 1 - } - ], - 
"original_duration": 5.545, - "original_num_samples": 88720, - "transcript": "people were cooler here and they were more prone to look at the two sides of a question" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.75, - "num_samples": 76000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0031.wav", - "speed": 1 - } - ], - "original_duration": 4.75, - "original_num_samples": 76000, - "transcript": "the air too was unlike that of south carolina there was a sharper tang to it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.94, - "num_samples": 63040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.94, - "original_num_samples": 63040, - "transcript": "it whipped his blood as it blew down from the slopes and crests" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.25, - "num_samples": 164000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0033.wav", - "speed": 1 - } - ], - "original_duration": 10.25, - "original_num_samples": 164000, - "transcript": "it was afternoon when he reached the little station of winton and left the train a tall sturdy boy the superior of many a man in size strength and agility" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.42, - "num_samples": 54720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0034.wav", - "speed": 1 - } - ], - "original_duration": 3.42, - "original_num_samples": 54720, - "transcript": "there were never before such times in old kentucky" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 3.195, - "num_samples": 51120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0035.wav", - "speed": 1 - } - ], - "original_duration": 3.195, - "original_num_samples": 51120, - "transcript": "bill skelly an his gang them mountaineers are up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.215, - "num_samples": 115440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0036.wav", - "speed": 1 - } - ], - "original_duration": 7.215, - "original_num_samples": 115440, - "transcript": "he did not say the last as a boast but merely as an assurance to the liveryman who he saw was anxious on his account" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.18, - "num_samples": 82880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0037.wav", - "speed": 1 - } - ], - "original_duration": 5.18, - "original_num_samples": 82880, - "transcript": "if you've got pistols just you think once before you shoot said collins" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.33, - "num_samples": 149280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0038.wav", - "speed": 1 - } - ], - "original_duration": 9.33, - "original_num_samples": 149280, - "transcript": "harry thanked him threw his saddle bags across the horse a powerful bay and giving a final wave of his hand to the sympathetic liveryman rode away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.145, - "num_samples": 146320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0039.wav", - "speed": 1 - } - ], - "original_duration": 9.145, - 
"original_num_samples": 146320, - "transcript": "this was not the fashion of a year ago when they exchanged a friendly word or two but harry knew its cause now nobody could trust anybody else" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.905, - "num_samples": 62480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/244435/6295-244435-0040.wav", - "speed": 1 - } - ], - "original_duration": 3.905, - "original_num_samples": 62480, - "transcript": "but he saw nothing that moved there no signal lights twinkled" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.65, - "num_samples": 282400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0000.wav", - "speed": 1 - } - ], - "original_duration": 17.65, - "original_num_samples": 282400, - "transcript": "one winter evening as soon as his work was over for the day joseph locked the door of his smithy washed himself well put on clean clothes and taking his violin set out for testbridge mary was expecting him to tea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.73, - "num_samples": 75680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.73, - "original_num_samples": 75680, - "transcript": "it was the afternoon of a holiday and she had closed early" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.72, - "num_samples": 107520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.72, - "original_num_samples": 107520, - "transcript": "was there ever a happier man than joseph that night as he strode along the 
footpath" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.77, - "num_samples": 108320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.77, - "original_num_samples": 108320, - "transcript": "he pressed his violin case to his heart as if it were a living thing that could know that he loved it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.77, - "num_samples": 60320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.77, - "original_num_samples": 60320, - "transcript": "earth was gone and heaven was all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.43, - "num_samples": 102880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.43, - "original_num_samples": 102880, - "transcript": "blessed am i here now my god and blessed shall i be there then" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.54, - "num_samples": 104640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.54, - "original_num_samples": 104640, - "transcript": "when he reached the suburbs the light of homes was shining through curtains of all colors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.395, - "num_samples": 70320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.395, - 
"original_num_samples": 70320, - "transcript": "just then he was in no mood to think of the sorrows" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.8, - "num_samples": 44800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.8, - "original_num_samples": 44800, - "transcript": "the nettle and the dock said joseph" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.005, - "num_samples": 48080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.005, - "original_num_samples": 48080, - "transcript": "he was in a mood for music was he not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.05, - "num_samples": 128800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.05, - "original_num_samples": 128800, - "transcript": "he laid down his violin and seated himself where mary told him in her father's arm chair by the fire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.08, - "num_samples": 161280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.08, - "original_num_samples": 161280, - "transcript": "letty finding herself not quite equal to the emergency came in her turn to call mary she went as quietly as if she were leaving a tiresome visitor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.515, - "num_samples": 104240, - "encoding": "Signed Integer PCM", - "silent": false, 
- "fname": "dev-clean-wav/6295/64301/6295-64301-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.515, - "original_num_samples": 104240, - "transcript": "the music was broken and joseph left alone with the dumb instruments" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.0, - "num_samples": 96000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.0, - "original_num_samples": 96000, - "transcript": "but in his hands solitude and a violin were sure to marry in music" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.57, - "num_samples": 73120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.57, - "original_num_samples": 73120, - "transcript": "they sat down and listened in silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.295, - "num_samples": 116720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.295, - "original_num_samples": 116720, - "transcript": "her heart seemed to swell up into her throat and it was all she could do to keep from weeping" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.85, - "num_samples": 109600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.85, - "original_num_samples": 109600, - "transcript": "a little longer and she was compelled to yield and the silent tears flowed freely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 5.485, - "num_samples": 87760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.485, - "original_num_samples": 87760, - "transcript": "letty too was overcome more than ever she had been by music" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.945, - "num_samples": 159120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0018.wav", - "speed": 1 - } - ], - "original_duration": 9.945, - "original_num_samples": 159120, - "transcript": "let but a mood be strong enough and the soul clothing itself in that mood as with a garment can walk abroad and haunt the world" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.865, - "num_samples": 157840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0019.wav", - "speed": 1 - } - ], - "original_duration": 9.865, - "original_num_samples": 157840, - "transcript": "it cried aloud that eternity was very long and like a great palace without a quiet room" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.94, - "num_samples": 95040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.94, - "original_num_samples": 95040, - "transcript": "nor was this exactly the shape the thing took to the consciousness of the musician" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.17, - "num_samples": 130720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0021.wav", - "speed": 1 - } - ], - "original_duration": 8.17, - "original_num_samples": 130720, - 
"transcript": "i love thee i love thee cried the violin and the worship was entreaty that knew not itself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.66, - "num_samples": 170560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.66, - "original_num_samples": 170560, - "transcript": "hast thou yet to learn that the love of the human is love is divine is but a lower form of a part of the love of god" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.82, - "num_samples": 333120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0023.wav", - "speed": 1 - } - ], - "original_duration": 20.82, - "original_num_samples": 333120, - "transcript": "when thou lovest man or woman or child yea or even dog aright then wilt thou no longer need that i tell thee how god and his christ would not be content with each other alone in the glories even of the eternal original love because they could create more love" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.465, - "num_samples": 103440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.465, - "original_num_samples": 103440, - "transcript": "he that loveth not his brother whom he hath seen how shall he love god whom he hath not seen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.705, - "num_samples": 91280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.705, - "original_num_samples": 91280, - "transcript": "a sob like a bird new 
born burst from mary's bosom" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.43, - "num_samples": 166880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0026.wav", - "speed": 1 - } - ], - "original_duration": 10.43, - "original_num_samples": 166880, - "transcript": "that enchantment had possessed him usurping as it were the throne of his life and displacing it when it ceased he was not his own master" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.4, - "num_samples": 102400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0027.wav", - "speed": 1 - } - ], - "original_duration": 6.4, - "original_num_samples": 102400, - "transcript": "he started to conscious confusion only neither knowing where he was nor what he did" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.65, - "num_samples": 170400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0028.wav", - "speed": 1 - } - ], - "original_duration": 10.65, - "original_num_samples": 170400, - "transcript": "how it happened he never could tell but he brought down his violin with a crash against the piano then somehow stumbled and all but fell" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.505, - "num_samples": 184080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0029.wav", - "speed": 1 - } - ], - "original_duration": 11.505, - "original_num_samples": 184080, - "transcript": "in the act of recovering himself he heard the neck of his instrument part from the body with a tearing discordant cry like the sound of the ruin of a living world" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.455, - "num_samples": 87280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0030.wav", - "speed": 1 - } - ], - "original_duration": 5.455, - "original_num_samples": 87280, - "transcript": "his violin was broken but his being was made whole" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.76, - "num_samples": 108160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0031.wav", - "speed": 1 - } - ], - "original_duration": 6.76, - "original_num_samples": 108160, - "transcript": "his treasure taken type of his self and a woman given him instead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.59, - "num_samples": 41440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6295/64301/6295-64301-0032.wav", - "speed": 1 - } - ], - "original_duration": 2.59, - "original_num_samples": 41440, - "transcript": "it's just like him he murmured" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.355, - "num_samples": 37680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.355, - "original_num_samples": 37680, - "transcript": "you'll all be over if you don't have a care" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.5, - "num_samples": 72000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.5, - "original_num_samples": 72000, - "transcript": "looks like a clump of bushes down there but i ain't sure can you make it out" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.075, - "num_samples": 49200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.075, - "original_num_samples": 49200, - "transcript": "yes agreed tad that does look like bushes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.58, - "num_samples": 73280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.58, - "original_num_samples": 73280, - "transcript": "don't move around lie perfectly still warned the guide are you hurt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.26, - "num_samples": 52160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.26, - "original_num_samples": 52160, - "transcript": "and that tumble's enough to knock the sense out of a full grown man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.64, - "num_samples": 74240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.64, - "original_num_samples": 74240, - "transcript": "i could not think of allowing any of my charges to take so terrible a risk and" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.005, - "num_samples": 48080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.005, - "original_num_samples": 48080, - "transcript": "no i am the lighter of the two urged tad" - }, - 
{ - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.93, - "num_samples": 78880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.93, - "original_num_samples": 78880, - "transcript": "i am the one to go after walt if anyone has to i'll go down mister thomas" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.49, - "num_samples": 71840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.49, - "original_num_samples": 71840, - "transcript": "master tad is right decided the guide gazing at the two boys approvingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.71, - "num_samples": 43360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.71, - "original_num_samples": 43360, - "transcript": "i protest shouted the professor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.805, - "num_samples": 60880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.805, - "original_num_samples": 60880, - "transcript": "you'd have both of us at the bottom if i left it to you to take care of this end" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.12, - "num_samples": 81920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.12, - "original_num_samples": 81920, - "transcript": "be sure to 
fasten him securely to the loop before you give the signal to haul up warned the guide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.29, - "num_samples": 36640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.29, - "original_num_samples": 36640, - "transcript": "are you ready yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.06, - "num_samples": 32960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.06, - "original_num_samples": 32960, - "transcript": "he tilted his head to look up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.785, - "num_samples": 60560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.785, - "original_num_samples": 60560, - "transcript": "the movement sent his body swaying giddily from side to side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.23, - "num_samples": 131680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.23, - "original_num_samples": 131680, - "transcript": "cautiously placing a hand against the rocks to steady himself tad wisely concluded that hereafter it would not pay to be too curious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.445, - "num_samples": 119120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0016.wav", - "speed": 1 - } - ], - 
"original_duration": 7.445, - "original_num_samples": 119120, - "transcript": "slowly but steadily the slender line was paid out amid a tense silence on the part of the little group at the top of the canyou" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.065, - "num_samples": 81040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.065, - "original_num_samples": 81040, - "transcript": "after what seemed to them hours a sharp call from the depths reached their ears" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.955, - "num_samples": 47280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.955, - "original_num_samples": 47280, - "transcript": "lige quickly made fast the line to a tree" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.69, - "num_samples": 75040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.69, - "original_num_samples": 75040, - "transcript": "i see him called tad his voice sounding hollow and unnatural to those above" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.99, - "num_samples": 47840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.99, - "original_num_samples": 47840, - "transcript": "he's so far to the right of me that i can't reach him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.975, - "num_samples": 79600, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.975, - "original_num_samples": 79600, - "transcript": "lodged in the branches of a pinyon tree i think it is but he doesn't answer me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.78, - "num_samples": 124480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.78, - "original_num_samples": 124480, - "transcript": "lige leaning over the brink was able to follow the boy's movements by the aid of the thin arc of light made by the torch in tad's hand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.35, - "num_samples": 149600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0023.wav", - "speed": 1 - } - ], - "original_duration": 9.35, - "original_num_samples": 149600, - "transcript": "but from the cautious movements of the light far below them the guide understood that the lad was at work carrying out his part of the task of rescue to the best of his ability" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.41, - "num_samples": 70560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.41, - "original_num_samples": 70560, - "transcript": "mebby you think he's having some sort of a picnic down there eh glared lige" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.32, - "num_samples": 85120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0025.wav", - "speed": 1 - } - ], - 
"original_duration": 5.32, - "original_num_samples": 85120, - "transcript": "shall we haul up asked lige making a megaphone of his hands yes haul away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "sure thing answered the boy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.47, - "num_samples": 231520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66125/6313-66125-0027.wav", - "speed": 1 - } - ], - "original_duration": 14.47, - "original_num_samples": 231520, - "transcript": "nor was his sense of security increased when in shifting his position the torch fell from his grasp the fagots scattering as they slipped down between the limbs of the tree and whirling in ever diminishing circles until finally he heard them clatter on the rocks below" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.89, - "num_samples": 46240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.89, - "original_num_samples": 46240, - "transcript": "chapter four the first night in camp" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.97, - "num_samples": 47520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.97, - "original_num_samples": 47520, - "transcript": "even if i can't sing i can beat that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 3.005, - "num_samples": 48080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.005, - "original_num_samples": 48080, - "transcript": "not on the range why not demanded the boy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.975, - "num_samples": 47600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.975, - "original_num_samples": 47600, - "transcript": "a loud laugh followed at chunky's expense" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.565, - "num_samples": 89040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.565, - "original_num_samples": 89040, - "transcript": "the pony did most of it admitted the lad i just gave him his head and that's all there was to it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.81, - "num_samples": 92960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.81, - "original_num_samples": 92960, - "transcript": "walter had gone out with the second guard and the others had gathered around the camp fire for their nightly story telling" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.445, - "num_samples": 39120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.445, - "original_num_samples": 39120, - "transcript": "none of you will be fit for duty to morrow" - 
}, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "we've got a hard drive before us and every man must be fit as a fiddle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.345, - "num_samples": 37520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.345, - "original_num_samples": 37520, - "transcript": "humph grunted curley adams" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.4, - "num_samples": 102400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.4, - "original_num_samples": 102400, - "transcript": "the cowboy did this very thing but within an hour he found himself alone the others having turned in one by one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.63, - "num_samples": 106080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.63, - "original_num_samples": 106080, - "transcript": "the lads found that a pair of blankets had been assigned to each of them with an ordinary wagon sheet doubled for a tarpaulin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.025, - "num_samples": 64400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0011.wav", - "speed": 1 - } - ], - 
"original_duration": 4.025, - "original_num_samples": 64400, - "transcript": "these they spread out on the ground using boots wrapped in coats for pillows" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.59, - "num_samples": 105440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.59, - "original_num_samples": 105440, - "transcript": "stacy brown proved the only grumbler in the lot declaring that he could not sleep a wink on such a bed as that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.81, - "num_samples": 188960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0013.wav", - "speed": 1 - } - ], - "original_duration": 11.81, - "original_num_samples": 188960, - "transcript": "the horses of the outfit save those that were on night duty and two or three others that had developed a habit of straying had been turned loose early in the evening for animals on the trail are seldom staked down" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.515, - "num_samples": 120240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.515, - "original_num_samples": 120240, - "transcript": "in spite of their hard couches the pony riders slept soundly even professor zepplin himself never waking the whole night through" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.38, - "num_samples": 54080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.38, - "original_num_samples": 54080, - 
"transcript": "stacy grumbled turned over and went to sleep again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.35, - "num_samples": 69600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.35, - "original_num_samples": 69600, - "transcript": "you won't be so fast to wake up hard working cowboys after that i reckon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.055, - "num_samples": 80880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.055, - "original_num_samples": 80880, - "transcript": "lumpy bates came running toward him not daring to call out for fear of waking the camp" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.195, - "num_samples": 99120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.195, - "original_num_samples": 99120, - "transcript": "hi there hissed lumpy filled with indignation that anyone should attempt to mount a pony from the right side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.095, - "num_samples": 49520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.095, - "original_num_samples": 49520, - "transcript": "stacy brown's left leg swung over the saddle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.315, - "num_samples": 37040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6313/76958/6313-76958-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.315, - "original_num_samples": 37040, - "transcript": "where are they asked the boy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.925, - "num_samples": 190800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0021.wav", - "speed": 1 - } - ], - "original_duration": 11.925, - "original_num_samples": 190800, - "transcript": "keep a going and if you're lucky you'll run plumb into them was the jeering answer as the sleepy cowmen spurred their ponies on toward camp muttering their disapproval of taking along a bunch of boys on a cattle drive" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.295, - "num_samples": 100720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.295, - "original_num_samples": 100720, - "transcript": "almost before the echoes of his voice had died away a shrill voice piped up from the tail end of the chuck wagon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.255, - "num_samples": 52080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.255, - "original_num_samples": 52080, - "transcript": "grub pi le grub pi le" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.375, - "num_samples": 70000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.375, - "original_num_samples": 70000, - "transcript": "who is the wrangler this morning asked the foreman glancing about at 
his men" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.795, - "num_samples": 60720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.795, - "original_num_samples": 60720, - "transcript": "a wrangler's a wrangler answered big foot stolidly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.43, - "num_samples": 70880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.43, - "original_num_samples": 70880, - "transcript": "he's a fellow who's all the time making trouble isn't he asked stacy innocently" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.505, - "num_samples": 56080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.505, - "original_num_samples": 56080, - "transcript": "oh no this kind of a wrangler isn't laughed the foreman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.795, - "num_samples": 60720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.795, - "original_num_samples": 60720, - "transcript": "he's a trouble curer not a troublemaker except for himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.695, - "num_samples": 107120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0029.wav", - "speed": 1 - } - ], - "original_duration": 6.695, - "original_num_samples": 107120, - "transcript": "pong 
tell the young gentlemen what would become of you if you were to serve bad meals to this outfit of cowpunchers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.265, - "num_samples": 36240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.265, - "original_num_samples": 36240, - "transcript": "how asked tad" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.205, - "num_samples": 35280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/76958/6313-76958-0031.wav", - "speed": 1 - } - ], - "original_duration": 2.205, - "original_num_samples": 35280, - "transcript": "we had better start the drive this morning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.815, - "num_samples": 45040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.815, - "original_num_samples": 45040, - "transcript": "he no doubt would bring food of some kind with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.8, - "num_samples": 188800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0001.wav", - "speed": 1 - } - ], - "original_duration": 11.8, - "original_num_samples": 188800, - "transcript": "with a shout the boys dashed pell mell to meet the pack train and falling in behind the slow moving burros urged them on with derisive shouts and sundry resounding slaps on the animals flanks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.67, - "num_samples": 58720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6313/66129/6313-66129-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.67, - "original_num_samples": 58720, - "transcript": "cold water is the most nourishing thing we've touched since last night" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.45, - "num_samples": 87200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.45, - "original_num_samples": 87200, - "transcript": "we did not it must have come to life some time during the night and dug its way out laughed tad" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.9, - "num_samples": 62400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.9, - "original_num_samples": 62400, - "transcript": "and we've got a surprise for you announced stacy swelling with pride" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.58, - "num_samples": 73280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.58, - "original_num_samples": 73280, - "transcript": "jam exclaimed chunky stretching his neck and eyeing the dish longingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.96, - "num_samples": 47360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.96, - "original_num_samples": 47360, - "transcript": "now fall to young gentlemen directed the professor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.455, - 
"num_samples": 39280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.455, - "original_num_samples": 39280, - "transcript": "i am free to admit that i am hungry too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.07, - "num_samples": 81120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.07, - "original_num_samples": 81120, - "transcript": "he buried his biscuit under a layer of jam over which he spread a thick coating of honey" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.78, - "num_samples": 76480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.78, - "original_num_samples": 76480, - "transcript": "president brown i withdraw my criticism i offer you my humble apologies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.23, - "num_samples": 67680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.23, - "original_num_samples": 67680, - "transcript": "i reckon there are smiled the guide we are in the bear country now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.215, - "num_samples": 35440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.215, - "original_num_samples": 35440, - "transcript": "yes the country is full of caves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 3.09, - "num_samples": 49440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.09, - "original_num_samples": 49440, - "transcript": "this announcement filled the boys with excitement" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.605, - "num_samples": 57680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.605, - "original_num_samples": 57680, - "transcript": "but i know an old settler who will lend us his dog if it is not out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.335, - "num_samples": 181360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0014.wav", - "speed": 1 - } - ], - "original_duration": 11.335, - "original_num_samples": 181360, - "transcript": "supper having been finished the party gathered about the camp fire for their evening chat after which admonishing stacy to keep within his tent and not to go borrowing trouble the boys turned in for a sound sleep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.52, - "num_samples": 120320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.52, - "original_num_samples": 120320, - "transcript": "as yet they had been unable to attempt any fancy riding with their ponies owing to the rugged nature of the country through which they had been journeying" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.475, - "num_samples": 119600, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/6313/66129/6313-66129-0016.wav", - "speed": 1 - } - ], - "original_duration": 7.475, - "original_num_samples": 119600, - "transcript": "the boys were now all anxiety to start while the ponies after their sunday rest were almost as full of life as were their owners" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.685, - "num_samples": 138960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.685, - "original_num_samples": 138960, - "transcript": "the little animals were becoming more sure footed every day and ned said that before the trip was finished jimmie would be able to walk a slack rope" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.705, - "num_samples": 91280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.705, - "original_num_samples": 91280, - "transcript": "an early start was made so that the party reached the promised table lands shortly before ten o'clock in the forenoon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.46, - "num_samples": 39360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.46, - "original_num_samples": 39360, - "transcript": "a temporary camp was quickly pitched" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.98, - "num_samples": 111680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.98, - "original_num_samples": 111680, - "transcript": "the great 
green field surrounded on all sides by tall trees made the place an ideal one for their purpose" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.905, - "num_samples": 206480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0021.wav", - "speed": 1 - } - ], - "original_duration": 12.905, - "original_num_samples": 206480, - "transcript": "it was a beautiful race the little indian ponies seeming to enter thoroughly into the spirit of the contest stretching themselves out to their full lengths and with heads on a level with their backs fairly flew across the great plot of green" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "all agreed that tad's superior horsemanship alone had won the race for him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.25, - "num_samples": 84000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0023.wav", - "speed": 1 - } - ], - "original_duration": 5.25, - "original_num_samples": 84000, - "transcript": "galloping into camp the boy fetched his sombrero which he carried well out into the field and tossed away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.925, - "num_samples": 126800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0024.wav", - "speed": 1 - } - ], - "original_duration": 7.925, - "original_num_samples": 126800, - "transcript": "then bidding the boys ride up near the spot to watch him he drew off some ten rods and 
wheeling spurred his pony to a run" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.67, - "num_samples": 74720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.67, - "original_num_samples": 74720, - "transcript": "grasping the pommel with the left hand he appeared to dive head first toward the ground" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.69, - "num_samples": 187040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0026.wav", - "speed": 1 - } - ], - "original_duration": 11.69, - "original_num_samples": 187040, - "transcript": "they saw his long hair almost brush the grass one of his hands swept down and up and once more tad butler rose standing in his stirrups uttering a cowboy yell as he waved the sombrero on high" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.16, - "num_samples": 82560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0027.wav", - "speed": 1 - } - ], - "original_duration": 5.16, - "original_num_samples": 82560, - "transcript": "the boys howled with delight that is all did save stacy brown" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.32, - "num_samples": 37120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0028.wav", - "speed": 1 - } - ], - "original_duration": 2.32, - "original_num_samples": 37120, - "transcript": "tad is an experienced rider" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.07, - "num_samples": 113120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6313/66129/6313-66129-0029.wav", - "speed": 1 - } - ], - "original_duration": 7.07, - "original_num_samples": 113120, - "transcript": "the first time he rode swiftly by it leaning over to look at the hat as he passed holding to the pommel firmly with his left hand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.435, - "num_samples": 54960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.435, - "original_num_samples": 54960, - "transcript": "what's that for demanded ned wonderingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.425, - "num_samples": 86800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0031.wav", - "speed": 1 - } - ], - "original_duration": 5.425, - "original_num_samples": 86800, - "transcript": "hat too close to me i couldn't get it explained chunky the boys roared" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.86, - "num_samples": 29760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0032.wav", - "speed": 1 - } - ], - "original_duration": 1.86, - "original_num_samples": 29760, - "transcript": "why don't you move the pony" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.065, - "num_samples": 145040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0033.wav", - "speed": 1 - } - ], - "original_duration": 9.065, - "original_num_samples": 145040, - "transcript": "once more stacy approached the sombrero his pony running well and as he drew near it they saw him rise in his saddle just as tad butler had done a few minutes before" - }, - { - "files": 
[ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.96, - "num_samples": 127360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0034.wav", - "speed": 1 - } - ], - "original_duration": 7.96, - "original_num_samples": 127360, - "transcript": "at the moment when he freed his left foot from the stirrup he threw his body sharply to the right reaching for the hat without taking the precaution to grasp the pommel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.2, - "num_samples": 67200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6313/66129/6313-66129-0035.wav", - "speed": 1 - } - ], - "original_duration": 4.2, - "original_num_samples": 67200, - "transcript": "as a result instead of stopping when he reached the hat the boy kept on going" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.55, - "num_samples": 72800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.55, - "original_num_samples": 72800, - "transcript": "it had no ornamentation being exceedingly plain in appearance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.05, - "num_samples": 144800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.05, - "original_num_samples": 144800, - "transcript": "here said one of their guides as the procession halted before the little stone building is the palace of tourmaline who is our queen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/5338/284437/5338-284437-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "what that little cabin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.73, - "num_samples": 43680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.73, - "original_num_samples": 43680, - "transcript": "exclaimed trot of course" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.195, - "num_samples": 99120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.195, - "original_num_samples": 99120, - "transcript": "did you suppose a palace would be like one of our handsome residences asked the woman evidently surprised" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.21, - "num_samples": 83360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.21, - "original_num_samples": 83360, - "transcript": "these intruders are very peculiar people remarked a man in the crowd" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.93, - "num_samples": 78880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.93, - "original_num_samples": 78880, - "transcript": "they seem very ignorant poor things said another in reply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "the people must wait outside for there is no room for them in the palace" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.25, - "num_samples": 148000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.25, - "original_num_samples": 148000, - "transcript": "so they followed her through the low archway and in a room beyond very simply furnished sat a young girl engaged in darning a pair of pink stockings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.295, - "num_samples": 164720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0009.wav", - "speed": 1 - } - ], - "original_duration": 10.295, - "original_num_samples": 164720, - "transcript": "she was a beautiful girl of about seventeen years of age not fat like all the rest of the pinkies but slender and well formed according to our own ideas of beauty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.625, - "num_samples": 106000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.625, - "original_num_samples": 106000, - "transcript": "her complexion was not a decided pink but a soft rosy tint not much deeper than that of trot's skin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.105, - "num_samples": 49680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0011.wav", - "speed": 1 - } - ], - 
"original_duration": 3.105, - "original_num_samples": 49680, - "transcript": "what is it coralie she asked the woman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.025, - "num_samples": 48400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.025, - "original_num_samples": 48400, - "transcript": "the queen gazed upon our friends with evident interest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.25, - "num_samples": 180000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0013.wav", - "speed": 1 - } - ], - "original_duration": 11.25, - "original_num_samples": 180000, - "transcript": "she smiled a little sadly at trot seemed to approve button bright's open frank face and was quite surprised because cap'n bill was so much bigger than her own people" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.845, - "num_samples": 29520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0014.wav", - "speed": 1 - } - ], - "original_duration": 1.845, - "original_num_samples": 29520, - "transcript": "are you a giant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.33, - "num_samples": 85280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.33, - "original_num_samples": 85280, - "transcript": "perhaps you are trying to ridicule me she continued regarding the sailor's face closely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.37, - "num_samples": 53920, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.37, - "original_num_samples": 53920, - "transcript": "there is nothing majestic about me as you know very well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.01, - "num_samples": 80160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.01, - "original_num_samples": 80160, - "transcript": "coralie do you consider majesty a proper word to use when addressing a queen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.96, - "num_samples": 127360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.96, - "original_num_samples": 127360, - "transcript": "even in america ever'body bows low to our president an the blueskins are so fraid o their boolooroo that they tremble whenever they go near him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.115, - "num_samples": 49840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.115, - "original_num_samples": 49840, - "transcript": "but surely that is all wrong said tourmaline gravely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.675, - "num_samples": 170800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0020.wav", - "speed": 1 - } - ], - "original_duration": 10.675, - "original_num_samples": 170800, - "transcript": "therefore i am a mere agent to direct the laws which are the will of 
the people and am only a public servant obliged constantly to guard the welfare of my subjects" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.515, - "num_samples": 120240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0021.wav", - "speed": 1 - } - ], - "original_duration": 7.515, - "original_num_samples": 120240, - "transcript": "in that case said button bright you're entitled to the best there is to pay for your trouble" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.08, - "num_samples": 161280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.08, - "original_num_samples": 161280, - "transcript": "if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.795, - "num_samples": 44720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.795, - "original_num_samples": 44720, - "transcript": "no our way is best" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.985, - "num_samples": 95760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0024.wav", - "speed": 1 - } - ], - "original_duration": 5.985, - "original_num_samples": 95760, - "transcript": "the queen has nothing but the power to execute the laws to adjust grievances and to compel order" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.94, - 
"num_samples": 31040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0025.wav", - "speed": 1 - } - ], - "original_duration": 1.94, - "original_num_samples": 31040, - "transcript": "i have one great privilege" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.925, - "num_samples": 254800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0026.wav", - "speed": 1 - } - ], - "original_duration": 15.925, - "original_num_samples": 254800, - "transcript": "after my death a pink marble statue of me will be set up in the grand court with the statues of the other kings and queens who have ruled this land and all the pinkies in ages to come will then honor me as having been a just and upright queen that is my reward" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.97, - "num_samples": 63520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.97, - "original_num_samples": 63520, - "transcript": "a misfortune of birth placed me here and i cannot escape my fate" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.59, - "num_samples": 73440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0028.wav", - "speed": 1 - } - ], - "original_duration": 4.59, - "original_num_samples": 73440, - "transcript": "it is much more desirable to be a private citizen happy and care free" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.58, - "num_samples": 121280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0029.wav", - "speed": 1 - } - ], - 
"original_duration": 7.58, - "original_num_samples": 121280, - "transcript": "yes it was wet an sticky all right agreed the sailor but the big frog helped us an we got through all right" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.33, - "num_samples": 69280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0030.wav", - "speed": 1 - } - ], - "original_duration": 4.33, - "original_num_samples": 69280, - "transcript": "you are not like my people the pinkies and there is no place for you in our country" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.75, - "num_samples": 124000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0031.wav", - "speed": 1 - } - ], - "original_duration": 7.75, - "original_num_samples": 124000, - "transcript": "in all our history you are the first people from outside our borders who have ever stepped a foot in our land" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.275, - "num_samples": 148400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0032.wav", - "speed": 1 - } - ], - "original_duration": 9.275, - "original_num_samples": 148400, - "transcript": "we do not hate you as you say the blueskins do nor are we savage or cruel but we do not want you here and i am really puzzled what to do with you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.075, - "num_samples": 33200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/284437/5338-284437-0033.wav", - "speed": 1 - } - ], - "original_duration": 2.075, - "original_num_samples": 33200, - "transcript": "i'll look in the great book first" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.025, - "num_samples": 160400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.025, - "original_num_samples": 160400, - "transcript": "it was about noon when captain waverley entered the straggling village or rather hamlet of tully veolan close to which was situated the mansion of the proprietor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.605, - "num_samples": 121680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0001.wav", - "speed": 1 - } - ], - "original_duration": 7.605, - "original_num_samples": 121680, - "transcript": "the houses seemed miserable in the extreme especially to an eye accustomed to the smiling neatness of english cottages" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.145, - "num_samples": 514320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0002.wav", - "speed": 1 - } - ], - "original_duration": 32.145, - "original_num_samples": 514320, - "transcript": "occasionally indeed when such a consummation seemed inevitable a watchful old grandam with her close cap distaff and spindle rushed like a sibyl in frenzy out of one of these miserable cells dashed into the middle of the path and snatching up her own charge from among the sunburnt loiterers saluted him with a sound cuff and transported him back to his dungeon the little white headed varlet screaming all the while from the very top of his lungs a shrilly treble to the growling remonstrances of the enraged matron" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.575, - "num_samples": 185200, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.575, - "original_num_samples": 185200, - "transcript": "the evil and remedy such as it is still exist but this is remote from our present purpose and is only thrown out for consideration of the collectors under mister dent's dog bill" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.51, - "num_samples": 376160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0004.wav", - "speed": 1 - } - ], - "original_duration": 23.51, - "original_num_samples": 376160, - "transcript": "yet the physiognomy of the people when more closely examined was far from exhibiting the indifference of stupidity their features were rough but remarkably intelligent grave but the very reverse of stupid and from among the young women an artist might have chosen more than one model whose features and form resembled those of minerva" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.77, - "num_samples": 284320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0005.wav", - "speed": 1 - } - ], - "original_duration": 17.77, - "original_num_samples": 284320, - "transcript": "this avenue was straight and of moderate length running between a double row of very ancient horse chestnuts planted alternately with sycamores which rose to such huge height and nourished so luxuriantly that their boughs completely over arched the broad road beneath" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.585, - "num_samples": 185360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0006.wav", - "speed": 1 - } - ], - "original_duration": 11.585, - "original_num_samples": 185360, - 
"transcript": "it was one of those effects which a painter loves to represent and mingled well with the struggling light which found its way between the boughs of the shady arch that vaulted the broad green alley" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.08, - "num_samples": 161280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.08, - "original_num_samples": 161280, - "transcript": "the house which seemed to consist of two or three high narrow and steep roofed buildings projecting from each other at right angles formed one side of the inclosure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.68, - "num_samples": 154880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.68, - "original_num_samples": 154880, - "transcript": "it had been built at a period when castles were no longer necessary and when the scottish architects had not yet acquired the art of designing a domestic residence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.255, - "num_samples": 68080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.255, - "original_num_samples": 68080, - "transcript": "neither did the front indicate absolute security from danger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.89, - "num_samples": 62240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.89, - "original_num_samples": 62240, - "transcript": 
"stables and other offices occupied another side of the square" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.93, - "num_samples": 110880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.93, - "original_num_samples": 110880, - "transcript": "two battlemented walls one of which faced the avenue and the other divided the court from the garden completed the inclosure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.3, - "num_samples": 68800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.3, - "original_num_samples": 68800, - "transcript": "this work of art was the wonder of the country ten miles round" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.085, - "num_samples": 129360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.085, - "original_num_samples": 129360, - "transcript": "the court was spacious well paved and perfectly clean there being probably another entrance behind the stables for removing the litter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.295, - "num_samples": 196720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24615/5338-24615-0014.wav", - "speed": 1 - } - ], - "original_duration": 12.295, - "original_num_samples": 196720, - "transcript": "everything around appeared solitary and would have been silent but for the continued plashing of the fountain and the whole scene still maintained the monastic illusion which the fancy of waverley had conjured up" - }, 
- { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.45, - "num_samples": 55200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.45, - "original_num_samples": 55200, - "transcript": "chapter thirty three a confidant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.26, - "num_samples": 180160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0001.wav", - "speed": 1 - } - ], - "original_duration": 11.26, - "original_num_samples": 180160, - "transcript": "mister morton replied that far from making any claim upon his good opinion his only wish and the sole purpose of his visit was to find out the means of deserving it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.83, - "num_samples": 157280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.83, - "original_num_samples": 157280, - "transcript": "evil to him that thinks otherwise said mister morton or who holds church government and ceremonies as the exclusive gage of christian faith or moral virtue" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.42, - "num_samples": 102720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.42, - "original_num_samples": 102720, - "transcript": "mister morton seemed particularly struck with the account of waverley's visit to donald bean lean" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.66, - "num_samples": 154560, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0004.wav", - "speed": 1 - } - ], - "original_duration": 9.66, - "original_num_samples": 154560, - "transcript": "when i was a young man like you mister waverley any such hair brained expedition i beg your pardon for the expression would have had inexpressible charms for me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.21, - "num_samples": 243360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0005.wav", - "speed": 1 - } - ], - "original_duration": 15.21, - "original_num_samples": 243360, - "transcript": "he certainly possesses talents beyond the rude sphere in which he moves and being neither destitute of ambition nor encumbered with scruples he will probably attempt by every means to distinguish himself during the period of these unhappy commotions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.93, - "num_samples": 174880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.93, - "original_num_samples": 174880, - "transcript": "mister morton then made a careful memorandum of the various particulars of waverley's interview with donald bean lean and the other circumstances which he had communicated" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.55, - "num_samples": 280800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0007.wav", - "speed": 1 - } - ], - "original_duration": 17.55, - "original_num_samples": 280800, - "transcript": "he had neither sympathy with my innocence nor with my wretchedness and the petrifying accuracy with which he attended to every form of civility while he tortured me by his 
questions his suspicions and his inferences was as tormenting as the racks of the inquisition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.08, - "num_samples": 161280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.08, - "original_num_samples": 161280, - "transcript": "they held conventicles in the open fields and being treated with great violence and cruelty by the scottish government more than once took arms during those reigns" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.125, - "num_samples": 194000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5338/24640/5338-24640-0009.wav", - "speed": 1 - } - ], - "original_duration": 12.125, - "original_num_samples": 194000, - "transcript": "since that time their numbers have gradually diminished but a good many are still to be found in the western counties and several with a better temper than in seventeen o seven have now taken arms for government" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.295, - "num_samples": 116720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0000.wav", - "speed": 1 - } - ], - "original_duration": 7.295, - "original_num_samples": 116720, - "transcript": "in those very early times there was a man named deucalion and he was the son of prometheus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.145, - "num_samples": 162320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.145, - "original_num_samples": 162320, - "transcript": "after jupiter had bound prometheus 
on mount caucasus and had sent diseases and cares into the world men became very very wicked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.52, - "num_samples": 104320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.52, - "original_num_samples": 104320, - "transcript": "these men he said to his mighty company are nothing but a source of trouble" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.37, - "num_samples": 133920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.37, - "original_num_samples": 133920, - "transcript": "but men kept on fighting and robbing even while the rain was pouring down and the sea was coming up over the land" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.71, - "num_samples": 91360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.71, - "original_num_samples": 91360, - "transcript": "no one but deucalion the son of prometheus was ready for such a storm" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.625, - "num_samples": 138000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.625, - "original_num_samples": 138000, - "transcript": "the day is coming said prometheus when jupiter will send a flood to destroy mankind from the earth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.89, - "num_samples": 142240, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.89, - "original_num_samples": 142240, - "transcript": "but deucalion and pyrrha were very sad for they knew that they were the only persons who were left alive in all the land" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.505, - "num_samples": 56080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.505, - "original_num_samples": 56080, - "transcript": "is there anything that you wish he asked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.46, - "num_samples": 199360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0008.wav", - "speed": 1 - } - ], - "original_duration": 12.46, - "original_num_samples": 199360, - "transcript": "we should like above all things said deucalion to see this land full of people once more for without neighbors and friends the world is a very lonely place indeed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.43, - "num_samples": 246880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0009.wav", - "speed": 1 - } - ], - "original_duration": 15.43, - "original_num_samples": 246880, - "transcript": "go on down the mountain said mercury and as you go cast the bones of your mother over your shoulders behind you and with these words he leaped into the air and was seen no more" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.34, - "num_samples": 53440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0010.wav", - "speed": 
1 - } - ], - "original_duration": 3.34, - "original_num_samples": 53440, - "transcript": "what did he mean asked pyrrha" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.71, - "num_samples": 59360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.71, - "original_num_samples": 59360, - "transcript": "surely i do not know said deucalion" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.44, - "num_samples": 135040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/57405/6319-57405-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.44, - "original_num_samples": 135040, - "transcript": "when at last they reached the plain they found themselves at the head of a noble company of human beings all eager to serve them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.245, - "num_samples": 163920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.245, - "original_num_samples": 163920, - "transcript": "when at last the queen gave birth to a daughter the king was so overjoyed that he gave a great christening feast the like of which had never before been known" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.969938, - "num_samples": 255519, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0001.wav", - "speed": 1 - } - ], - "original_duration": 15.969938, - "original_num_samples": 255519, - "transcript": "one of the young fairies overhearing her and fancying she might work some mischief to the little baby went and hid herself behind the hangings in the 
hall so as to be able to have the last word and undo any harm the old fairy might wish to work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.2, - "num_samples": 195200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0002.wav", - "speed": 1 - } - ], - "original_duration": 12.2, - "original_num_samples": 195200, - "transcript": "the turn of the old fairy had now come and she declared while her head shook with malice that the princess should pierce her hand with a spindle and die of the wound" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.53, - "num_samples": 72480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.53, - "original_num_samples": 72480, - "transcript": "it is true i cannot entirely undo what my elder has done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.1, - "num_samples": 305600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0004.wav", - "speed": 1 - } - ], - "original_duration": 19.1, - "original_num_samples": 305600, - "transcript": "now fifteen years after the princess was born she was with the king and queen at one of their castles and as she was running about by herself she came to a little chamber at the top of a tower and there sat an honest old woman spinning for she had never heard of the king's edict" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.68, - "num_samples": 154880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.68, - "original_num_samples": 154880, - "transcript": "she had no 
sooner taken up the spindle than being hasty and careless she pierced her hand with the point of it and fainted away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "he knew that she would not awake for a hundred years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.725, - "num_samples": 267600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0007.wav", - "speed": 1 - } - ], - "original_duration": 16.725, - "original_num_samples": 267600, - "transcript": "one said it was an enchanted castle another that witches lived there but most believed that it was occupied by a great ogre which carried thither all the children he could catch and ate them up one at a time for nobody could get at him through the wood" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.36, - "num_samples": 69760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.36, - "original_num_samples": 69760, - "transcript": "the young prince at these words felt himself on fire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.24, - "num_samples": 163840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0009.wav", - "speed": 1 - } - ], - "original_duration": 10.24, - "original_num_samples": 163840, - "transcript": "scarcely had he come to the wood when all the trees and thorns which had made such an impenetrable thicket opened on one side and 
the other to offer him a path" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.96, - "num_samples": 79360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.96, - "original_num_samples": 79360, - "transcript": "he entered a large forecourt and stood still with amazement and awe" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.655, - "num_samples": 106480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.655, - "original_num_samples": 106480, - "transcript": "but the faces of the men were rosy and the goblets by them had a few drops of wine left" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.21, - "num_samples": 147360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.21, - "original_num_samples": 147360, - "transcript": "he entered the guard room there the guards stood drawn up in line with carbines at their shoulders but they were sound asleep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.8, - "num_samples": 124800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0013.wav", - "speed": 1 - } - ], - "original_duration": 7.8, - "original_num_samples": 124800, - "transcript": "he passed through one apartment after another where were ladies and gentlemen asleep in their chairs or standing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.63, - "num_samples": 42080, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.63, - "original_num_samples": 42080, - "transcript": "i have waited long for you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.56, - "num_samples": 88960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.56, - "original_num_samples": 88960, - "transcript": "they talked for four hours and had not then said half that was in their heads to say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.835, - "num_samples": 141360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.835, - "original_num_samples": 141360, - "transcript": "meanwhile all the rest of the people in the castle had been wakened at the same moment as the princess and they were now extremely hungry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.795, - "num_samples": 108720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.795, - "original_num_samples": 108720, - "transcript": "the lady in waiting became very impatient and at length announced to the princess that they all waited for her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.11, - "num_samples": 225760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0018.wav", - "speed": 1 - } - ], - "original_duration": 14.11, - "original_num_samples": 225760, - "transcript": "then the prince took the princess by the hand she was dressed in great 
splendour but he did not hint that she looked as he had seen pictures of his great grandmother look he thought her all the more charming for that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.81, - "num_samples": 204960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0019.wav", - "speed": 1 - } - ], - "original_duration": 12.81, - "original_num_samples": 204960, - "transcript": "the violins and haut boys played old but excellent pieces of music and after supper to lose no time the grand almoner married the royal lovers in the chapel of the castle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.675, - "num_samples": 58800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/64726/6319-64726-0020.wav", - "speed": 1 - } - ], - "original_duration": 3.675, - "original_num_samples": 58800, - "transcript": "he turned to show them the castle but behold" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.825, - "num_samples": 77200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.825, - "original_num_samples": 77200, - "transcript": "what a fuss is made about you my dear little friends" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.175, - "num_samples": 194800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0001.wav", - "speed": 1 - } - ], - "original_duration": 12.175, - "original_num_samples": 194800, - "transcript": "you surely cannot suppose that in a natural state you would be forced to climb regularly up one tall bare stick such as i see you upon now" - }, - { - "files": [ - { - "channels": 1, 
- "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.455, - "num_samples": 119280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.455, - "original_num_samples": 119280, - "transcript": "your cousin the wild convolvulus whom i left in the fields this morning does no such thing i assure you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.61, - "num_samples": 185760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.61, - "original_num_samples": 185760, - "transcript": "my young plants require heat or they would not live and the pots we are kept in protect us from those cruel wire worms who delight to destroy our roots" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.295, - "num_samples": 100720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.295, - "original_num_samples": 100720, - "transcript": "why not allow your silver tufts to luxuriate in a natural manner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.75, - "num_samples": 204000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0005.wav", - "speed": 1 - } - ], - "original_duration": 12.75, - "original_num_samples": 204000, - "transcript": "still the rose tree stood out that there must be some great advantages in a gardener's care for she could not pretend to be ignorant of her own superiority to all her wild relations in the woods" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.09, - 
"num_samples": 273440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0006.wav", - "speed": 1 - } - ], - "original_duration": 17.09, - "original_num_samples": 273440, - "transcript": "then the wind took another frolic round the garden and made up to the large white lily into whose refined ear he whispered a doubt as to the necessity or advantage of her thick powerful stem being propped up against a stupid ugly stick" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "he really grieved to see it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.445, - "num_samples": 295120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0008.wav", - "speed": 1 - } - ], - "original_duration": 18.445, - "original_num_samples": 295120, - "transcript": "did that lovely creature suppose that nature who had done so much for her that the fame of her beauty extended throughout the world had yet left her so weak and feeble that she could not support herself in the position most calculated to give her ease and pleasure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.545, - "num_samples": 72720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.545, - "original_num_samples": 72720, - "transcript": "indeed not a flower escaped his mischievous suggestions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.94, - 
"num_samples": 111040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.94, - "original_num_samples": 111040, - "transcript": "echoed the flowers tremulously as with a sort of fearful pleasure they awaited his approach" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.315, - "num_samples": 293040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0011.wav", - "speed": 1 - } - ], - "original_duration": 18.315, - "original_num_samples": 293040, - "transcript": "making a sort of eddying circuit round the garden he knocked over the convolvulus pole tore the strips from the stick that held up the white lily loosed all the carnation flowers from their fastenings broke the rose tree down and levelled the sweet peas to the ground" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.115, - "num_samples": 65840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.115, - "original_num_samples": 65840, - "transcript": "meanwhile how fared it with the flowers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.87, - "num_samples": 77920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.87, - "original_num_samples": 77920, - "transcript": "oh that she were once more climbing up the friendly fir pole" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.73, - "num_samples": 171680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6319/275224/6319-275224-0014.wav", - "speed": 1 - } - ], - "original_duration": 10.73, - "original_num_samples": 171680, - "transcript": "the honeysuckle escaped no better and the carnation was ready to die of vexation at finding that her coveted freedom had levelled her to the dirt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.46, - "num_samples": 119360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.46, - "original_num_samples": 119360, - "transcript": "before the day closed the gardener came whistling from his farm work to look over his pretty charges" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.98, - "num_samples": 79680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.98, - "original_num_samples": 79680, - "transcript": "but for the sight that awaited him he was not prepared at all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.41, - "num_samples": 134560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.41, - "original_num_samples": 134560, - "transcript": "weeds meanwhile sprang up and a dreary confusion reigned in the once orderly and brilliant little garden" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.755, - "num_samples": 108080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.755, - "original_num_samples": 108080, - "transcript": "the mistress had returned and 
the young lady was with her and hurried at once to her favourite garden" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.78, - "num_samples": 92480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.78, - "original_num_samples": 92480, - "transcript": "in this position she remained until a gentle hand was laid upon her shoulder" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.94, - "num_samples": 191040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6319/275224/6319-275224-0020.wav", - "speed": 1 - } - ], - "original_duration": 11.94, - "original_num_samples": 191040, - "transcript": "i am not thinking about the garden mamma replied the young girl without lifting up her face we can plant new flowers and tie up even some of these afresh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.37, - "num_samples": 69920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.37, - "original_num_samples": 69920, - "transcript": "grandfather came down wearing a white shirt and his sunday coat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.685, - "num_samples": 42960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.685, - "original_num_samples": 42960, - "transcript": "morning prayers were longer than usual" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.36, - "num_samples": 149760, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/1993/147965/1993-147965-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.36, - "original_num_samples": 149760, - "transcript": "he gave thanks for our food and comfort and prayed for the poor and destitute in great cities where the struggle for life was harder than it was here with us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.375, - "num_samples": 118000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.375, - "original_num_samples": 118000, - "transcript": "because he talked so little his words had a peculiar force they were not worn dull from constant use" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.915, - "num_samples": 46640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0004.wav", - "speed": 1 - } - ], - "original_duration": 2.915, - "original_num_samples": 46640, - "transcript": "all afternoon he sat in the dining room" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.66, - "num_samples": 138560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.66, - "original_num_samples": 138560, - "transcript": "at about four o'clock a visitor appeared mister shimerda wearing his rabbit skin cap and collar and new mittens his wife had knitted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.83, - "num_samples": 141280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.83, - "original_num_samples": 141280, - "transcript": "he 
sat still and passive his head resting against the back of the wooden rocking chair his hands relaxed upon the arms" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.57, - "num_samples": 121120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0007.wav", - "speed": 1 - } - ], - "original_duration": 7.57, - "original_num_samples": 121120, - "transcript": "his face had a look of weariness and pleasure like that of sick people when they feel relief from pain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.5, - "num_samples": 88000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147965/1993-147965-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.5, - "original_num_samples": 88000, - "transcript": "he made the sign of the cross over me put on his cap and went off in the dark" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.42, - "num_samples": 134720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0000.wav", - "speed": 1 - } - ], - "original_duration": 8.42, - "original_num_samples": 134720, - "transcript": "they sat about the house most of the day as if it were sunday greasing their boots mending their suspenders plaiting whiplashes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.75, - "num_samples": 76000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.75, - "original_num_samples": 76000, - "transcript": "anyway he would never allow one of his horses to be put to such a strain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.33, 
- "num_samples": 101280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.33, - "original_num_samples": 101280, - "transcript": "i had wanted to get some picture books for yulka and antonia even yulka was able to read a little now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.685, - "num_samples": 74960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.685, - "original_num_samples": 74960, - "transcript": "she cut squares of cotton cloth and we sewed them together into a book" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.3, - "num_samples": 116800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.3, - "original_num_samples": 116800, - "transcript": "on the white pages i grouped sunday school cards and advertising cards which i had brought from my old country" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.515, - "num_samples": 168240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.515, - "original_num_samples": 168240, - "transcript": "when he mounted his horse at the door i saw that he had a hatchet slung to his belt and he gave grandmother a meaning look which told me he was planning a surprise for me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.47, - "num_samples": 55520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0006.wav", - 
"speed": 1 - } - ], - "original_duration": 3.47, - "original_num_samples": 55520, - "transcript": "i put on my cap and ran out to meet jake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.865, - "num_samples": 109840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.865, - "original_num_samples": 109840, - "transcript": "he used to help my father cut christmas trees for me in virginia and he had not forgotten how much i liked them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.04, - "num_samples": 112640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0008.wav", - "speed": 1 - } - ], - "original_duration": 7.04, - "original_num_samples": 112640, - "transcript": "by the time we had placed the cold fresh smelling little tree in a corner of the sitting room it was already christmas eve" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.97, - "num_samples": 143520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.97, - "original_num_samples": 143520, - "transcript": "from under the lining he now produced a collection of brilliantly colored paper figures several inches high and stiff enough to stand alone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.565, - "num_samples": 329040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147964/1993-147964-0010.wav", - "speed": 1 - } - ], - "original_duration": 20.565, - "original_num_samples": 329040, - "transcript": "i can see them now exactly as they looked working about the table in the 
lamplight jake with his heavy features so rudely moulded that his face seemed somehow unfinished otto with his half ear and the savage scar that made his upper lip curl so ferociously under his twisted mustache" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.715, - "num_samples": 107440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.715, - "original_num_samples": 107440, - "transcript": "how infinite the wealth of love and hope garnered in these same tiny treasure houses and oh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.535, - "num_samples": 152560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.535, - "original_num_samples": 152560, - "transcript": "what bankrupts in the world we feel when death like some remorseless creditor seizes on all we fondly thought our own the twins" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.76, - "num_samples": 92160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.76, - "original_num_samples": 92160, - "transcript": "the ghoul like fever was not to be braved with impunity and baulked of its prey" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.3, - "num_samples": 228800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0003.wav", - "speed": 1 - } - ], - "original_duration": 14.3, - "original_num_samples": 228800, - "transcript": "the board not so formidable as she had imagined had inquired into her case and instead 
of sending her to stoke claypole her husband's buckinghamshire parish as she had dreaded had agreed to pay her rent" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.4, - "num_samples": 118400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.4, - "original_num_samples": 118400, - "transcript": "margaret met jem wilson several days after his brothers were seriously ill and heard from him the state of things at his home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.98, - "num_samples": 127680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.98, - "original_num_samples": 127680, - "transcript": "she stopped with her hand on the latch of the wilsons door to still her beating heart and listened to the hushed quiet within" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 27.88, - "num_samples": 446080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0006.wav", - "speed": 1 - } - ], - "original_duration": 27.88, - "original_num_samples": 446080, - "transcript": "she opened the door softly there sat missus wilson in the old rocking chair with one sick death like boy lying on her knee crying without let or pause but softly gently as fearing to disturb the troubled gasping child while behind her old alice let her fast dropping tears fall down on the dead body of the other twin which she was laying out on a board placed on a sort of sofa settee in a corner of the room" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.91, - "num_samples": 126560, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0007.wav", - "speed": 1 - } - ], - "original_duration": 7.91, - "original_num_samples": 126560, - "transcript": "over the child which yet breathed the father bent watching anxiously for some ground of hope where hope there was none" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.39, - "num_samples": 54240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.39, - "original_num_samples": 54240, - "transcript": "is there any chance for the other one think you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.28, - "num_samples": 276480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0009.wav", - "speed": 1 - } - ], - "original_duration": 17.28, - "original_num_samples": 276480, - "transcript": "but earnest as the father was in watching the yet living he had eyes and ears for all that concerned the dead and sprang gently up and took his dead son on his hard couch in his arms with tender strength and carried him upstairs as if afraid of wakening him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "wishing him said mary in a tone of inquiry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.12, - "num_samples": 49920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.12, - 
"original_num_samples": 49920, - "transcript": "then the mother lifted up her voice and wept" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.28, - "num_samples": 84480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.28, - "original_num_samples": 84480, - "transcript": "her cries brought her husband down to try with his aching heart to comfort hers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.075, - "num_samples": 81200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.075, - "original_num_samples": 81200, - "transcript": "mary and alice drew near the fire and stood in quiet sorrow for some time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.055, - "num_samples": 48880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.055, - "original_num_samples": 48880, - "transcript": "then alice broke the silence by saying" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.260063, - "num_samples": 132161, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.260063, - "original_num_samples": 132161, - "transcript": "afore christmas time i was as full as full could be of going home for good and all yo han heard how i've wished it this terrible long time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.845, - "num_samples": 93520, - "encoding": "Signed Integer PCM", 
- "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.845, - "original_num_samples": 93520, - "transcript": "but he stayed long there and at last his sturdy frame shook with his strong agony" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.78, - "num_samples": 60480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.78, - "original_num_samples": 60480, - "transcript": "oh jem don't give way so i cannot bear to see you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.29, - "num_samples": 212640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0018.wav", - "speed": 1 - } - ], - "original_duration": 13.29, - "original_num_samples": 212640, - "transcript": "he did not speak as though fearing to destroy by sound or motion the happiness of that moment when her soft hand's touch thrilled through his frame and her silvery voice was whispering tenderness in his ear" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.96, - "num_samples": 111360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.96, - "original_num_samples": 111360, - "transcript": "don't jem please don't whispered she again believing that his silence was only another form of grief" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.98, - "num_samples": 271680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0020.wav", - "speed": 1 - } - ], - "original_duration": 16.98, - "original_num_samples": 
271680, - "transcript": "mary i almost loathe myself when i feel i would not give up this minute when my brothers lie dead and father and mother are in such trouble for all my life that's past and gone and mary as she tried to release her hand you know what makes me feel so blessed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.49, - "num_samples": 167840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0021.wav", - "speed": 1 - } - ], - "original_duration": 10.49, - "original_num_samples": 167840, - "transcript": "he remained up stairs until after the early dawn showed mary that she need have no fear of going home through the deserted and quiet streets to try and get a little sleep before work hour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.54, - "num_samples": 264640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0022.wav", - "speed": 1 - } - ], - "original_duration": 16.54, - "original_num_samples": 264640, - "transcript": "so leaving kind messages to george and jane wilson and hesitating whether she might dare to send a few kind words to jem and deciding that she had better not she stepped out into the bright morning light so fresh a contrast to the darkened room where death had been" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.7, - "num_samples": 171200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0023.wav", - "speed": 1 - } - ], - "original_duration": 10.7, - "original_num_samples": 171200, - "transcript": "her thoughts ran on jem's manner and words not but what she had known the tale they told for many a day but still she wished he had not put it so plainly" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.69, - "num_samples": 187040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0024.wav", - "speed": 1 - } - ], - "original_duration": 11.69, - "original_num_samples": 187040, - "transcript": "i cannot think what possesses me that i must always be wanting to comfort him when he's downcast and that i must go meddling wi him to night when sure enough it was his aunt's place to speak to him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.645, - "num_samples": 154320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0025.wav", - "speed": 1 - } - ], - "original_duration": 9.645, - "original_num_samples": 154320, - "transcript": "i think i cannot go right for i either check myself till i'm downright cross to him or else i speak just natural and that's too kind and tender by half" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.255, - "num_samples": 36080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.255, - "original_num_samples": 36080, - "transcript": "but will he thank me for it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.9, - "num_samples": 270400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0027.wav", - "speed": 1 - } - ], - "original_duration": 16.9, - "original_num_samples": 270400, - "transcript": "there was something of keen practical shrewdness about her which contrasted very bewitchingly with the simple foolish unworldly ideas she had picked up from the romances which miss simmonds young ladies were in the habit of recommending to each other yes" - }, - 
{ - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.17, - "num_samples": 162720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0028.wav", - "speed": 1 - } - ], - "original_duration": 10.17, - "original_num_samples": 162720, - "transcript": "the old leaven infused years ago by her aunt esther fermented in her little bosom and perhaps all the more for her father's aversion to the rich and the gentle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.665, - "num_samples": 186640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0029.wav", - "speed": 1 - } - ], - "original_duration": 11.665, - "original_num_samples": 186640, - "transcript": "it was a comfort to her when scolded by miss simmonds to think of the day when she would drive up to the door in her own carriage to order her gowns from the hasty tempered yet kind dressmaker" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.52, - "num_samples": 248320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147149/1993-147149-0030.wav", - "speed": 1 - } - ], - "original_duration": 15.52, - "original_num_samples": 248320, - "transcript": "but the best of her plans the holiest that which in some measure redeemed the vanity of the rest were those relating to her father her dear father now oppressed with care and always a disheartened gloomy person" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.135, - "num_samples": 178160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.135, - "original_num_samples": 178160, - "transcript": "the week following christmas 
brought in a thaw and by new year's day all the world about us was a broth of gray slush and the guttered slope between the windmill and the barn was running black water" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.565, - "num_samples": 201040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0001.wav", - "speed": 1 - } - ], - "original_duration": 12.565, - "original_num_samples": 201040, - "transcript": "it was the first time missus shimerda had been to our house and she ran about examining our carpets and curtains and furniture all the while commenting upon them to her daughter in an envious complaining tone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.185, - "num_samples": 66960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.185, - "original_num_samples": 66960, - "transcript": "your mama i said angrily wants other people's things" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.595, - "num_samples": 41520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.595, - "original_num_samples": 41520, - "transcript": "for ambrosch my mama come here" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.865, - "num_samples": 77840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.865, - "original_num_samples": 77840, - "transcript": "but you see a body never knows what traits poverty might bring out in em" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.345, - "num_samples": 53520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.345, - "original_num_samples": 53520, - "transcript": "they began to laugh boisterously when they saw me calling" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.29, - "num_samples": 52640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1993/147966/1993-147966-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.29, - "original_num_samples": 52640, - "transcript": "you've got a birthday present this time jim and no mistake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.195, - "num_samples": 35120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.195, - "original_num_samples": 35120, - "transcript": "kirkleatham yeast" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.135, - "num_samples": 34160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.135, - "original_num_samples": 34160, - "transcript": "seventeen seventeen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.915, - "num_samples": 46640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.915, - "original_num_samples": 46640, - "transcript": "to make good home made bread" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.075, - 
"num_samples": 33200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.075, - "original_num_samples": 33200, - "transcript": "seventeen eighteen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.175, - "num_samples": 370800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0004.wav", - "speed": 1 - } - ], - "original_duration": 23.175, - "original_num_samples": 370800, - "transcript": "mode put the flour into a large earthenware bowl or deep pan then with a strong metal or wooden spoon hollow out the middle but do not clear it entirely away from the bottom of the pan as in that case the sponge or leaven as it was formerly termed would stick to it which it ought not to do" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 31.375, - "num_samples": 502000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0005.wav", - "speed": 1 - } - ], - "original_duration": 31.375, - "original_num_samples": 502000, - "transcript": "next take either a large tablespoonful of brewer's yeast which has been rendered solid by mixing it with plenty of cold water and letting it afterwards stand to settle for a day and night or nearly an ounce of german yeast put it into a large basin and proceed to mix it so that it shall be as smooth as cream with three quarters pint of warm milk and water or with water only though even a very little milk will much improve the bread" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.645, - "num_samples": 234320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0006.wav", - "speed": 1 - } - ], - "original_duration": 14.645, - 
"original_num_samples": 234320, - "transcript": "look at it from time to time when it has been laid for nearly an hour and when the yeast has risen and broken through the flour so that bubbles appear in it you will know that it is ready to be made up into dough" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.74, - "num_samples": 331840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0007.wav", - "speed": 1 - } - ], - "original_duration": 20.74, - "original_num_samples": 331840, - "transcript": "then place the pan on a strong chair or dresser or table of convenient height pour into the sponge the remainder of the warm milk and water stir into it as much of the flour as you can with the spoon then wipe it out clean with your fingers and lay it aside" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.38, - "num_samples": 310080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0008.wav", - "speed": 1 - } - ], - "original_duration": 19.38, - "original_num_samples": 310080, - "transcript": "turn it then on to a paste board or very clean dresser and with a large sharp knife divide it in two make it up quickly into loaves and dispatch it to the oven make one or two incisions across the tops of the loaves as they will rise more easily if this be done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.435, - "num_samples": 54960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.435, - "original_num_samples": 54960, - "transcript": "illustration italian millet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.41, - "num_samples": 230560, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0010.wav", - "speed": 1 - } - ], - "original_duration": 14.41, - "original_num_samples": 230560, - "transcript": "italian millet or great indian millet is cultivated in egypt and nubia where it is called dhourra and is used as human food as well as for the fermentation of beer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.53, - "num_samples": 72480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.53, - "original_num_samples": 72480, - "transcript": "it will grow on poor soils and is extremely productive" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.24, - "num_samples": 211840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0012.wav", - "speed": 1 - } - ], - "original_duration": 13.24, - "original_num_samples": 211840, - "transcript": "it has been introduced into italy where they make a coarse bread from it and it is also employed in pastry and puddings they also use it for feeding horses and domestic fowls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.855, - "num_samples": 157680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0013.wav", - "speed": 1 - } - ], - "original_duration": 9.855, - "original_num_samples": 157680, - "transcript": "a yellow variety called golden millet is sold in the grocers shops for making puddings and is very delicate and wholesome" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.2, - "num_samples": 131200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2078/142845/2078-142845-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.2, - "original_num_samples": 131200, - "transcript": "another advantage the red wheats possess is their comparative immunity from the attacks of mildew and fly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.84, - "num_samples": 157440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0015.wav", - "speed": 1 - } - ], - "original_duration": 9.84, - "original_num_samples": 157440, - "transcript": "mode boil the rice in water until it is quite tender pour off the water and put the rice before it is cold to the flour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.53, - "num_samples": 56480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.53, - "original_num_samples": 56480, - "transcript": "illustration maize plant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.485, - "num_samples": 103760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.485, - "original_num_samples": 103760, - "transcript": "maize next to wheat and rice maize is the grain most used in the nourishment of man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.33, - "num_samples": 85280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.33, - "original_num_samples": 85280, - "transcript": "if carried any distance it should be stored away in air tight vessels" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.405, - "num_samples": 150480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0019.wav", - "speed": 1 - } - ], - "original_duration": 9.405, - "original_num_samples": 150480, - "transcript": "some of the preparations of maize flour are very good and when partaken in moderation suitable food for almost everybody" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.42, - "num_samples": 150720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0020.wav", - "speed": 1 - } - ], - "original_duration": 9.42, - "original_num_samples": 150720, - "transcript": "mode let the tartaric acid and salt be reduced to the finest possible powder then mix them well with the flour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.14, - "num_samples": 98240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0021.wav", - "speed": 1 - } - ], - "original_duration": 6.14, - "original_num_samples": 98240, - "transcript": "sour milk or buttermilk may be used but then a little less acid will be needed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.3, - "num_samples": 36800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.3, - "original_num_samples": 36800, - "transcript": "excellent rolls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.75, - "num_samples": 28000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0023.wav", - "speed": 1 - } - ], - "original_duration": 1.75, - 
"original_num_samples": 28000, - "transcript": "hot rolls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.21, - "num_samples": 35360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.21, - "original_num_samples": 35360, - "transcript": "seventeen twenty four" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 29.06, - "num_samples": 464960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0025.wav", - "speed": 1 - } - ], - "original_duration": 29.06, - "original_num_samples": 464960, - "transcript": "when they are quite hot divide them lengthwise into three put some thin flakes of good butter between the slices press the rolls together and put them in the oven for a minute or two but not longer or the butter would oil take them out of the oven spread the butter equally over divide the rolls in half and put them on to a very hot clean dish and send them instantly to table" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.475, - "num_samples": 39600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.475, - "original_num_samples": 39600, - "transcript": "to make dry toast" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.73, - "num_samples": 139680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0027.wav", - "speed": 1 - } - ], - "original_duration": 8.73, - "original_num_samples": 139680, - "transcript": "never use new bread for making any kind of toast as it eats heavy and besides is very extravagant" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.425, - "num_samples": 182800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0028.wav", - "speed": 1 - } - ], - "original_duration": 11.425, - "original_num_samples": 182800, - "transcript": "move it backwards and forwards until the bread is nicely coloured then turn it and toast the other side and do not place it so near the fire that it blackens" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.87, - "num_samples": 77920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0029.wav", - "speed": 1 - } - ], - "original_duration": 4.87, - "original_num_samples": 77920, - "transcript": "to make hot buttered toast seventeen twenty six" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.04, - "num_samples": 192640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0030.wav", - "speed": 1 - } - ], - "original_duration": 12.04, - "original_num_samples": 192640, - "transcript": "a loaf of household bread about two days old answers for making toast better than cottage bread the latter not being a good shape and too crusty for the purpose" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.74, - "num_samples": 267840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0031.wav", - "speed": 1 - } - ], - "original_duration": 16.74, - "original_num_samples": 267840, - "transcript": "cut as many nice even slices as may be required rather more than one quarter inch in thickness and toast them before a very bright fire without allowing the bread to blacken which spoils the appearance and flavour of all toast" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.94, - "num_samples": 207040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0032.wav", - "speed": 1 - } - ], - "original_duration": 12.94, - "original_num_samples": 207040, - "transcript": "soyer recommends that each slice should be cut into pieces as soon as it is buttered and when all are ready that they should be piled lightly on the dish they are intended to be served on" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.385, - "num_samples": 182160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0033.wav", - "speed": 1 - } - ], - "original_duration": 11.385, - "original_num_samples": 182160, - "transcript": "he says that by cutting through four or five slices at a time all the butter is squeezed out of the upper ones while the bottom one is swimming in fat liquid" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.755, - "num_samples": 156080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0034.wav", - "speed": 1 - } - ], - "original_duration": 9.755, - "original_num_samples": 156080, - "transcript": "muffins and crumpets should always be served on separate dishes and both toasted and served as expeditiously as possible" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.22, - "num_samples": 67520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0035.wav", - "speed": 1 - } - ], - "original_duration": 4.22, - "original_num_samples": 67520, - "transcript": "sufficient allow two crumpets to each person" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.72, 
- "num_samples": 59520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0036.wav", - "speed": 1 - } - ], - "original_duration": 3.72, - "original_num_samples": 59520, - "transcript": "plain buns seventeen twenty nine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.87, - "num_samples": 253920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0037.wav", - "speed": 1 - } - ], - "original_duration": 15.87, - "original_num_samples": 253920, - "transcript": "mode put the flour into a basin mix the sugar well with it make a hole in the centre and stir in the yeast and milk which should be lukewarm with enough of the flour to make it the thickness of cream" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.52, - "num_samples": 72320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0038.wav", - "speed": 1 - } - ], - "original_duration": 4.52, - "original_num_samples": 72320, - "transcript": "from fifteen to twenty minutes will be required to bake them nicely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.95, - "num_samples": 335200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0039.wav", - "speed": 1 - } - ], - "original_duration": 20.95, - "original_num_samples": 335200, - "transcript": "these buns may be varied by adding a few currants candied peel or caraway seeds to the other ingredients and the above mixture answers for hot cross buns by putting in a little ground allspice and by pressing a tin mould in the form of a cross in the centre of the bun" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.015, - "num_samples": 112240, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0040.wav", - "speed": 1 - } - ], - "original_duration": 7.015, - "original_num_samples": 112240, - "transcript": "sufficient to make twelve buns seasonable at any time light buns" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.66, - "num_samples": 42560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0041.wav", - "speed": 1 - } - ], - "original_duration": 2.66, - "original_num_samples": 42560, - "transcript": "illustration buns" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.23, - "num_samples": 67680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0042.wav", - "speed": 1 - } - ], - "original_duration": 4.23, - "original_num_samples": 67680, - "transcript": "victoria buns seventeen thirty two" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 21.004938, - "num_samples": 336079, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0043.wav", - "speed": 1 - } - ], - "original_duration": 21.004938, - "original_num_samples": 336079, - "transcript": "mode whisk the egg stir in the sugar and beat these ingredients well together beat the butter to a cream stir in the ground rice currants and candied peel and as much flour as will make it of such a consistency that it may be rolled into seven or eight balls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.325, - "num_samples": 37200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0044.wav", - "speed": 1 - } - ], - "original_duration": 2.325, - "original_num_samples": 37200, - "transcript": 
"italian rusks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.135, - "num_samples": 114160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0045.wav", - "speed": 1 - } - ], - "original_duration": 7.135, - "original_num_samples": 114160, - "transcript": "they should be kept in a closed tin canister in a dry place to preserve their crispness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.355, - "num_samples": 85680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0046.wav", - "speed": 1 - } - ], - "original_duration": 5.355, - "original_num_samples": 85680, - "transcript": "it is not cultivated in england being principally confined to the east" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.46, - "num_samples": 183360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0047.wav", - "speed": 1 - } - ], - "original_duration": 11.46, - "original_num_samples": 183360, - "transcript": "when we take into account that the arabians are fond of lizards and locusts as articles of food their cuisine altogether is scarcely a tempting one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.2, - "num_samples": 35200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0048.wav", - "speed": 1 - } - ], - "original_duration": 2.2, - "original_num_samples": 35200, - "transcript": "seventeen thirty four" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.87, - "num_samples": 45920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0049.wav", - 
"speed": 1 - } - ], - "original_duration": 2.87, - "original_num_samples": 45920, - "transcript": "illustration rusks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.725, - "num_samples": 123600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0050.wav", - "speed": 1 - } - ], - "original_duration": 7.725, - "original_num_samples": 123600, - "transcript": "mode put the milk and butter into a saucepan and keep shaking it round until the latter is melted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.36, - "num_samples": 165760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2078/142845/2078-142845-0051.wav", - "speed": 1 - } - ], - "original_duration": 10.36, - "original_num_samples": 165760, - "transcript": "when cold they should be put into tin canisters to keep them dry and if intended for the cheese course the sifted sugar should be omitted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.855, - "num_samples": 93680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0000.wav", - "speed": 1 - } - ], - "original_duration": 5.855, - "original_num_samples": 93680, - "transcript": "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.815, - "num_samples": 77040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.815, - "original_num_samples": 77040, - "transcript": "nor is mister quilter's manner less interesting than his matter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, 
- "duration": 12.485, - "num_samples": 199760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0002.wav", - "speed": 1 - } - ], - "original_duration": 12.485, - "original_num_samples": 199760, - "transcript": "he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.9, - "num_samples": 158400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.9, - "original_num_samples": 158400, - "transcript": "he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 29.4, - "num_samples": 470400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0004.wav", - "speed": 1 - } - ], - "original_duration": 29.4, - "original_num_samples": 470400, - "transcript": "linnell's pictures are a sort of up guards and at em paintings and mason's exquisite idylls are as national as a jingo poem mister birket foster's landscapes smile at one much in the same way that mister carker used to flash his teeth and mister john collier gives his sitter a cheerful slap on the back before he says like a shampooer in a turkish bath next man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.01, - "num_samples": 144160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.01, - "original_num_samples": 144160, - "transcript": "it is 
obviously unnecessary for us to point out how luminous these criticisms are how delicate in expression" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.64, - "num_samples": 90240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.64, - "original_num_samples": 90240, - "transcript": "on the general principles of art mister quilter writes with equal lucidity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.24, - "num_samples": 147840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.24, - "original_num_samples": 147840, - "transcript": "painting he tells us is of a different quality to mathematics and finish in art is adding more fact" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.12, - "num_samples": 81920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.12, - "original_num_samples": 81920, - "transcript": "as for etchings they are of two kinds british and foreign" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.29, - "num_samples": 292640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0009.wav", - "speed": 1 - } - ], - "original_duration": 18.29, - "original_num_samples": 292640, - "transcript": "he laments most bitterly the divorce that has been made between decorative art and what we usually call pictures makes the customary appeal to the last judgment and reminds us that in the great days of art michael angelo was the furnishing upholsterer" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.6, - "num_samples": 89600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0010.wav", - "speed": 1 - } - ], - "original_duration": 5.6, - "original_num_samples": 89600, - "transcript": "near the fire and the ornaments fred brought home from india on the mantel board" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.115, - "num_samples": 241840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0011.wav", - "speed": 1 - } - ], - "original_duration": 15.115, - "original_num_samples": 241840, - "transcript": "in fact he is quite severe on mister ruskin for not recognising that a picture should denote the frailty of man and remarks with pleasing courtesy and felicitous grace that many phases of feeling" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.38, - "num_samples": 86080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.38, - "original_num_samples": 86080, - "transcript": "only unfortunately his own work never does get good" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.1, - "num_samples": 113600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/128104/1272-128104-0013.wav", - "speed": 1 - } - ], - "original_duration": 7.1, - "original_num_samples": 113600, - "transcript": "mister quilter has missed his chance for he has failed even to make himself the tupper of painting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.245, - "num_samples": 35920, - "encoding": "Signed Integer PCM", - "silent": false, 
- "fname": "dev-clean-wav/1272/128104/1272-128104-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.245, - "original_num_samples": 35920, - "transcript": "by harry quilter m a" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.885, - "num_samples": 174160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.885, - "original_num_samples": 174160, - "transcript": "because you were sleeping instead of conquering the lovely rose princess has become a fiddle without a bow while poor shaggy sits there a cooing dove" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.13, - "num_samples": 178080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0001.wav", - "speed": 1 - } - ], - "original_duration": 11.13, - "original_num_samples": 178080, - "transcript": "he has gone and gone for good answered polychrome who had managed to squeeze into the room beside the dragon and had witnessed the occurrences with much interest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.475, - "num_samples": 183600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0002.wav", - "speed": 1 - } - ], - "original_duration": 11.475, - "original_num_samples": 183600, - "transcript": "i have remained a prisoner only because i wished to be one and with this he stepped forward and burst the stout chains as easily as if they had been threads" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.755, - "num_samples": 76080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0003.wav", - "speed": 1 - } - ], - 
"original_duration": 4.755, - "original_num_samples": 76080, - "transcript": "the little girl had been asleep but she heard the raps and opened the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.225, - "num_samples": 67600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.225, - "original_num_samples": 67600, - "transcript": "the king has fled in disgrace and your friends are asking for you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.625, - "num_samples": 74000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.625, - "original_num_samples": 74000, - "transcript": "i begged ruggedo long ago to send him away but he would not do so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.09, - "num_samples": 65440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.09, - "original_num_samples": 65440, - "transcript": "i also offered to help your brother to escape but he would not go" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.055, - "num_samples": 64880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.055, - "original_num_samples": 64880, - "transcript": "he eats and sleeps very steadily replied the new king" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.67, - "num_samples": 58720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1272/135031/1272-135031-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.67, - "original_num_samples": 58720, - "transcript": "i hope he doesn't work too hard said shaggy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.91, - "num_samples": 30560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0009.wav", - "speed": 1 - } - ], - "original_duration": 1.91, - "original_num_samples": 30560, - "transcript": "he doesn't work at all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.995, - "num_samples": 143920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.995, - "original_num_samples": 143920, - "transcript": "in fact there is nothing he can do in these dominions as well as our nomes whose numbers are so great that it worries us to keep them all busy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.18, - "num_samples": 50880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.18, - "original_num_samples": 50880, - "transcript": "not exactly returned kaliko" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.04, - "num_samples": 32640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.04, - "original_num_samples": 32640, - "transcript": "where is my brother now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/1272/135031/1272-135031-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "inquired shaggy in the metal forest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.74, - "num_samples": 27840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0014.wav", - "speed": 1 - } - ], - "original_duration": 1.74, - "original_num_samples": 27840, - "transcript": "where is that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.375, - "num_samples": 118000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.375, - "original_num_samples": 118000, - "transcript": "the metal forest is in the great domed cavern the largest in all our dominions replied kaliko" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.28, - "num_samples": 36480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.28, - "original_num_samples": 36480, - "transcript": "kaliko hesitated" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.86, - "num_samples": 109760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.86, - "original_num_samples": 109760, - "transcript": "however if we look sharp we may be able to discover one of these secret ways" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.444937, - "num_samples": 39119, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1272/135031/1272-135031-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.444937, - "original_num_samples": 39119, - "transcript": "oh no i'm quite sure he didn't" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.525, - "num_samples": 56400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.525, - "original_num_samples": 56400, - "transcript": "that's funny remarked betsy thoughtfully" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.65, - "num_samples": 74400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.65, - "original_num_samples": 74400, - "transcript": "i don't believe ann knew any magic or she'd have worked it before" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.595, - "num_samples": 41520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.595, - "original_num_samples": 41520, - "transcript": "i do not know confessed shaggy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.695, - "num_samples": 43120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0022.wav", - "speed": 1 - } - ], - "original_duration": 2.695, - "original_num_samples": 43120, - "transcript": "true agreed kaliko" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.52, - "num_samples": 120320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0023.wav", - "speed": 
1 - } - ], - "original_duration": 7.52, - "original_num_samples": 120320, - "transcript": "kaliko went to the big gong and pounded on it just as ruggedo used to do but no one answered the summons" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.47, - "num_samples": 231520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/135031/1272-135031-0024.wav", - "speed": 1 - } - ], - "original_duration": 14.47, - "original_num_samples": 231520, - "transcript": "having returned to the royal cavern kaliko first pounded the gong and then sat in the throne wearing ruggedo's discarded ruby crown and holding in his hand the sceptre which ruggedo had so often thrown at his head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.65, - "num_samples": 74400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.65, - "original_num_samples": 74400, - "transcript": "a man said to the universe sir i exist" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.535, - "num_samples": 104560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.535, - "original_num_samples": 104560, - "transcript": "sweat covered brion's body trickling into the tight loincloth that was the only garment he wore" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.335, - "num_samples": 213360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0002.wav", - "speed": 1 - } - ], - "original_duration": 13.335, - "original_num_samples": 213360, - "transcript": "the cut on his chest still dripping blood the 
ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.42, - "num_samples": 86720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.42, - "original_num_samples": 86720, - "transcript": "his instant of panic was followed by a small sharp blow high on his chest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.09, - "num_samples": 81440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.09, - "original_num_samples": 81440, - "transcript": "one minute a voice said and the time buzzer sounded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.81, - "num_samples": 92960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.81, - "original_num_samples": 92960, - "transcript": "a minute is not a very large measure of time and his body needed every fraction of it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.93, - "num_samples": 78880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.93, - "original_num_samples": 78880, - "transcript": "the buzzer's whirr triggered his muscles into complete relaxation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.735, - "num_samples": 75760, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/1272/141231/1272-141231-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.735, - "original_num_samples": 75760, - "transcript": "only his heart and lungs worked on at a strong measured rate" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.595, - "num_samples": 73520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.595, - "original_num_samples": 73520, - "transcript": "he was in reverie sliding along the borders of consciousness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.62, - "num_samples": 137920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.62, - "original_num_samples": 137920, - "transcript": "the contestants in the twenties needed undisturbed rest therefore nights in the dormitories were as quiet as death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.76, - "num_samples": 172160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.76, - "original_num_samples": 172160, - "transcript": "particularly so on this last night when only two of the little cubicles were occupied the thousands of others standing with dark empty doors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.015, - "num_samples": 80240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.015, - "original_num_samples": 80240, - "transcript": "the other voice snapped with a harsh urgency clearly used 
to command" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.74, - "num_samples": 123840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0012.wav", - "speed": 1 - } - ], - "original_duration": 7.74, - "original_num_samples": 123840, - "transcript": "i'm here because the matter is of utmost importance and brandd is the one i must see now stand aside" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.639938, - "num_samples": 26239, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0013.wav", - "speed": 1 - } - ], - "original_duration": 1.639938, - "original_num_samples": 26239, - "transcript": "the twenties" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.305, - "num_samples": 116880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.305, - "original_num_samples": 116880, - "transcript": "he must have drawn his gun because the intruder said quickly put that away you're being a fool out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.68, - "num_samples": 90880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.68, - "original_num_samples": 90880, - "transcript": "there was silence then and still wondering brion was once more asleep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.85, - "num_samples": 29600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0016.wav", - "speed": 1 - } - ], - "original_duration": 1.85, - 
"original_num_samples": 29600, - "transcript": "ten seconds" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.735, - "num_samples": 59760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.735, - "original_num_samples": 59760, - "transcript": "he asked the handler who was kneading his aching muscles" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.0, - "num_samples": 96000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.0, - "original_num_samples": 96000, - "transcript": "a red haired mountain of a man with an apparently inexhaustible store of energy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.7, - "num_samples": 75200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.7, - "original_num_samples": 75200, - "transcript": "there could be little art in this last and final round of fencing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.32, - "num_samples": 69120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.32, - "original_num_samples": 69120, - "transcript": "just thrust and parry and victory to the stronger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.65, - "num_samples": 74400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0021.wav", - "speed": 1 - } - ], - "original_duration": 
4.65, - "original_num_samples": 74400, - "transcript": "every man who entered the twenties had his own training tricks" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.35, - "num_samples": 133600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0022.wav", - "speed": 1 - } - ], - "original_duration": 8.35, - "original_num_samples": 133600, - "transcript": "there appeared to be an immediate association with the death trauma as if the two were inextricably linked into one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.66, - "num_samples": 138560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0023.wav", - "speed": 1 - } - ], - "original_duration": 8.66, - "original_num_samples": 138560, - "transcript": "the strength that enables someone in a trance to hold his body stiff and unsupported except at two points the head and heels" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.64, - "num_samples": 58240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.64, - "original_num_samples": 58240, - "transcript": "this is physically impossible when conscious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.725, - "num_samples": 139600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0025.wav", - "speed": 1 - } - ], - "original_duration": 8.725, - "original_num_samples": 139600, - "transcript": "others had died before during the twenties and death during the last round was in some ways easier than defeat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 
16, - "duration": 6.635, - "num_samples": 106160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0026.wav", - "speed": 1 - } - ], - "original_duration": 6.635, - "original_num_samples": 106160, - "transcript": "breathing deeply brion softly spoke the auto hypnotic phrases that triggered the process" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.57, - "num_samples": 105120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0027.wav", - "speed": 1 - } - ], - "original_duration": 6.57, - "original_num_samples": 105120, - "transcript": "when the buzzer sounded he pulled his foil from his second's startled grasp and ran forward" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.65, - "num_samples": 90400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0028.wav", - "speed": 1 - } - ], - "original_duration": 5.65, - "original_num_samples": 90400, - "transcript": "irolg looked amazed at the sudden fury of the attack then smiled" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.88, - "num_samples": 110080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0029.wav", - "speed": 1 - } - ], - "original_duration": 6.88, - "original_num_samples": 110080, - "transcript": "he thought it was a last burst of energy he knew how close they both were to exhaustion" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.13, - "num_samples": 114080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0030.wav", - "speed": 1 - } - ], - "original_duration": 7.13, - "original_num_samples": 114080, - "transcript": 
"brion saw something close to panic on his opponent's face when the man finally recognized his error" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.115, - "num_samples": 113840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0031.wav", - "speed": 1 - } - ], - "original_duration": 7.115, - "original_num_samples": 113840, - "transcript": "a wave of despair rolled out from irolg brion sensed it and knew the fifth point was his" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.48, - "num_samples": 71680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1272/141231/1272-141231-0032.wav", - "speed": 1 - } - ], - "original_duration": 4.48, - "original_num_samples": 71680, - "transcript": "then the powerful twist that thrust it aside in and under the guard" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.02, - "num_samples": 64320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.02, - "original_num_samples": 64320, - "transcript": "forgotten too the name of gillian the lovely captive" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.86, - "num_samples": 253760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0001.wav", - "speed": 1 - } - ], - "original_duration": 15.86, - "original_num_samples": 253760, - "transcript": "worse and worse he is even presumed to be the captive's sweetheart who wheedles the flower the ring and the prison key out of the strict virgins for his own purposes and flies with her at last in his shallop across the sea to live with her happily ever after" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.145, - "num_samples": 34320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.145, - "original_num_samples": 34320, - "transcript": "but this is a fallacy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.34, - "num_samples": 53440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.34, - "original_num_samples": 53440, - "transcript": "the wandering singer approaches them with his lute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.06, - "num_samples": 32960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0004.wav", - "speed": 1 - } - ], - "original_duration": 2.06, - "original_num_samples": 32960, - "transcript": "the emperor's daughter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.705, - "num_samples": 91280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.705, - "original_num_samples": 91280, - "transcript": "lady lady my rose white lady but will you not hear a roundel lady" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.215, - "num_samples": 83440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.215, - "original_num_samples": 83440, - "transcript": "o if you play us a roundel singer how can that harm the emperor's daughter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 10.855, - "num_samples": 173680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.855, - "original_num_samples": 173680, - "transcript": "she would not speak though we danced a week with her thoughts a thousand leagues over the water singer singer wandering singer o my honey sweet singer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.31, - "num_samples": 260960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0008.wav", - "speed": 1 - } - ], - "original_duration": 16.31, - "original_num_samples": 260960, - "transcript": "but if i play you a roundel lady get me a gift from the emperor's daughter her finger ring for my finger bring though she's pledged a thousand leagues over the water lady lady my fair lady o my rose white lady" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.04, - "num_samples": 32640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.04, - "original_num_samples": 32640, - "transcript": "the wandering singer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.91, - "num_samples": 238560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0010.wav", - "speed": 1 - } - ], - "original_duration": 14.91, - "original_num_samples": 238560, - "transcript": "but i did once have the luck to hear and see the lady played in entirety the children had been granted leave to play just one more game before bed time and of course they chose the longest and played it without missing a syllable" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.52, - "num_samples": 152320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.52, - "original_num_samples": 152320, - "transcript": "the ladies in yellow dresses stand again in a ring about the emperor's daughter and are for the last time accosted by the singer with his lute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.07, - "num_samples": 33120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.07, - "original_num_samples": 33120, - "transcript": "the wandering singer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.615, - "num_samples": 169840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0013.wav", - "speed": 1 - } - ], - "original_duration": 10.615, - "original_num_samples": 169840, - "transcript": "i'll play for you now neath the apple bough and you shall dream on the lawn so shady lady lady my fair lady o my apple gold lady" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.8, - "num_samples": 28800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0014.wav", - "speed": 1 - } - ], - "original_duration": 1.8, - "original_num_samples": 28800, - "transcript": "the ladies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.76, - "num_samples": 268160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0015.wav", - "speed": 1 - } - ], - "original_duration": 16.76, - "original_num_samples": 268160, - "transcript": "now you may 
play a serena singer a dream of night for an apple gold lady for the fruit is now on the apple bough and the moon is up and the lawn is shady singer singer wandering singer o my honey sweet singer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.455, - "num_samples": 215280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0016.wav", - "speed": 1 - } - ], - "original_duration": 13.455, - "original_num_samples": 215280, - "transcript": "once more the singer plays and the ladies dance but one by one they fall asleep to the drowsy music and then the singer steps into the ring and unlocks the tower and kisses the emperor's daughter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.825, - "num_samples": 45200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.825, - "original_num_samples": 45200, - "transcript": "i don't know what becomes of the ladies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.12, - "num_samples": 33920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.12, - "original_num_samples": 33920, - "transcript": "bed time children" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.26, - "num_samples": 68160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/50561/174-50561-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.26, - "original_num_samples": 68160, - "transcript": "you see the treatment is a trifle fanciful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.4, - 
"num_samples": 38400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.4, - "original_num_samples": 38400, - "transcript": "how we must simplify" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.33, - "num_samples": 293280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0001.wav", - "speed": 1 - } - ], - "original_duration": 18.33, - "original_num_samples": 293280, - "transcript": "it seems to me more and more as i live longer that most poetry and most literature and particularly the literature of the past is discordant with the vastness and variety the reserves and resources and recuperations of life as we live it to day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.2, - "num_samples": 195200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0002.wav", - "speed": 1 - } - ], - "original_duration": 12.2, - "original_num_samples": 195200, - "transcript": "it is the expression of life under cruder and more rigid conditions than ours lived by people who loved and hated more naively aged sooner and died younger than we do" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.445, - "num_samples": 119120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.445, - "original_num_samples": 119120, - "transcript": "we range wider last longer and escape more and more from intensity towards understanding" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.53, - "num_samples": 328480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/174/84280/174-84280-0004.wav", - "speed": 1 - } - ], - "original_duration": 20.53, - "original_num_samples": 328480, - "transcript": "and already this astounding blow begins to take its place among other events as a thing strange and terrible indeed but related to all the strangeness and mystery of life part of the universal mysteries of despair and futility and death that have troubled my consciousness since childhood" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.61, - "num_samples": 137760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.61, - "original_num_samples": 137760, - "transcript": "for a time the death of mary obscured her life for me but now her living presence is more in my mind again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.03, - "num_samples": 112480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.03, - "original_num_samples": 112480, - "transcript": "it was that idea of waste that dominated my mind in a strange interview i had with justin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.73, - "num_samples": 155680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.73, - "original_num_samples": 155680, - "transcript": "i became grotesquely anxious to assure him that indeed she and i had been as they say innocent throughout our last day together" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.52, - "num_samples": 72320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/174/84280/174-84280-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.52, - "original_num_samples": 72320, - "transcript": "you were wrong in all that i said she kept her faith with you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.1, - "num_samples": 49600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.1, - "original_num_samples": 49600, - "transcript": "we never planned to meet and when we met" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.36, - "num_samples": 69760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.36, - "original_num_samples": 69760, - "transcript": "if we had been brother and sister indeed there was nothing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.355, - "num_samples": 53680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.355, - "original_num_samples": 53680, - "transcript": "but now it doesn't seem to matter very much" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.43, - "num_samples": 230880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0012.wav", - "speed": 1 - } - ], - "original_duration": 14.43, - "original_num_samples": 230880, - "transcript": "and it is upon this effect of sweet and beautiful possibilities caught in the net of animal jealousies and thoughtless motives and ancient rigid institutions that i would end this writing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 16.86, - "num_samples": 269760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0013.wav", - "speed": 1 - } - ], - "original_duration": 16.86, - "original_num_samples": 269760, - "transcript": "in mary it seems to me i found both womanhood and fellowship i found what many have dreamt of love and friendship freely given and i could do nothing but clutch at her to make her my possession" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.010063, - "num_samples": 48161, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.010063, - "original_num_samples": 48161, - "transcript": "what alternative was there for her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.73, - "num_samples": 219680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/84280/174-84280-0015.wav", - "speed": 1 - } - ], - "original_duration": 13.73, - "original_num_samples": 219680, - "transcript": "she was destroyed not merely by the unconsidered undisciplined passions of her husband and her lover but by the vast tradition that sustains and enforces the subjugation of her sex" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.53, - "num_samples": 72480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.53, - "original_num_samples": 72480, - "transcript": "he had never been father lover husband friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.65, - "num_samples": 74400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/174/168635/174-168635-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.65, - "original_num_samples": 74400, - "transcript": "the heart of that ex convict was full of virginity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.86, - "num_samples": 253760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0002.wav", - "speed": 1 - } - ], - "original_duration": 15.86, - "original_num_samples": 253760, - "transcript": "his sister and his sister's children had left him only a vague and far off memory which had finally almost completely vanished he had made every effort to find them and not having been able to find them he had forgotten them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.18, - "num_samples": 210880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0003.wav", - "speed": 1 - } - ], - "original_duration": 13.18, - "original_num_samples": 210880, - "transcript": "he suffered all the pangs of a mother and he knew not what it meant for that great and singular movement of a heart which begins to love is a very obscure and a very sweet thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.77, - "num_samples": 204320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0004.wav", - "speed": 1 - } - ], - "original_duration": 12.77, - "original_num_samples": 204320, - "transcript": "only as he was five and fifty and cosette eight years of age all that might have been love in the whole course of his life flowed together into a sort of ineffable light" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.93, - "num_samples": 126880, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.93, - "original_num_samples": 126880, - "transcript": "cosette on her side had also unknown to herself become another being poor little thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.57, - "num_samples": 105120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.57, - "original_num_samples": 105120, - "transcript": "she felt that which she had never felt before a sensation of expansion" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.21, - "num_samples": 163360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.21, - "original_num_samples": 163360, - "transcript": "the man no longer produced on her the effect of being old or poor she thought jean valjean handsome just as she thought the hovel pretty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.235, - "num_samples": 163760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.235, - "original_num_samples": 163760, - "transcript": "nature a difference of fifty years had set a profound gulf between jean valjean and cosette destiny filled in this gulf" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "to meet 
was to find each other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.06, - "num_samples": 160960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.06, - "original_num_samples": 160960, - "transcript": "when these two souls perceived each other they recognized each other as necessary to each other and embraced each other closely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.645, - "num_samples": 74320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.645, - "original_num_samples": 74320, - "transcript": "moreover jean valjean had chosen his refuge well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.36, - "num_samples": 133760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.36, - "original_num_samples": 133760, - "transcript": "he had paid her six months in advance and had commissioned the old woman to furnish the chamber and dressing room as we have seen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.87, - "num_samples": 93920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.87, - "original_num_samples": 93920, - "transcript": "week followed week these two beings led a happy life in that hovel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.825062, - "num_samples": 77201, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/174/168635/174-168635-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.825062, - "original_num_samples": 77201, - "transcript": "cosette was no longer in rags she was in mourning" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.19, - "num_samples": 67040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.19, - "original_num_samples": 67040, - "transcript": "and then he talked of her mother and he made her pray" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.06, - "num_samples": 112960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0016.wav", - "speed": 1 - } - ], - "original_duration": 7.06, - "original_num_samples": 112960, - "transcript": "he passed hours in watching her dressing and undressing her doll and in listening to her prattle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.57, - "num_samples": 73120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.57, - "original_num_samples": 73120, - "transcript": "the best of us are not exempt from egotistical thoughts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 27.11, - "num_samples": 433760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0018.wav", - "speed": 1 - } - ], - "original_duration": 27.11, - "original_num_samples": 433760, - "transcript": "he had returned to prison this time for having done right he had quaffed fresh bitterness disgust and lassitude were overpowering him even the memory of the bishop probably suffered a temporary 
eclipse though sure to reappear later on luminous and triumphant but after all that sacred memory was growing dim" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.0, - "num_samples": 128000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0019.wav", - "speed": 1 - } - ], - "original_duration": 8.0, - "original_num_samples": 128000, - "transcript": "who knows whether jean valjean had not been on the eve of growing discouraged and of falling once more" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.62, - "num_samples": 73920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.62, - "original_num_samples": 73920, - "transcript": "alas he walked with no less indecision than cosette" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.17, - "num_samples": 66720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.17, - "original_num_samples": 66720, - "transcript": "he protected her and she strengthened him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/174/168635/174-168635-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "he was that child's stay and she was his prop" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.7, - "num_samples": 187200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0000.wav", - 
"speed": 1 - } - ], - "original_duration": 11.7, - "original_num_samples": 187200, - "transcript": "once there was a father who thought he would build for his children a beautiful home putting into it every thing they could need or desire throughout their lives" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.495, - "num_samples": 151920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.495, - "original_num_samples": 151920, - "transcript": "the sweetest perfumes floated through the air while thousands of birds answered the music of fountains with their songs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.91, - "num_samples": 78560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.91, - "original_num_samples": 78560, - "transcript": "why did he give that so odd a shape or so strange a covering" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.14, - "num_samples": 162240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0003.wav", - "speed": 1 - } - ], - "original_duration": 10.14, - "original_num_samples": 162240, - "transcript": "and so through many questions and many experiments they learn at last how to use the contents of this one storehouse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.225, - "num_samples": 179600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0004.wav", - "speed": 1 - } - ], - "original_duration": 11.225, - "original_num_samples": 179600, - "transcript": "the entrance is light because it 
opens so wide but we can see that the floor slopes downward and the way looks dark and narrow before us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.795, - "num_samples": 268720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0005.wav", - "speed": 1 - } - ], - "original_duration": 16.795, - "original_num_samples": 268720, - "transcript": "walk down the sloping foot path now and be careful to keep out of the way of the little cars that are coming and going on each side of you loaded on one side and empty on the other and seeming to run up and down by themselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.47, - "num_samples": 135520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.47, - "original_num_samples": 135520, - "transcript": "look a little closer while our guide lets the light of his lamp fall upon the black wall at your side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.715, - "num_samples": 203440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0007.wav", - "speed": 1 - } - ], - "original_duration": 12.715, - "original_num_samples": 203440, - "transcript": "see beneath your feet is the marking of great tree trunks lying aslant across the floor and the forms of gigantic palm leaves strewed among them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.14, - "num_samples": 226240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0008.wav", - "speed": 1 - } - ], - "original_duration": 14.14, - "original_num_samples": 226240, - "transcript": "here is 
something different rounded like a nut shell you can split off one side and behold there is the nut lying snugly as does any chestnut in its bur" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.695, - "num_samples": 395120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0009.wav", - "speed": 1 - } - ], - "original_duration": 24.695, - "original_num_samples": 395120, - "transcript": "ferns and palms mosses and trees and animals all perfect all beautiful and yet all hidden away under this hill and turned into shining black coal now i can very well remember when i first saw a coal fire and how odd it looked to see what seemed to be burning stones" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.815, - "num_samples": 221040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0010.wav", - "speed": 1 - } - ], - "original_duration": 13.815, - "original_num_samples": 221040, - "transcript": "for when i was a little girl we always had logs of wood blazing in an open fireplace and so did many other people and coal was just coming into use for fuel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.495, - "num_samples": 87920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.495, - "original_num_samples": 87920, - "transcript": "what should we have done if everybody had kept on burning wood to this day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.785, - "num_samples": 252560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0012.wav", - "speed": 1 - } - ], - "original_duration": 
15.785, - "original_num_samples": 252560, - "transcript": "these forests were of trees different in some ways from those we have now great ferns as tall as this house and mosses as high as little trees and palm leaves of enormous size" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.51, - "num_samples": 184160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0013.wav", - "speed": 1 - } - ], - "original_duration": 11.51, - "original_num_samples": 184160, - "transcript": "then the hills were piled up on top of it all but here and there some edge of a coal bed was tilted up and appeared above the ground" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.6, - "num_samples": 217600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0014.wav", - "speed": 1 - } - ], - "original_duration": 13.6, - "original_num_samples": 217600, - "transcript": "but by and by the wise men thought about it and said to themselves we must find out what useful purpose god made the gas for we know that he does not make any thing for harm only" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.115, - "num_samples": 209840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0015.wav", - "speed": 1 - } - ], - "original_duration": 13.115, - "original_num_samples": 209840, - "transcript": "it was only a trouble to the gas makers who had no use for it and even threw it away until some one more thoughtful than the others found out that water would not pass through it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.785, - "num_samples": 92560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2803/161169/2803-161169-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.785, - "original_num_samples": 92560, - "transcript": "don't you see how many uses we have found for this refuse coal tar" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.01, - "num_samples": 128160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/161169/2803-161169-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.01, - "original_num_samples": 128160, - "transcript": "when your hands or lips are cracked and rough from the cold does your mother ever put on glycerin to heal them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.48, - "num_samples": 183680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.48, - "original_num_samples": 183680, - "transcript": "fortunately will halley was not a man in a hurry and did not use a press of canvas or his masts would inevitably have come down" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.56, - "num_samples": 248960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0001.wav", - "speed": 1 - } - ], - "original_duration": 15.56, - "original_num_samples": 248960, - "transcript": "john mangles therefore hoped that the wretched hull would reach port without accident but it grieved him that his companions should have to suffer so much discomfort from the defective arrangements of the brig" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.53, - "num_samples": 56480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0002.wav", - "speed": 1 - } - ], - 
"original_duration": 3.53, - "original_num_samples": 56480, - "transcript": "their friends did their best to amuse them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.475, - "num_samples": 103600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.475, - "original_num_samples": 103600, - "transcript": "their minds were so distracted at this change of route as to be quite unhinged" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.005, - "num_samples": 192080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0004.wav", - "speed": 1 - } - ], - "original_duration": 12.005, - "original_num_samples": 192080, - "transcript": "much as they had been interested in his dissertation on the pampas or australia his lectures on new zealand fell on cold and indifferent ears" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.77, - "num_samples": 76320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.77, - "original_num_samples": 76320, - "transcript": "his eyes wandered ceaselessly over the blank horizon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.18, - "num_samples": 34880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.18, - "original_num_samples": 34880, - "transcript": "what then my lord" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.51, - "num_samples": 72160, - "encoding": "Signed Integer PCM", - "silent": false, 
- "fname": "dev-clean-wav/2803/154320/2803-154320-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.51, - "original_num_samples": 72160, - "transcript": "god keep us from such a meeting why john" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.59, - "num_samples": 57440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.59, - "original_num_samples": 57440, - "transcript": "we could not even fly fly john" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "yes my lord we should try in vain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.01, - "num_samples": 64160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.01, - "original_num_samples": 64160, - "transcript": "we would fight to the death of course but after that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.28, - "num_samples": 68480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.28, - "original_num_samples": 68480, - "transcript": "think of lady glenarvan think of mary grant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.81, - "num_samples": 156960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2803/154320/2803-154320-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.81, - "original_num_samples": 156960, - "transcript": "will halley is a brute but i am keeping my eyes open and if the coast looks dangerous i will put the ship's head to sea again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.365, - "num_samples": 69840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.365, - "original_num_samples": 69840, - "transcript": "so that on that score there is little or no danger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.29, - "num_samples": 84640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154320/2803-154320-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.29, - "original_num_samples": 84640, - "transcript": "but as to getting alongside the duncan god forbid" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.93, - "num_samples": 126880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0000.wav", - "speed": 1 - } - ], - "original_duration": 7.93, - "original_num_samples": 126880, - "transcript": "they were not to leave it again till the tops of the wahiti ranges were lit with the first fires of day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.825, - "num_samples": 61200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.825, - "original_num_samples": 61200, - "transcript": "they had one night in which to prepare for death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, 
- "bitrate": 16, - "duration": 2.06, - "num_samples": 32960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.06, - "original_num_samples": 32960, - "transcript": "the meal ended" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.8, - "num_samples": 204800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0003.wav", - "speed": 1 - } - ], - "original_duration": 12.8, - "original_num_samples": 204800, - "transcript": "sleep which keeps all sorrow in abeyance soon weighed down their eyelids they slept in each other's arms overcome by exhaustion and prolonged watching" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.19, - "num_samples": 179040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0004.wav", - "speed": 1 - } - ], - "original_duration": 11.19, - "original_num_samples": 179040, - "transcript": "if it is decreed that we die to morrow let us die bravely like christian men ready to appear without terror before the supreme judge" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.1, - "num_samples": 97600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.1, - "original_num_samples": 97600, - "transcript": "god who reads our hearts knows that we had a noble end in view" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.005, - "num_samples": 64080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.005, - 
"original_num_samples": 64080, - "transcript": "glenarvan's voice firm till now faltered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.77, - "num_samples": 108320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.77, - "original_num_samples": 108320, - "transcript": "john you have promised mary what i promised lady helena what is your plan" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.37, - "num_samples": 117920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0008.wav", - "speed": 1 - } - ], - "original_duration": 7.37, - "original_num_samples": 117920, - "transcript": "i believe said john that in the sight of god i have a right to fulfill that promise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.3, - "num_samples": 132800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.3, - "original_num_samples": 132800, - "transcript": "my lord whichever of us survives the other will fulfill the wish of lady helena and mary grant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.34, - "num_samples": 101440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.34, - "original_num_samples": 101440, - "transcript": "at last the major said my friends keep that to the last moment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.595, - "num_samples": 105520, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/2803/154328/2803-154328-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.595, - "original_num_samples": 105520, - "transcript": "the jailer may forget that he is on guard the prisoner never forgets that he is guarded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.57, - "num_samples": 153120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.57, - "original_num_samples": 153120, - "transcript": "on that side descent was impossible and had it been possible the bottom was shut in by the enormous rock" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.975, - "num_samples": 63600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.975, - "original_num_samples": 63600, - "transcript": "listen said he motioning them to stoop" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.925, - "num_samples": 78800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.925, - "original_num_samples": 78800, - "transcript": "animal or man answered the major i will soon find out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.549937, - "num_samples": 328799, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0015.wav", - "speed": 1 - } - ], - "original_duration": 20.549937, - "original_num_samples": 328799, - "transcript": "wilson and olbinett joined their companions and all united to dig through the wall john with his dagger the others with stones taken from the 
ground or with their nails while mulrady stretched along the ground watched the native guard through a crevice of the matting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.18, - "num_samples": 34880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.18, - "original_num_samples": 34880, - "transcript": "what could be the object" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.435, - "num_samples": 118960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.435, - "original_num_samples": 118960, - "transcript": "did they know of the existence of the prisoners or was it some private enterprise that led to the undertaking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.38, - "num_samples": 246080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0018.wav", - "speed": 1 - } - ], - "original_duration": 15.38, - "original_num_samples": 246080, - "transcript": "their fingers bled but still they worked on after half an hour they had gone three feet deep they perceived by the increased sharpness of the sounds that only a thin layer of earth prevented immediate communication" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.18, - "num_samples": 162880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0019.wav", - "speed": 1 - } - ], - "original_duration": 10.18, - "original_num_samples": 162880, - "transcript": "john mangles inserting the blade of his poniard avoided the knife which now protruded above the soil but seized the 
hand that wielded it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.92, - "num_samples": 222720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0020.wav", - "speed": 1 - } - ], - "original_duration": 13.92, - "original_num_samples": 222720, - "transcript": "but softly as the name was breathed mary grant already awakened by the sounds in the hut slipped over toward glenarvan and seizing the hand all stained with earth she covered it with kisses" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.05, - "num_samples": 48800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.05, - "original_num_samples": 48800, - "transcript": "watch the savages outside said robert" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.805, - "num_samples": 76880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.805, - "original_num_samples": 76880, - "transcript": "round his body was rolled a long coil of flax rope" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.59, - "num_samples": 105440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2803/154328/2803-154328-0023.wav", - "speed": 1 - } - ], - "original_duration": 6.59, - "original_num_samples": 105440, - "transcript": "my child my child murmured lady helena the savages did not kill you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.155, - "num_samples": 50480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/7850/73752/7850-73752-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.155, - "original_num_samples": 50480, - "transcript": "ferdinand meditates over his good fortune" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.07, - "num_samples": 129120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.07, - "original_num_samples": 129120, - "transcript": "in moments of deep feeling alike sudden bursts of prosperity as in darker hours man must be alone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.11, - "num_samples": 145760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.11, - "original_num_samples": 145760, - "transcript": "it requires some self communion to prepare ourselves for good fortune as well as to encounter difficulty and danger and disgrace" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 28.945, - "num_samples": 463120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0003.wav", - "speed": 1 - } - ], - "original_duration": 28.945, - "original_num_samples": 463120, - "transcript": "this violent and triumphant revolution in his prospects and his fortunes was hardly yet completely comprehended by our friend ferdinand armine and when he had left a note for the generous mirabel whose slumbers he would not disturb at this early hour even with good news he strolled along up charles street and to the park in one of those wild and joyous reveries in which we brood over coming bliss and create a thousand glorious consequences" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 3.865, - "num_samples": 61840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.865, - "original_num_samples": 61840, - "transcript": "ferdinand felt his freedom as well as his happiness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.14, - "num_samples": 34240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.14, - "original_num_samples": 34240, - "transcript": "it was indeed her handwriting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.075, - "num_samples": 161200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.075, - "original_num_samples": 161200, - "transcript": "restless with impending joy he sauntered to the bridge and leant over the balustrade gazing on the waters in charmed and charming vacancy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.275, - "num_samples": 164400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.275, - "original_num_samples": 164400, - "transcript": "how many incidents how many characters how many feelings flitted over his memory of what sweet and bitter experience did he not chew the cud" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.61, - "num_samples": 169760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.61, - "original_num_samples": 
169760, - "transcript": "four and twenty hours ago and he deemed himself the most miserable and forlorn of human beings and now all the blessings of the world seemed showered at his feet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.345, - "num_samples": 117520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0009.wav", - "speed": 1 - } - ], - "original_duration": 7.345, - "original_num_samples": 117520, - "transcript": "the most gifted individuals in the land emulated each other in proving which entertained for him the most sincere affection" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.04, - "num_samples": 224640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0010.wav", - "speed": 1 - } - ], - "original_duration": 14.04, - "original_num_samples": 224640, - "transcript": "he could not flatter himself that he indeed merited such singular blessings and yet with all his faults which with him were but the consequences of his fiery youth ferdinand had been faithful to henrietta" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.96, - "num_samples": 47360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.96, - "original_num_samples": 47360, - "transcript": "his constancy to her was now rewarded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.38, - "num_samples": 70080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.38, - "original_num_samples": 70080, - "transcript": "as for his friends the future must prove his 
gratitude to them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.58, - "num_samples": 41280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.58, - "original_num_samples": 41280, - "transcript": "it was indeed dancing on a volcano" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.475, - "num_samples": 39600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.475, - "original_num_samples": 39600, - "transcript": "and now all had ended so happily" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.675, - "num_samples": 122800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.675, - "original_num_samples": 122800, - "transcript": "was it not all a dream of his own creation while his eye had been fixed in abstraction on that bright and flowing river" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.59, - "num_samples": 57440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.59, - "original_num_samples": 57440, - "transcript": "he might be enchanted but that was the talisman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.97, - "num_samples": 95520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.97, - "original_num_samples": 95520, - "transcript": "in the 
present unsettled though hopeful state of affairs ferdinand would not go home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 28.4, - "num_samples": 454400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0018.wav", - "speed": 1 - } - ], - "original_duration": 28.4, - "original_num_samples": 454400, - "transcript": "in exactly ten minutes it is in the power of every man to free himself from all the tumult of the world the pangs of love the throbs of ambition the wear and tear of play the recriminating boudoir the conspiring club the rattling hell and find himself in a sublime sylvan solitude superior to the cedars of lebanon and inferior only in extent to the chestnut forests of anatolia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.075, - "num_samples": 49200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/73752/7850-73752-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.075, - "original_num_samples": 49200, - "transcript": "is papa alone enquired miss temple" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.56, - "num_samples": 120960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0000.wav", - "speed": 1 - } - ], - "original_duration": 7.56, - "original_num_samples": 120960, - "transcript": "through the influence of hon thomas l hamer he was admitted at west point in eighteen thirty nine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.65, - "num_samples": 106400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.65, - "original_num_samples": 106400, - "transcript": "at this time 
grant was not taken with war and probably evinced little interest in army tactics" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.32, - "num_samples": 133120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.32, - "original_num_samples": 133120, - "transcript": "grant acted as mustering officer until being commissioned colonel of the twenty first illinois volunteers he took the field" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.425, - "num_samples": 54800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.425, - "original_num_samples": 54800, - "transcript": "general halleck in speaking of this battle said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.005062, - "num_samples": 176081, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0004.wav", - "speed": 1 - } - ], - "original_duration": 11.005062, - "original_num_samples": 176081, - "transcript": "indeed if ever a general deserved honor grant had won it he had opened the mississippi to navigation and had captured nearly one hundred thousand prisoners and arms" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.295, - "num_samples": 52720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.295, - "original_num_samples": 52720, - "transcript": "he was now commander of all the federal forces" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.83, - "num_samples": 
61280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.83, - "original_num_samples": 61280, - "transcript": "the capture of lee was a far more difficult undertaking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.975, - "num_samples": 175600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.975, - "original_num_samples": 175600, - "transcript": "time wore away and on the ninth of april eighteen sixty five grant captured the confederate army under lee thus virtually ending the war" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.915, - "num_samples": 110640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.915, - "original_num_samples": 110640, - "transcript": "when his public services were finished he started in company with his wife son jesse and a few friends" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.3, - "num_samples": 132800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/111771/7850-111771-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.3, - "original_num_samples": 132800, - "transcript": "his success seems to have been the outgrowth of hard study and ability to perform the most exhaustive labor without fatigue" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.455, - "num_samples": 135280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0000.wav", - "speed": 1 - } - ], - "original_duration": 8.455, 
- "original_num_samples": 135280, - "transcript": "a person would think that after a family had lived so long in a place all the neighbors would be fond of them yet it is not so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.85, - "num_samples": 45600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.85, - "original_num_samples": 45600, - "transcript": "it is disgraceful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.71, - "num_samples": 91360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.71, - "original_num_samples": 91360, - "transcript": "they thought the trouble came from bad bringing up or no bringing up at all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.22, - "num_samples": 99520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.22, - "original_num_samples": 99520, - "transcript": "they always ate plain food and plenty of it and they never ate between meals" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.02, - "num_samples": 128320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0004.wav", - "speed": 1 - } - ], - "original_duration": 8.02, - "original_num_samples": 128320, - "transcript": "you would think that with six legs apiece and three joints in each leg they might walk quite fast yet they never did" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.7, - "num_samples": 123200, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.7, - "original_num_samples": 123200, - "transcript": "they did not breathe it into their mouths or through gills but took it in through some openings in the back part of their bodies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.3, - "num_samples": 52800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.3, - "original_num_samples": 52800, - "transcript": "both lips asked the larvae" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.455, - "num_samples": 55280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.455, - "original_num_samples": 55280, - "transcript": "well our lower lips anyway answered the nymph" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.75, - "num_samples": 44000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.75, - "original_num_samples": 44000, - "transcript": "our upper lips are so small they don't matter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.19, - "num_samples": 115040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0009.wav", - "speed": 1 - } - ], - "original_duration": 7.19, - "original_num_samples": 115040, - "transcript": "they knew that whenever they stuck out their lower lips at the small fishes and bugs they swam away as fast as they could" - }, - { - "files": 
[ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.855, - "num_samples": 205680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0010.wav", - "speed": 1 - } - ], - "original_duration": 12.855, - "original_num_samples": 205680, - "transcript": "indeed the lower lip of a dragon fly child might well frighten people for it is fastened on a long jointed arm like thing and has pincers on it with which it catches and holds its food" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.15, - "num_samples": 146400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.15, - "original_num_samples": 146400, - "transcript": "but sometimes he straightens the joint and holds his lip out before him and then its pincers catch hold of things he does this when he is hungry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.02, - "num_samples": 64320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.02, - "original_num_samples": 64320, - "transcript": "scared dah who's afraid answered he" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.345, - "num_samples": 37520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.345, - "original_num_samples": 37520, - "transcript": "here comes the snapping turtle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.78, - "num_samples": 124480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/7850/286674/7850-286674-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.78, - "original_num_samples": 124480, - "transcript": "sure enough there he came through the shallow water his wet back shell partly out of it and shining in the sunlight" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.53, - "num_samples": 56480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.53, - "original_num_samples": 56480, - "transcript": "they thought he might be going to take a nap after his dinner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.145, - "num_samples": 178320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0016.wav", - "speed": 1 - } - ], - "original_duration": 11.145, - "original_num_samples": 178320, - "transcript": "he began to draw in his legs very very slowly and just as his great hard lower shell touched the mud the last larva crawled out under his tail" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.745, - "num_samples": 43920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/286674/7850-286674-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.745, - "original_num_samples": 43920, - "transcript": "the nymphs had already gotten away" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.175, - "num_samples": 66800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.175, - "original_num_samples": 66800, - "transcript": "some are wonderfully wrought pretty little homes for birdikins" - }, - { - "files": [ - { 
- "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "indeed it is not a nest at all only the beginning of one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "and there is an old story about this which i shall tell you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.205, - "num_samples": 51280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.205, - "original_num_samples": 51280, - "transcript": "oh what shall we do for a home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.345, - "num_samples": 133520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0004.wav", - "speed": 1 - } - ], - "original_duration": 8.345, - "original_num_samples": 133520, - "transcript": "and the poor silly things ruffled up their feathers and looked miserable as only a little bird can look when it is unhappy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "she was 
indeed a clever bird" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.54, - "num_samples": 120640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.54, - "original_num_samples": 120640, - "transcript": "she popped into her new house and sat there comfortably peering out through the window slits with her sharp little eyes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.31, - "num_samples": 68960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.31, - "original_num_samples": 68960, - "transcript": "and she saw the other birds hopping about and twittering helplessly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.11, - "num_samples": 97760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.11, - "original_num_samples": 97760, - "transcript": "then all the other birds chirped eagerly yes yes let us ask her to teach us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.705, - "num_samples": 139280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.705, - "original_num_samples": 139280, - "transcript": "so in a great company they came fluttering hopping twittering up to the elm tree where mother magpie nestled comfortably in her new house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.06, - "num_samples": 192960, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0010.wav", - "speed": 1 - } - ], - "original_duration": 12.06, - "original_num_samples": 192960, - "transcript": "o wise mother magpie dear mother magpie they cried teach us how to build our nests like yours for it is growing night and we are tired and sleepy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.535, - "num_samples": 120560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.535, - "original_num_samples": 120560, - "transcript": "the magpie said she would teach them if they would be a patient diligent obedient class of little birds" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.275, - "num_samples": 68400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.275, - "original_num_samples": 68400, - "transcript": "and where each bird perched there it was to build its nest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.04, - "num_samples": 96640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.04, - "original_num_samples": 96640, - "transcript": "she began to show them how to weave the bits of things together into nests as they should be made" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.965, - "num_samples": 127440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.965, - "original_num_samples": 127440, - "transcript": "and some 
of the birds who were attentive and careful soon saw how it was done and started nice homes for themselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.88, - "num_samples": 46080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.88, - "original_num_samples": 46080, - "transcript": "i thought that was the way to begin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.305, - "num_samples": 52880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.305, - "original_num_samples": 52880, - "transcript": "certainly of course screamed the jackdaw" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.71, - "num_samples": 139360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.71, - "original_num_samples": 139360, - "transcript": "here wood pigeon said mother magpie you must place those sticks through and across criss cross criss cross so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.72, - "num_samples": 75520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.72, - "original_num_samples": 75520, - "transcript": "criss cross criss cross so interrupted the wood pigeon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.485, - "num_samples": 87760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0019.wav", - 
"speed": 1 - } - ], - "original_duration": 5.485, - "original_num_samples": 87760, - "transcript": "you say you know all about it then go on and finish your nests by yourselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.174937, - "num_samples": 34799, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.174937, - "original_num_samples": 34799, - "transcript": "much luck may you have" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.865062, - "num_samples": 125841, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0021.wav", - "speed": 1 - } - ], - "original_duration": 7.865062, - "original_num_samples": 125841, - "transcript": "and away she flew to her own cosy nest in the elm tree where she was soon fast asleep forgetting all about the matter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.85, - "num_samples": 61600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.85, - "original_num_samples": 61600, - "transcript": "but the wood pigeon was in the worst case of them all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.21, - "num_samples": 83360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/7850/281318/7850-281318-0023.wav", - "speed": 1 - } - ], - "original_duration": 5.21, - "original_num_samples": 83360, - "transcript": "for she had only the foundation laid criss cross as the magpie had shown her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.8, - "num_samples": 76800, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.8, - "original_num_samples": 76800, - "transcript": "one who writes of such an era labours under a troublesome disadvantage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.07, - "num_samples": 369120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0001.wav", - "speed": 1 - } - ], - "original_duration": 23.07, - "original_num_samples": 369120, - "transcript": "in the present case that disadvantage is doubled for while the sins of the church however heinous were still such as admit of being expressed in words the sins of the heathen world against which she fought were utterly indescribable and the christian apologist is thus compelled for the sake of decency to state the church's case far more weakly than the facts deserve" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.875, - "num_samples": 190000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0002.wav", - "speed": 1 - } - ], - "original_duration": 11.875, - "original_num_samples": 190000, - "transcript": "not be it ever remembered that the slightest suspicion of immorality attaches either to the heroine of this book or to the leading philosophers of her school for several centuries" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.8, - "num_samples": 236800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0003.wav", - "speed": 1 - } - ], - "original_duration": 14.8, - "original_num_samples": 236800, - "transcript": "that divine word who is the light who lighteth every man which cometh into the world had awakened in the heart of mankind a 
moral craving never before felt in any strength except by a few isolated philosophers or prophets" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.8, - "num_samples": 60800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.8, - "original_num_samples": 60800, - "transcript": "the very emperors had arrayed themselves on her side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.31, - "num_samples": 516960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0005.wav", - "speed": 1 - } - ], - "original_duration": 32.31, - "original_num_samples": 516960, - "transcript": "julian's last attempt to restore paganism by imperial influence had only proved that the old faith had lost all hold upon the hearts of the masses at his death the great tide wave of new opinion rolled on unchecked and the rulers of earth were fain to swim with the stream to accept in words at least the church's laws as theirs to acknowledge a king of kings to whom even they owed homage and obedience and to call their own slaves their poorer brethren and often too their spiritual superiors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.0, - "num_samples": 64000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.0, - "original_num_samples": 64000, - "transcript": "but if the emperors had become christian the empire had not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 31.64, - "num_samples": 506240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0007.wav", - "speed": 1 - } 
- ], - "original_duration": 31.64, - "original_num_samples": 506240, - "transcript": "in the meanwhile the minds of men cut adrift from their ancient moorings wandered wildly over pathless seas of speculative doubt and especially in the more metaphysical and contemplative east attempted to solve for themselves the questions of man's relation to the unseen by those thousand schisms heresies and theosophies it is a disgrace to the word philosophy to call them by it on the records of which the student now gazes bewildered unable alike to count or to explain their fantasies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.08, - "num_samples": 177280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0008.wav", - "speed": 1 - } - ], - "original_duration": 11.08, - "original_num_samples": 177280, - "transcript": "they brought before the minds of churchmen a thousand new questions which must be solved unless the church was to relinquish for ever her claims as the great teacher and satisfier of the human soul" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.97, - "num_samples": 191520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0009.wav", - "speed": 1 - } - ], - "original_duration": 11.97, - "original_num_samples": 191520, - "transcript": "but the health of a church depends not merely on the creed which it professes not even on the wisdom and holiness of a few great ecclesiastics but on the faith and virtue of its individual members" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.17, - "num_samples": 66720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.17, - "original_num_samples": 66720, - 
"transcript": "the mens sana must have a corpus sanum to inhabit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.325, - "num_samples": 69200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0011.wav", - "speed": 1 - } - ], - "original_duration": 4.325, - "original_num_samples": 69200, - "transcript": "and the new blood at the era of this story was at hand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.42, - "num_samples": 102720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.42, - "original_num_samples": 102720, - "transcript": "tribe after tribe was crowding down to the alps and trampling upon each other on the frontiers of the empire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.77, - "num_samples": 284320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0013.wav", - "speed": 1 - } - ], - "original_duration": 17.77, - "original_num_samples": 284320, - "transcript": "the huns singly their inferiors pressed them from behind with the irresistible weight of numbers italy with her rich cities and fertile lowlands beckoned them on to plunder as auxiliaries they had learned their own strength and roman weakness a casus belli was soon found" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.41, - "num_samples": 326560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0014.wav", - "speed": 1 - } - ], - "original_duration": 20.41, - "original_num_samples": 326560, - "transcript": "how iniquitous was the conduct of the sons of theodosius in refusing the usual bounty by which the goths were 
bribed not to attack the empire the whole pent up deluge burst over the plains of italy and the western empire became from that day forth a dying idiot while the new invaders divided europe among themselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.485, - "num_samples": 519760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0015.wav", - "speed": 1 - } - ], - "original_duration": 32.485, - "original_num_samples": 519760, - "transcript": "the countless treasures which five centuries of rapine had accumulated round the capitol had become the prey of men clothed in sheepskins and horse hide and the sister of an emperor had found her beauty virtue and pride of race worthily matched by those of the hard handed northern hero who led her away from italy as his captive and his bride to found new kingdoms in south france and spain and to drive the newly arrived vandals across the straits of gibraltar into the then blooming coast land of northern africa" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.02, - "num_samples": 192320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0016.wav", - "speed": 1 - } - ], - "original_duration": 12.02, - "original_num_samples": 192320, - "transcript": "that extraordinary reform in morals which according to salvian and his contemporaries the vandal conquerors worked in north africa availed them nothing they lost more than they gave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.76, - "num_samples": 380160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0017.wav", - "speed": 1 - } - ], - "original_duration": 23.76, - "original_num_samples": 380160, - "transcript": "climate bad example and the luxury of power 
degraded them in one century into a race of helpless and debauched slave holders doomed to utter extermination before the semi gothic armies of belisarius and with them vanished the last chance that the gothic races would exercise on the eastern world the same stern yet wholesome discipline under which the western had been restored to life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.05, - "num_samples": 512800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0018.wav", - "speed": 1 - } - ], - "original_duration": 32.05, - "original_num_samples": 512800, - "transcript": "that wonderful metaphysic subtlety which in phrases and definitions too often unmeaning to our grosser intellect saw the symbols of the most important spiritual realities and felt that on the distinction between homoousios and homoiousios might hang the solution of the whole problem of humanity was set to battle in alexandria the ancient stronghold of greek philosophy with the effete remains of the very scientific thought to which it owed its extraordinary culture" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.23, - "num_samples": 227680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0019.wav", - "speed": 1 - } - ], - "original_duration": 14.23, - "original_num_samples": 227680, - "transcript": "to synesius's most charming letters as well as to those of isidore the good abbot of pelusium i beg leave to refer those readers who wish for further information about the private life of the fifth century" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.425, - "num_samples": 86800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9006/2902-9006-0020.wav", - "speed": 1 - } - ], - 
"original_duration": 5.425, - "original_num_samples": 86800, - "transcript": "i cannot hope that these pages will be altogether free from anachronisms and errors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.72, - "num_samples": 187520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.72, - "original_num_samples": 187520, - "transcript": "the place seemed fragrant with all the riches of greek thought and song since the days when ptolemy philadelphus walked there with euclid and theocritus callimachus and lycophron" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.915, - "num_samples": 270640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0001.wav", - "speed": 1 - } - ], - "original_duration": 16.915, - "original_num_samples": 270640, - "transcript": "the room had neither carpet nor fireplace and the only movables in it were a sofa bed a table and an arm chair all of such delicate and graceful forms as may be seen on ancient vases of a far earlier period than that whereof we write" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.395, - "num_samples": 390320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0002.wav", - "speed": 1 - } - ], - "original_duration": 24.395, - "original_num_samples": 390320, - "transcript": "but most probably had any of us entered that room that morning we should not have been able to spare a look either for the furniture or the general effect or the museum gardens or the sparkling mediterranean beyond but we should have agreed that the room was quite rich enough for human eyes for the sake of one treasure which it possessed and beside which nothing was worth a 
moment's glance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.475, - "num_samples": 231600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0003.wav", - "speed": 1 - } - ], - "original_duration": 14.475, - "original_num_samples": 231600, - "transcript": "she has lifted her eyes off her manuscript she is looking out with kindling countenance over the gardens of the museum her ripe curling greek lips such as we never see now even among her own wives and sisters open" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.63, - "num_samples": 90080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.63, - "original_num_samples": 90080, - "transcript": "if they have ceased to guide nations they have not ceased to speak to their own elect" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.21, - "num_samples": 83360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.21, - "original_num_samples": 83360, - "transcript": "if they have cast off the vulgar herd they have not cast off hypatia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.345, - "num_samples": 293520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0006.wav", - "speed": 1 - } - ], - "original_duration": 18.345, - "original_num_samples": 293520, - "transcript": "to be welcomed into the celestial ranks of the heroic to rise to the immortal gods to the ineffable powers onward upward ever through ages and through eternities till i find my home at last and vanish in the glory of the 
nameless and the absolute one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.97, - "num_samples": 95520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.97, - "original_num_samples": 95520, - "transcript": "i to believe against the authority of porphyry himself too in evil eyes and magic" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.155, - "num_samples": 34480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.155, - "original_num_samples": 34480, - "transcript": "what do i care for food" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.415, - "num_samples": 86640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.415, - "original_num_samples": 86640, - "transcript": "how can he whose sphere lies above the stars stoop every moment to earth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.02, - "num_samples": 128320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.02, - "original_num_samples": 128320, - "transcript": "ay she answered half bitterly and would that we could live without food and imitate perfectly the immortal gods" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.91, - "num_samples": 110560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.91, - 
"original_num_samples": 110560, - "transcript": "there is fruit with lentils and rice waiting for you in the next room and bread unless you despise it too much" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.825, - "num_samples": 109200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.825, - "original_num_samples": 109200, - "transcript": "strange that men should be content to grovel and be men when they might rise to the rank of gods" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.775, - "num_samples": 188400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0013.wav", - "speed": 1 - } - ], - "original_duration": 11.775, - "original_num_samples": 188400, - "transcript": "not that such a creature as that disturbs me no created thing i hope can move my equanimity but if i could stoop to hate i should hate her hate her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.49, - "num_samples": 215840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0014.wav", - "speed": 1 - } - ], - "original_duration": 13.49, - "original_num_samples": 215840, - "transcript": "and her voice took a tone which made it somewhat uncertain whether in spite of all the lofty impassibility which she felt bound to possess she did not hate pelagia with a most human and mundane hatred" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.72, - "num_samples": 43520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.72, - "original_num_samples": 43520, - "transcript": "his 
excellency madam the prefect" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.365, - "num_samples": 53840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2902/9008/2902-9008-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.365, - "original_num_samples": 53840, - "transcript": "and why should that disturb me let him enter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.83, - "num_samples": 141280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0000.wav", - "speed": 1 - } - ], - "original_duration": 8.83, - "original_num_samples": 141280, - "transcript": "he had been a clerk in a banking house and was transported for embezzlement though by some grave doubts as to his guilt were entertained" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.08, - "num_samples": 177280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0001.wav", - "speed": 1 - } - ], - "original_duration": 11.08, - "original_num_samples": 177280, - "transcript": "when the muster bell rang and the gang broke up rufus dawes on his silent way to his separate cell observed a notable change of custom in the disposition of the new convict" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.21, - "num_samples": 115360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.21, - "original_num_samples": 115360, - "transcript": "i'm not to go in there says the ex bank clerk drawing back in dismay from the cloud of foul faces which lowered upon him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.25, - 
"num_samples": 100000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.25, - "original_num_samples": 100000, - "transcript": "what is he more than anybody else said the wretched man to himself as he hugged his misery close" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.01, - "num_samples": 224160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0004.wav", - "speed": 1 - } - ], - "original_duration": 14.01, - "original_num_samples": 224160, - "transcript": "about dawn the next morning mister north who amongst other vagaries not approved of by his bishop had a habit of prowling about the prison at unofficial hours was attracted by a dispute at the door of the dormitory" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.105, - "num_samples": 49680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.105, - "original_num_samples": 49680, - "transcript": "a prisoner refractory your reverence said the watchman" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.655, - "num_samples": 42480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.655, - "original_num_samples": 42480, - "transcript": "wants to come out mister north" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.0, - "num_samples": 160000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0007.wav", - "speed": 1 - } - ], - "original_duration": 10.0, - "original_num_samples": 
160000, - "transcript": "kirkland ghastly pale bleeding with his woollen shirt torn and his blue eyes wide open with terror was clinging to the bars" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "and beat on the bars with white and sweating hands" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.595, - "num_samples": 57520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.595, - "original_num_samples": 57520, - "transcript": "i order you sir north cried indignant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.08, - "num_samples": 81280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0010.wav", - "speed": 1 - } - ], - "original_duration": 5.08, - "original_num_samples": 81280, - "transcript": "very sorry your reverence but your reverence knows that i daren't do such a thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.97, - "num_samples": 111520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.97, - "original_num_samples": 111520, - "transcript": "oh you ministers of christ wolves in sheep's clothing you shall be judged for this" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.4, - "num_samples": 54400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3752/4943/3752-4943-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.4, - "original_num_samples": 54400, - "transcript": "let him out cried north again stamping his foot" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.535, - "num_samples": 88560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.535, - "original_num_samples": 88560, - "transcript": "there's more trouble with you bloody aristocrats than enough lie quiet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.99, - "num_samples": 47840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.99, - "original_num_samples": 47840, - "transcript": "you can guess what that unhappy boy has suffered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.96, - "num_samples": 47360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.96, - "original_num_samples": 47360, - "transcript": "impertinent young beggar said burgess" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.485, - "num_samples": 39760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.485, - "original_num_samples": 39760, - "transcript": "do him good curse him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.275, - "num_samples": 36400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0017.wav", - 
"speed": 1 - } - ], - "original_duration": 2.275, - "original_num_samples": 36400, - "transcript": "it's hard for such young uns" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.265, - "num_samples": 84240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.265, - "original_num_samples": 84240, - "transcript": "have you ever been in that that place i was in last night asked kirkland" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.495, - "num_samples": 39920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.495, - "original_num_samples": 39920, - "transcript": "what does he care care" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.875, - "num_samples": 78000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.875, - "original_num_samples": 78000, - "transcript": "if you fall we must fall over you and then you're done for" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.515, - "num_samples": 72240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.515, - "original_num_samples": 72240, - "transcript": "he had hardly uttered the words when the boy flung himself beneath the log" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.195, - "num_samples": 83120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0022.wav", - "speed": 1 
- } - ], - "original_duration": 5.195, - "original_num_samples": 83120, - "transcript": "hold on to me miss nancy said the giant i'm big enough to carry double" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.055, - "num_samples": 48880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.055, - "original_num_samples": 48880, - "transcript": "but kirkland kept steadily on for the river" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.655, - "num_samples": 90480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0024.wav", - "speed": 1 - } - ], - "original_duration": 5.655, - "original_num_samples": 90480, - "transcript": "just as he reached it however the figure of mister north rose from behind a pile of stones" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.245, - "num_samples": 83920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.245, - "original_num_samples": 83920, - "transcript": "kirkland jumped for the jetty missed his footing and fell into the arms of the chaplain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.05, - "num_samples": 64800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.05, - "original_num_samples": 64800, - "transcript": "oh mister north says kirkland why did you stop me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.805, - "num_samples": 44880, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.805, - "original_num_samples": 44880, - "transcript": "must stop that fifty lashes troke" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "that last fellow you had ought to have been tied up himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.835, - "num_samples": 61360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.835, - "original_num_samples": 61360, - "transcript": "i won't have my men knocked up with flogging these rascals" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.395, - "num_samples": 38320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4943/3752-4943-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.395, - "original_num_samples": 38320, - "transcript": "very good your honour says troke" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.335, - "num_samples": 53360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.335, - "original_num_samples": 53360, - "transcript": "captain frere says that the scenery is delightful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.415, - "num_samples": 70640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3752/4944/3752-4944-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.415, - "original_num_samples": 70640, - "transcript": "abandoned indeed by god and man almost" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.225, - "num_samples": 51600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.225, - "original_num_samples": 51600, - "transcript": "in the valley of the shadow of death he is with us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.76, - "num_samples": 44160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.76, - "original_num_samples": 44160, - "transcript": "you have not been long in the colony mister meekin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.425, - "num_samples": 38800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0004.wav", - "speed": 1 - } - ], - "original_duration": 2.425, - "original_num_samples": 38800, - "transcript": "if you please said meekin gravely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.42, - "num_samples": 38720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.42, - "original_num_samples": 38720, - "transcript": "pray help yourself to wine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.885, - "num_samples": 62160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0006.wav", - "speed": 1 - } - ], - 
"original_duration": 3.885, - "original_num_samples": 62160, - "transcript": "have you many visitors captain burgess very few" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.8, - "num_samples": 44800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.8, - "original_num_samples": 44800, - "transcript": "i was quartered with him at sarah island" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.94, - "num_samples": 31040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0008.wav", - "speed": 1 - } - ], - "original_duration": 1.94, - "original_num_samples": 31040, - "transcript": "so he's a friend of yours eh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.61, - "num_samples": 41760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.61, - "original_num_samples": 41760, - "transcript": "i had the pleasure of meeting him in society" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.599937, - "num_samples": 57599, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.599937, - "original_num_samples": 57599, - "transcript": "he is just married you know is he said burgess" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.395062, - "num_samples": 54321, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.395062, - "original_num_samples": 
54321, - "transcript": "the devil he is i heard something about it too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.545, - "num_samples": 88720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.545, - "original_num_samples": 88720, - "transcript": "yes one oughtn't to leave the colony without seeing it says burgess it's worth seeing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.945, - "num_samples": 95120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.945, - "original_num_samples": 95120, - "transcript": "in fact the ringleader john rex gave me his confession and i sent it to the bishop" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.305, - "num_samples": 36880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.305, - "original_num_samples": 36880, - "transcript": "a great rascal put in north" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.135, - "num_samples": 66160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.135, - "original_num_samples": 66160, - "transcript": "well now said meekin with asperity i don't agree with you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.89, - "num_samples": 174240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0016.wav", - "speed": 1 - } - ], - "original_duration": 
10.89, - "original_num_samples": 174240, - "transcript": "he seems to me to be truly penitent for his offences a misguided but not a hypocritical man if my knowledge of human nature goes for anything i hope he is said north" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.045, - "num_samples": 112720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.045, - "original_num_samples": 112720, - "transcript": "there's no fear of him said burgess cheerily if he grows uproarious we'll soon give him a touch of the cat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.375, - "num_samples": 118000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.375, - "original_num_samples": 118000, - "transcript": "i suppose severity is necessary returned meekin though to my ears a flogging sounds a little distasteful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.93, - "num_samples": 46880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.93, - "original_num_samples": 46880, - "transcript": "i have these attacks at times" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.485, - "num_samples": 215760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0020.wav", - "speed": 1 - } - ], - "original_duration": 13.485, - "original_num_samples": 215760, - "transcript": "the reverend meekin eyed his clerical brother with horror the reverend meekin was not accustomed to clergymen who wore black neckties smoked 
clay pipes chewed tobacco and drank neat brandy out of tumblers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.27, - "num_samples": 116320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0021.wav", - "speed": 1 - } - ], - "original_duration": 7.27, - "original_num_samples": 116320, - "transcript": "so they went on to the verandah and looked down upon the lights of the prison and listened to the sea lapping the shore" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.4, - "num_samples": 262400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0022.wav", - "speed": 1 - } - ], - "original_duration": 16.4, - "original_num_samples": 262400, - "transcript": "by and by a short figure smoking a cheroot came up out of the dark and proved to be doctor macklewain who had been prevented from attending the dinner by reason of an accident to a constable at norfolk bay which had claimed his professional attention" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.5, - "num_samples": 40000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.5, - "original_num_samples": 40000, - "transcript": "dead said doctor macklewain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.03, - "num_samples": 32480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.03, - "original_num_samples": 32480, - "transcript": "delighted to see you mister meekin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.735, - 
"num_samples": 59760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.735, - "original_num_samples": 59760, - "transcript": "but macklewain was tired and wanted to get home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.55, - "num_samples": 40800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.55, - "original_num_samples": 40800, - "transcript": "our roads lie together doctor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.39, - "num_samples": 182240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0027.wav", - "speed": 1 - } - ], - "original_duration": 11.39, - "original_num_samples": 182240, - "transcript": "before the two clergymen had got half way down the steep path that led from the commandant's house to the flat on which the cottages of the doctor and chaplain were built macklewain rejoined them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.21, - "num_samples": 51360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.21, - "original_num_samples": 51360, - "transcript": "another flogging to morrow said he grumblingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.225, - "num_samples": 35600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0029.wav", - "speed": 1 - } - ], - "original_duration": 2.225, - "original_num_samples": 35600, - "transcript": "whom is he going to flog now" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.205, - "num_samples": 51280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.205, - "original_num_samples": 51280, - "transcript": "you don't mean to say he's going to flog kirkland" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.65, - "num_samples": 138400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0031.wav", - "speed": 1 - } - ], - "original_duration": 8.65, - "original_num_samples": 138400, - "transcript": "perhaps you'll have the goodness to allow me to be the best judge of that returned macklewain drawing up his little body to its least insignificant stature" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.74, - "num_samples": 75840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0032.wav", - "speed": 1 - } - ], - "original_duration": 4.74, - "original_num_samples": 75840, - "transcript": "mister meekin expressed some alarm but doctor macklewain re assured him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.225, - "num_samples": 67600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0033.wav", - "speed": 1 - } - ], - "original_duration": 4.225, - "original_num_samples": 67600, - "transcript": "we can't do anything without evidence complain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.42, - "num_samples": 70720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0034.wav", - "speed": 1 - } - ], - "original_duration": 4.42, - "original_num_samples": 
70720, - "transcript": "i shall find my portmanteau in my room you said yes yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.285, - "num_samples": 52560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0035.wav", - "speed": 1 - } - ], - "original_duration": 3.285, - "original_num_samples": 52560, - "transcript": "he sleeps at the back and north hurried off" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.85, - "num_samples": 109600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0036.wav", - "speed": 1 - } - ], - "original_duration": 6.85, - "original_num_samples": 109600, - "transcript": "an impulsive gentleman said meekin to macklewain as the sound of mister north's footsteps died away in the distance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.575, - "num_samples": 41200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0037.wav", - "speed": 1 - } - ], - "original_duration": 2.575, - "original_num_samples": 41200, - "transcript": "macklewain shook his head seriously" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.865, - "num_samples": 45840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0038.wav", - "speed": 1 - } - ], - "original_duration": 2.865, - "original_num_samples": 45840, - "transcript": "he has the strangest fits at times" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.685, - "num_samples": 90960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0039.wav", - "speed": 1 - } - ], - "original_duration": 5.685, - 
"original_num_samples": 90960, - "transcript": "unless it's a cancer in the stomach i don't know what it can be cancer in the stomach" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0040.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "doctor we all have our crosses have we not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.475, - "num_samples": 39600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0041.wav", - "speed": 1 - } - ], - "original_duration": 2.475, - "original_num_samples": 39600, - "transcript": "how delightful the grass smells" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.21, - "num_samples": 51360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0042.wav", - "speed": 1 - } - ], - "original_duration": 3.21, - "original_num_samples": 51360, - "transcript": "good night sir i hope you will be comfortable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.3, - "num_samples": 52800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0043.wav", - "speed": 1 - } - ], - "original_duration": 3.3, - "original_num_samples": 52800, - "transcript": "i'll teach my prisoners to attempt suicide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.365, - "num_samples": 37840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0044.wav", - "speed": 1 - } - ], - "original_duration": 2.365, - "original_num_samples": 
37840, - "transcript": "that's macklewain's business" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0045.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "then don't you interfere with me sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.86, - "num_samples": 29760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0046.wav", - "speed": 1 - } - ], - "original_duration": 1.86, - "original_num_samples": 29760, - "transcript": "i've given my orders sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.975, - "num_samples": 111600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0047.wav", - "speed": 1 - } - ], - "original_duration": 6.975, - "original_num_samples": 111600, - "transcript": "then captain burgess cried north his pale face flushing i tell you the boy's blood will be on your head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.83, - "num_samples": 77280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0048.wav", - "speed": 1 - } - ], - "original_duration": 4.83, - "original_num_samples": 77280, - "transcript": "i am a minister of god sir and i forbid you to commit this crime" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0049.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - 
"transcript": "damn your impertinence sir burst out burgess" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.72, - "num_samples": 43520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0050.wav", - "speed": 1 - } - ], - "original_duration": 2.72, - "original_num_samples": 43520, - "transcript": "you're a dismissed officer of the government sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.535, - "num_samples": 56560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0051.wav", - "speed": 1 - } - ], - "original_duration": 3.535, - "original_num_samples": 56560, - "transcript": "this of course was mere bravado on the part of the commandant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.925, - "num_samples": 126800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0052.wav", - "speed": 1 - } - ], - "original_duration": 7.925, - "original_num_samples": 126800, - "transcript": "north knew well that he would never dare to attempt any such act of violence but the insult stung him like the cut of a whip" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.795, - "num_samples": 140720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0053.wav", - "speed": 1 - } - ], - "original_duration": 8.795, - "original_num_samples": 140720, - "transcript": "show mister north out he said and go down to the barracks and tell troke that kirkland is to have a hundred lashes to morrow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.18, - "num_samples": 50880, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/3752/4944/3752-4944-0054.wav", - "speed": 1 - } - ], - "original_duration": 3.18, - "original_num_samples": 50880, - "transcript": "i'll show you who's master here my good sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.17, - "num_samples": 34720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0055.wav", - "speed": 1 - } - ], - "original_duration": 2.17, - "original_num_samples": 34720, - "transcript": "this is murderous" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.53, - "num_samples": 72480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0056.wav", - "speed": 1 - } - ], - "original_duration": 4.53, - "original_num_samples": 72480, - "transcript": "the government may go to and you too roared burgess get out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.535, - "num_samples": 40560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0057.wav", - "speed": 1 - } - ], - "original_duration": 2.535, - "original_num_samples": 40560, - "transcript": "they shall not flog that boy he said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.05, - "num_samples": 32800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0058.wav", - "speed": 1 - } - ], - "original_duration": 2.05, - "original_num_samples": 32800, - "transcript": "i'll report this to the government" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.62, - "num_samples": 73920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0059.wav", - "speed": 1 - } - ], - 
"original_duration": 4.62, - "original_num_samples": 73920, - "transcript": "i wish i hadn't taken that brandy he said fool that i am" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.025, - "num_samples": 64400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0060.wav", - "speed": 1 - } - ], - "original_duration": 4.025, - "original_num_samples": 64400, - "transcript": "oh god give me strength aid me help me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.485, - "num_samples": 39760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0061.wav", - "speed": 1 - } - ], - "original_duration": 2.485, - "original_num_samples": 39760, - "transcript": "o lord look down upon me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.24, - "num_samples": 67840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0062.wav", - "speed": 1 - } - ], - "original_duration": 4.24, - "original_num_samples": 67840, - "transcript": "he mixed a teaspoonful of this in a pannikin of water and drank it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.07, - "num_samples": 33120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0063.wav", - "speed": 1 - } - ], - "original_duration": 2.07, - "original_num_samples": 33120, - "transcript": "it relieved him for a while" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.805, - "num_samples": 108880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0064.wav", - "speed": 1 - } - ], - "original_duration": 6.805, - "original_num_samples": 
108880, - "transcript": "it was as though he had reached the crisis of a disease which had been for days gathering force" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.2, - "num_samples": 67200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0065.wav", - "speed": 1 - } - ], - "original_duration": 4.2, - "original_num_samples": 67200, - "transcript": "i must have a teaspoonful he said to allay the craving" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.125, - "num_samples": 114000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0066.wav", - "speed": 1 - } - ], - "original_duration": 7.125, - "original_num_samples": 114000, - "transcript": "twice he paused on the way to the sitting room and twice was he driven on by a power stronger than his will" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.695063, - "num_samples": 43121, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0067.wav", - "speed": 1 - } - ], - "original_duration": 2.695063, - "original_num_samples": 43121, - "transcript": "he smelt the nutty aroma of the spirit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.755, - "num_samples": 236080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0068.wav", - "speed": 1 - } - ], - "original_duration": 14.755, - "original_num_samples": 236080, - "transcript": "that at all times debasing at this particular time it was infamous that a vice unworthy of any man was doubly sinful in a man of education and a minister of god in vain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.21, - 
"num_samples": 163360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3752/4944/3752-4944-0069.wav", - "speed": 1 - } - ], - "original_duration": 10.21, - "original_num_samples": 163360, - "transcript": "in the midst of his arguments he found himself at the cupboard with the bottle at his lips in an attitude that was at once ludicrous and horrible" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.5, - "num_samples": 168000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.5, - "original_num_samples": 168000, - "transcript": "when we took our seats at the breakfast table it was with the feeling of being no longer looked upon as connected in any way with this case" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.62, - "num_samples": 153920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.62, - "original_num_samples": 153920, - "transcript": "instantly they absorbed all my attention though i dared not give them a direct look and continued to observe them only in the glass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.83, - "num_samples": 45280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.83, - "original_num_samples": 45280, - "transcript": "yes and a very respectable one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.125, - "num_samples": 66000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0003.wav", - "speed": 1 - } 
- ], - "original_duration": 4.125, - "original_num_samples": 66000, - "transcript": "the lady is not the mother of the boys but their aunt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.575, - "num_samples": 57200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.575, - "original_num_samples": 57200, - "transcript": "the boys belong to the gentleman who is a widower" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.02, - "num_samples": 32320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.02, - "original_num_samples": 32320, - "transcript": "george nodded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.775, - "num_samples": 60400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.775, - "original_num_samples": 60400, - "transcript": "the boys look wide awake enough if the father does not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.655, - "num_samples": 90480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.655, - "original_num_samples": 90480, - "transcript": "do they still insist that miss challoner was the only person in the room with them at this time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.77, - "num_samples": 28320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0008.wav", 
- "speed": 1 - } - ], - "original_duration": 1.77, - "original_num_samples": 28320, - "transcript": "george" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.78, - "num_samples": 60480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.78, - "original_num_samples": 60480, - "transcript": "have you ever thought that she might have been a suicide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.205, - "num_samples": 83280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0010.wav", - "speed": 1 - } - ], - "original_duration": 5.205, - "original_num_samples": 83280, - "transcript": "i know it sounds foolish but the alternative is so improbable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.21, - "num_samples": 99360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.21, - "original_num_samples": 99360, - "transcript": "the boys look wide awake enough but who can tell i would sooner believe that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.72, - "num_samples": 107520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.72, - "original_num_samples": 107520, - "transcript": "a man was looking in from the corridor behind at the four persons we were just discussing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.22, - "num_samples": 83520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3081/166546/3081-166546-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.22, - "original_num_samples": 83520, - "transcript": "i inquired of george with my eyes still on this furtive watcher" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.520062, - "num_samples": 56321, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.520062, - "original_num_samples": 56321, - "transcript": "i took quite a fancy to him why" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.72, - "num_samples": 43520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.72, - "original_num_samples": 43520, - "transcript": "i am looking at him now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.685, - "num_samples": 122960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0016.wav", - "speed": 1 - } - ], - "original_duration": 7.685, - "original_num_samples": 122960, - "transcript": "just an everyday detective but ambitious i suppose and quite alive to the importance of being thorough" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.005, - "num_samples": 64080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.005, - "original_num_samples": 64080, - "transcript": "yes he's mercurial in all his movements" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.775, - "num_samples": 28400, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0018.wav", - "speed": 1 - } - ], - "original_duration": 1.775, - "original_num_samples": 28400, - "transcript": "what does he want" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.9, - "num_samples": 62400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.9, - "original_num_samples": 62400, - "transcript": "i asked as soon as george had returned to my side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.34, - "num_samples": 85440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.34, - "original_num_samples": 85440, - "transcript": "he wants me to stand ready to obey any summons the police may send me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.91, - "num_samples": 46560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.91, - "original_num_samples": 46560, - "transcript": "i emphasised complacently" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.595, - "num_samples": 169520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.595, - "original_num_samples": 169520, - "transcript": "he appeared to know for he told me at once that he was detective gryce a man who had grown old in solving just such baffling problems as these" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.285, - "num_samples": 
164560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0023.wav", - "speed": 1 - } - ], - "original_duration": 10.285, - "original_num_samples": 164560, - "transcript": "he gave up work some time ago i have been told my husband went on but evidently a great case still has its allurement for him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "the trail here must be a very blind one for them to call him in" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.615, - "num_samples": 41840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.615, - "original_num_samples": 41840, - "transcript": "i wish we had not left so soon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.81, - "num_samples": 156960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0026.wav", - "speed": 1 - } - ], - "original_duration": 9.81, - "original_num_samples": 156960, - "transcript": "he was late of course but when he did appear i almost forgot our usual greeting in my hurry to ask him if he had seen the evening papers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.975, - "num_samples": 63600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0027.wav", - "speed": 1 - } - ], - "original_duration": 3.975, - "original_num_samples": 63600, - "transcript": "however a 
little later we had a comfortable chat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.98, - "num_samples": 63680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.98, - "original_num_samples": 63680, - "transcript": "it was one which gave me a small triumph over george" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.885, - "num_samples": 110160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0029.wav", - "speed": 1 - } - ], - "original_duration": 6.885, - "original_num_samples": 110160, - "transcript": "the suggestion he had laughed at was not so entirely foolish as he had been pleased to consider it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.985, - "num_samples": 223760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0030.wav", - "speed": 1 - } - ], - "original_duration": 13.985, - "original_num_samples": 223760, - "transcript": "their greeting was cordial and the lines on the latter's face relaxed a little as he met the still bright eye of the man upon whose instinct and judgment so much reliance had always been placed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.55, - "num_samples": 168800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0031.wav", - "speed": 1 - } - ], - "original_duration": 10.55, - "original_num_samples": 168800, - "transcript": "this is very good of you he began glancing down at the aged detective's bundled up legs and gently pushing a chair towards him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 3.69, - "num_samples": 59040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.69, - "original_num_samples": 59040, - "transcript": "it's the most inexplicable there" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.73, - "num_samples": 43680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0033.wav", - "speed": 1 - } - ], - "original_duration": 2.73, - "original_num_samples": 43680, - "transcript": "but clews there are absolutely none" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.965, - "num_samples": 79440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0034.wav", - "speed": 1 - } - ], - "original_duration": 4.965, - "original_num_samples": 79440, - "transcript": "that is we have not been able to find any perhaps you can" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.37, - "num_samples": 37920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0035.wav", - "speed": 1 - } - ], - "original_duration": 2.37, - "original_num_samples": 37920, - "transcript": "at least that is what we hope" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.74, - "num_samples": 43840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0036.wav", - "speed": 1 - } - ], - "original_duration": 2.74, - "original_num_samples": 43840, - "transcript": "it's a case in a thousand gryce" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.53, - "num_samples": 104480, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0037.wav", - "speed": 1 - } - ], - "original_duration": 6.53, - "original_num_samples": 104480, - "transcript": "the old man's eyes shot fire and unconsciously one foot slipped to the floor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.085, - "num_samples": 49360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0038.wav", - "speed": 1 - } - ], - "original_duration": 3.085, - "original_num_samples": 49360, - "transcript": "she had no companion near her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.2, - "num_samples": 67200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0039.wav", - "speed": 1 - } - ], - "original_duration": 4.2, - "original_num_samples": 67200, - "transcript": "no word no cry just a collapse and sudden fall" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.895, - "num_samples": 78320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0040.wav", - "speed": 1 - } - ], - "original_duration": 4.895, - "original_num_samples": 78320, - "transcript": "in olden days they would have said struck by a bolt from heaven" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.735, - "num_samples": 43760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0041.wav", - "speed": 1 - } - ], - "original_duration": 2.735, - "original_num_samples": 43760, - "transcript": "what do you make of it gryce" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.195, - "num_samples": 83120, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0042.wav", - "speed": 1 - } - ], - "original_duration": 5.195, - "original_num_samples": 83120, - "transcript": "i should like to see the desk you speak of and the spot where she fell" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.625, - "num_samples": 90000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0043.wav", - "speed": 1 - } - ], - "original_duration": 5.625, - "original_num_samples": 90000, - "transcript": "a young fellow who had been hovering in the background at once stepped forward" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.495, - "num_samples": 71920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0044.wav", - "speed": 1 - } - ], - "original_duration": 4.495, - "original_num_samples": 71920, - "transcript": "he was the plain faced detective who had spoken to george" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.485, - "num_samples": 151760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0045.wav", - "speed": 1 - } - ], - "original_duration": 9.485, - "original_num_samples": 151760, - "transcript": "this sweetwater as they called him was i have since understood one of his proteges and more or less of a favourite" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.98, - "num_samples": 63680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0046.wav", - "speed": 1 - } - ], - "original_duration": 3.98, - "original_num_samples": 63680, - "transcript": "been over the ground studied the affair carefully" - }, - { 
- "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.76, - "num_samples": 76160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0047.wav", - "speed": 1 - } - ], - "original_duration": 4.76, - "original_num_samples": 76160, - "transcript": "very well then you're in a position to pioneer me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.81, - "num_samples": 76960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0048.wav", - "speed": 1 - } - ], - "original_duration": 4.81, - "original_num_samples": 76960, - "transcript": "well well that's honest at all events" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.495, - "num_samples": 55920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0049.wav", - "speed": 1 - } - ], - "original_duration": 3.495, - "original_num_samples": 55920, - "transcript": "but i'm in no position to make promises" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0050.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "old days don't return for the asking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.23, - "num_samples": 179680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0051.wav", - "speed": 1 - } - ], - "original_duration": 11.23, - "original_num_samples": 179680, - "transcript": "whether he got anything else it would be impossible to say from his manner as he finally sank into 
a chair by one of the openings and looked down on the lobby below" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.41, - "num_samples": 54560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0052.wav", - "speed": 1 - } - ], - "original_duration": 3.41, - "original_num_samples": 54560, - "transcript": "one or two of the musicians from the end of the hall" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.675, - "num_samples": 42800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0053.wav", - "speed": 1 - } - ], - "original_duration": 2.675, - "original_num_samples": 42800, - "transcript": "naturally they reached her first" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.445063, - "num_samples": 55121, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0054.wav", - "speed": 1 - } - ], - "original_duration": 3.445063, - "original_num_samples": 55121, - "transcript": "mark sowerby and claus hennerberg" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.875, - "num_samples": 62000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0055.wav", - "speed": 1 - } - ], - "original_duration": 3.875, - "original_num_samples": 62000, - "transcript": "honest germans men who have played here for years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.73, - "num_samples": 75680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0056.wav", - "speed": 1 - } - ], - "original_duration": 4.73, - "original_num_samples": 75680, - "transcript": "who came 
next on the scene some people from the lobby" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.67, - "num_samples": 42720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0057.wav", - "speed": 1 - } - ], - "original_duration": 2.67, - "original_num_samples": 42720, - "transcript": "anybody before the father came in" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.435, - "num_samples": 70960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0058.wav", - "speed": 1 - } - ], - "original_duration": 4.435, - "original_num_samples": 70960, - "transcript": "yes miss clarke the middle aged lady with the parrishes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.92, - "num_samples": 94720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0059.wav", - "speed": 1 - } - ], - "original_duration": 5.92, - "original_num_samples": 94720, - "transcript": "i suppose she has been carefully questioned very i should say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.74, - "num_samples": 43840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0060.wav", - "speed": 1 - } - ], - "original_duration": 2.74, - "original_num_samples": 43840, - "transcript": "and she speaks of no weapon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.285, - "num_samples": 84560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0061.wav", - "speed": 1 - } - ], - "original_duration": 5.285, - "original_num_samples": 84560, - "transcript": "not till the doctor came her 
doctor who was happily in his office in this very building" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.07, - "num_samples": 129120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0062.wav", - "speed": 1 - } - ], - "original_duration": 8.07, - "original_num_samples": 129120, - "transcript": "yes mister slater the assistant manager who was in the lobby at the time says that ten minutes at least must have elapsed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.74, - "num_samples": 27840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0063.wav", - "speed": 1 - } - ], - "original_duration": 1.74, - "original_num_samples": 27840, - "transcript": "no doubt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.93, - "num_samples": 62880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0064.wav", - "speed": 1 - } - ], - "original_duration": 3.93, - "original_num_samples": 62880, - "transcript": "sweetwater someone drew that weapon out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0065.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "not altogether by me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.7, - "num_samples": 75200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0066.wav", - "speed": 1 - } - ], - "original_duration": 4.7, - "original_num_samples": 75200, - "transcript": "wherever 
she pleases only i can't walk far" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.955, - "num_samples": 127280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0067.wav", - "speed": 1 - } - ], - "original_duration": 7.955, - "original_num_samples": 127280, - "transcript": "it has not been running since last night or it would be full of curious people all the time hustling to get a glimpse of this place" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.53, - "num_samples": 40480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0068.wav", - "speed": 1 - } - ], - "original_duration": 2.53, - "original_num_samples": 40480, - "transcript": "but they'll put a man on for you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.71, - "num_samples": 59360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0069.wav", - "speed": 1 - } - ], - "original_duration": 3.71, - "original_num_samples": 59360, - "transcript": "very good manage it as you will" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.735, - "num_samples": 187760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0070.wav", - "speed": 1 - } - ], - "original_duration": 11.735, - "original_num_samples": 187760, - "transcript": "i'll wait here till you're ready explain yourself to the lady tell her i'm an old and rheumatic invalid who has been used to asking his own questions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.345, - "num_samples": 101520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3081/166546/3081-166546-0071.wav", - "speed": 1 - } - ], - "original_duration": 6.345, - "original_num_samples": 101520, - "transcript": "as her quiet figure appeared in the doorway sweetwater stole a glance at mister gryce" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.23, - "num_samples": 51680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0072.wav", - "speed": 1 - } - ], - "original_duration": 3.23, - "original_num_samples": 51680, - "transcript": "there was no doubting them in this instance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.445, - "num_samples": 23120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0073.wav", - "speed": 1 - } - ], - "original_duration": 1.445, - "original_num_samples": 23120, - "transcript": "yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.12, - "num_samples": 177920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0074.wav", - "speed": 1 - } - ], - "original_duration": 11.12, - "original_num_samples": 177920, - "transcript": "for some little time that is it seemed long though i believe it was not more than a minute before two men came running from the musicians gallery" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.475, - "num_samples": 39600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0075.wav", - "speed": 1 - } - ], - "original_duration": 2.475, - "original_num_samples": 39600, - "transcript": "yes many times" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.94, - "num_samples": 31040, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0076.wav", - "speed": 1 - } - ], - "original_duration": 1.94, - "original_num_samples": 31040, - "transcript": "what made the difference" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.425, - "num_samples": 166800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0077.wav", - "speed": 1 - } - ], - "original_duration": 10.425, - "original_num_samples": 166800, - "transcript": "miss clarke started and her sweet face showed a moment's perplexity did i she queried musingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.33, - "num_samples": 69280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0078.wav", - "speed": 1 - } - ], - "original_duration": 4.33, - "original_num_samples": 69280, - "transcript": "no a very natural one i should say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.255, - "num_samples": 132080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0079.wav", - "speed": 1 - } - ], - "original_duration": 8.255, - "original_num_samples": 132080, - "transcript": "and the glance she cast him while not meeting his eye showed that she understood the importance of the admission" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.315, - "num_samples": 69040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0080.wav", - "speed": 1 - } - ], - "original_duration": 4.315, - "original_num_samples": 69040, - "transcript": "i know she said what you are going to ask me now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 3.055, - "num_samples": 48880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0081.wav", - "speed": 1 - } - ], - "original_duration": 3.055, - "original_num_samples": 48880, - "transcript": "there was no poniard in the wound" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.66, - "num_samples": 122560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0082.wav", - "speed": 1 - } - ], - "original_duration": 7.66, - "original_num_samples": 122560, - "transcript": "the time is narrowed down to one and in that one miss clarke was the only person to touch her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.865, - "num_samples": 45840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0083.wav", - "speed": 1 - } - ], - "original_duration": 2.865, - "original_num_samples": 45840, - "transcript": "i will trouble you no further" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.35, - "num_samples": 53600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0084.wav", - "speed": 1 - } - ], - "original_duration": 3.35, - "original_num_samples": 53600, - "transcript": "sweetwater help me out of this" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.285, - "num_samples": 180560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0085.wav", - "speed": 1 - } - ], - "original_duration": 11.285, - "original_num_samples": 180560, - "transcript": "but vigour returned to him before he had well reached the door and he showed some of his old spirit as he thanked miss clarke and 
turned to take the elevator" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.68, - "num_samples": 58880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0086.wav", - "speed": 1 - } - ], - "original_duration": 3.68, - "original_num_samples": 58880, - "transcript": "the next minute she was in this lady's arms" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.625, - "num_samples": 154000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0087.wav", - "speed": 1 - } - ], - "original_duration": 9.625, - "original_num_samples": 154000, - "transcript": "no weapon protruded from the wound nor was any found on or near her in the mezzanine what follows" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.81, - "num_samples": 172960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0088.wav", - "speed": 1 - } - ], - "original_duration": 10.81, - "original_num_samples": 172960, - "transcript": "she struck the blow herself and the strength of purpose which led her to do this gave her the additional force to pull the weapon out and fling it from her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.52, - "num_samples": 200320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3081/166546/3081-166546-0089.wav", - "speed": 1 - } - ], - "original_duration": 12.52, - "original_num_samples": 200320, - "transcript": "it did not fall upon the floor around her therefore it flew through one of those openings into the lobby and there it either will be or has been found" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.55, - 
"num_samples": 232800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0000.wav", - "speed": 1 - } - ], - "original_duration": 14.55, - "original_num_samples": 232800, - "transcript": "he had written a number of books himself among them a history of dancing a history of costume a key to shakespeare's sonnets a study of the poetry of ernest dowson et cetera" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.985, - "num_samples": 63760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.985, - "original_num_samples": 63760, - "transcript": "hugh's written a delightful part for her and she's quite inexpressible" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.645, - "num_samples": 74320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.645, - "original_num_samples": 74320, - "transcript": "i happen to have mac connell's box for tonight or there'd be no chance of our getting places" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.32, - "num_samples": 37120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.32, - "original_num_samples": 37120, - "transcript": "alexander exclaimed mildly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.51, - "num_samples": 40160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0004.wav", - "speed": 1 - } - ], - "original_duration": 2.51, - "original_num_samples": 40160, - 
"transcript": "myself i always knew she had it in her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.165, - "num_samples": 210640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0005.wav", - "speed": 1 - } - ], - "original_duration": 13.165, - "original_num_samples": 210640, - "transcript": "do you know alexander mainhall looked with perplexity up into the top of the hansom and rubbed his pink cheek with his gloved finger do you know i sometimes think of taking to criticism seriously myself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.58, - "num_samples": 137280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.58, - "original_num_samples": 137280, - "transcript": "when they entered the stage box on the left the first act was well under way the scene being the interior of a cabin in the south of ireland" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.255, - "num_samples": 84080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.255, - "original_num_samples": 84080, - "transcript": "as they sat down a burst of applause drew alexander's attention to the stage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.32, - "num_samples": 101120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.32, - "original_num_samples": 101120, - "transcript": "of course hilda is irish the burgoynes have been stage people for generations and she has the irish voice" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.505, - "num_samples": 40080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.505, - "original_num_samples": 40080, - "transcript": "it's delightful to hear it in a london theatre" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.615, - "num_samples": 137840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.615, - "original_num_samples": 137840, - "transcript": "when she began to dance by way of showing the gossoons what she had seen in the fairy rings at night the house broke into a prolonged uproar" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.42, - "num_samples": 182720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0011.wav", - "speed": 1 - } - ], - "original_duration": 11.42, - "original_num_samples": 182720, - "transcript": "after her dance she withdrew from the dialogue and retreated to the ditch wall back of philly's burrow where she sat singing the rising of the moon and making a wreath of primroses for her donkey" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.86, - "num_samples": 61760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.86, - "original_num_samples": 61760, - "transcript": "mac connell let me introduce mister bartley alexander" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.02, - "num_samples": 112320, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0013.wav", - "speed": 1 - } - ], - "original_duration": 7.02, - "original_num_samples": 112320, - "transcript": "the playwright gave mainhall a curious look out of his deep set faded eyes and made a wry face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.92, - "num_samples": 78720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.92, - "original_num_samples": 78720, - "transcript": "he nodded curtly and made for the door dodging acquaintances as he went" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.38, - "num_samples": 54080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.38, - "original_num_samples": 54080, - "transcript": "i dare say it's quite true that there's never been any one else" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.43, - "num_samples": 54880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.43, - "original_num_samples": 54880, - "transcript": "he's another who's awfully keen about her let me introduce you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.21, - "num_samples": 99360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.21, - "original_num_samples": 99360, - "transcript": "sir harry towne bowed and said that he had met mister alexander and his wife in tokyo" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.404937, - "num_samples": 70479, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.404937, - "original_num_samples": 70479, - "transcript": "i say sir harry the little girl's going famously to night isn't she" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.555, - "num_samples": 56880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.555, - "original_num_samples": 56880, - "transcript": "the fact is she's feeling rather seedy poor child" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.35, - "num_samples": 37600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.35, - "original_num_samples": 37600, - "transcript": "a little attack of nerves possibly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.645, - "num_samples": 154320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0021.wav", - "speed": 1 - } - ], - "original_duration": 9.645, - "original_num_samples": 154320, - "transcript": "he bowed as the warning bell rang and mainhall whispered you know lord westmere of course the stooped man with the long gray mustache talking to lady dowle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.185, - "num_samples": 98960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.185, - "original_num_samples": 
98960, - "transcript": "in a moment peggy was on the stage again and alexander applauded vigorously with the rest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.5, - "num_samples": 152000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0023.wav", - "speed": 1 - } - ], - "original_duration": 9.5, - "original_num_samples": 152000, - "transcript": "in the half light he looked about at the stalls and boxes and smiled a little consciously recalling with amusement sir harry's judicial frown" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.35, - "num_samples": 261600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0024.wav", - "speed": 1 - } - ], - "original_duration": 16.35, - "original_num_samples": 261600, - "transcript": "he leaned forward and beamed felicitations as warmly as mainhall himself when at the end of the play she came again and again before the curtain panting a little and flushed her eyes dancing and her eager nervous little mouth tremulous with excitement" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.44, - "num_samples": 71040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.44, - "original_num_samples": 71040, - "transcript": "all the same he lifted his glass here's to you little hilda" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.385, - "num_samples": 38160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.385, - "original_num_samples": 38160, - "transcript": "i'm glad she's held her own 
since" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.005, - "num_samples": 80080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170138/1462-170138-0027.wav", - "speed": 1 - } - ], - "original_duration": 5.005, - "original_num_samples": 80080, - "transcript": "it was youth and poverty and proximity and everything was young and kindly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.405, - "num_samples": 246480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0000.wav", - "speed": 1 - } - ], - "original_duration": 15.405, - "original_num_samples": 246480, - "transcript": "on the last saturday in april the new york times published an account of the strike complications which were delaying alexander's new jersey bridge and stated that the engineer himself was in town and at his office on west tenth street" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.825062, - "num_samples": 77201, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.825062, - "original_num_samples": 77201, - "transcript": "over the fireplace there was a large old fashioned gilt mirror" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.845, - "num_samples": 45520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.845, - "original_num_samples": 45520, - "transcript": "he rose and crossed the room quickly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.735, - "num_samples": 139760, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.735, - "original_num_samples": 139760, - "transcript": "of course i know bartley she said at last that after this you won't owe me the least consideration but we sail on tuesday" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.275, - "num_samples": 132400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0004.wav", - "speed": 1 - } - ], - "original_duration": 8.275, - "original_num_samples": 132400, - "transcript": "i saw that interview in the paper yesterday telling where you were and i thought i had to see you that's all good night i'm going now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.72, - "num_samples": 59520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.72, - "original_num_samples": 59520, - "transcript": "let me take off your coat and your boots they're oozing water" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.46, - "num_samples": 55360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.46, - "original_num_samples": 55360, - "transcript": "if you'd sent me a note or telephoned me or anything" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.32, - "num_samples": 101120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.32, - "original_num_samples": 101120, - "transcript": "i told myself that if i were really thinking of 
you and not of myself a letter would be better than nothing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.15, - "num_samples": 50400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.15, - "original_num_samples": 50400, - "transcript": "he paused they never did to me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.925, - "num_samples": 46800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.925, - "original_num_samples": 46800, - "transcript": "oh bartley did you write to me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.945, - "num_samples": 47120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.945, - "original_num_samples": 47120, - "transcript": "alexander slipped his arm about her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.465, - "num_samples": 39440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.465, - "original_num_samples": 39440, - "transcript": "i think i have felt that you were coming" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.7, - "num_samples": 43200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.7, - "original_num_samples": 43200, - "transcript": "he bent his face over her hair" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.8, - "num_samples": 76800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.8, - "original_num_samples": 76800, - "transcript": "and i she whispered i felt that you were feeling that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.97, - "num_samples": 47520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.97, - "original_num_samples": 47520, - "transcript": "but when i came i thought i had been mistaken" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.055, - "num_samples": 96880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.055, - "original_num_samples": 96880, - "transcript": "i've been up in canada with my bridge and i arranged not to come to new york until after you had gone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.175, - "num_samples": 66800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.175, - "original_num_samples": 66800, - "transcript": "then when your manager added two more weeks i was already committed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.255, - "num_samples": 52080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.255, - "original_num_samples": 52080, - "transcript": "i'm going 
to do what you asked me to do when you were in london" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.025, - "num_samples": 32400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.025, - "original_num_samples": 32400, - "transcript": "only i'll do it more completely" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.69, - "num_samples": 43040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.69, - "original_num_samples": 43040, - "transcript": "then you don't know what you're talking about" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.15, - "num_samples": 34400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.15, - "original_num_samples": 34400, - "transcript": "yes i know very well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.555, - "num_samples": 40880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.555, - "original_num_samples": 40880, - "transcript": "alexander flushed angrily" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.51, - "num_samples": 104160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170145/1462-170145-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.51, - "original_num_samples": 104160, - "transcript": "i don't know what i ought to say but i don't believe you'd be happy truly 
i don't aren't you trying to frighten me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.715, - "num_samples": 75440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.715, - "original_num_samples": 75440, - "transcript": "the last two days of the voyage bartley found almost intolerable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.585, - "num_samples": 153360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.585, - "original_num_samples": 153360, - "transcript": "emerging at euston at half past three o'clock in the afternoon alexander had his luggage sent to the savoy and drove at once to bedford square" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.45, - "num_samples": 103200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.45, - "original_num_samples": 103200, - "transcript": "she blushed and smiled and fumbled his card in her confusion before she ran upstairs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.37, - "num_samples": 37920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.37, - "original_num_samples": 37920, - "transcript": "the room was empty when he entered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.255, - "num_samples": 116080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1462/170142/1462-170142-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.255, - "original_num_samples": 116080, - "transcript": "a coal fire was crackling in the grate and the lamps were lit for it was already beginning to grow dark outside" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.08, - "num_samples": 177280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0005.wav", - "speed": 1 - } - ], - "original_duration": 11.08, - "original_num_samples": 177280, - "transcript": "she called his name on the threshold but in her swift flight across the room she felt a change in him and caught herself up so deftly that he could not tell just when she did it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.195, - "num_samples": 99120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.195, - "original_num_samples": 99120, - "transcript": "she merely brushed his cheek with her lips and put a hand lightly and joyously on either shoulder" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.44, - "num_samples": 39040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.44, - "original_num_samples": 39040, - "transcript": "i never dreamed it would be you bartley" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.81, - "num_samples": 76960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.81, - "original_num_samples": 76960, - "transcript": "when did you come 
bartley and how did it happen you haven't spoken a word" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.335, - "num_samples": 101360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0009.wav", - "speed": 1 - } - ], - "original_duration": 6.335, - "original_num_samples": 101360, - "transcript": "she looked at his heavy shoulders and big determined head thrust forward like a catapult in leash" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.58, - "num_samples": 73280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.58, - "original_num_samples": 73280, - "transcript": "i'll do anything you wish me to bartley she said tremulously" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.26, - "num_samples": 52160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.26, - "original_num_samples": 52160, - "transcript": "he pulled up a window as if the air were heavy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.67, - "num_samples": 106720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.67, - "original_num_samples": 106720, - "transcript": "hilda watched him from her corner trembling and scarcely breathing dark shadows growing about her eyes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.9, - "num_samples": 62400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1462/170142/1462-170142-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.9, - "original_num_samples": 62400, - "transcript": "it it hasn't always made you miserable has it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.55, - "num_samples": 40800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.55, - "original_num_samples": 40800, - "transcript": "always but it's worse now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.01, - "num_samples": 48160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.01, - "original_num_samples": 48160, - "transcript": "it's unbearable it tortures me every minute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.24, - "num_samples": 99840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.24, - "original_num_samples": 99840, - "transcript": "i am not a man who can live two lives he went on feverishly each life spoils the other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.315, - "num_samples": 37040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.315, - "original_num_samples": 37040, - "transcript": "i get nothing but misery out of either" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.265, - "num_samples": 52240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1462/170142/1462-170142-0018.wav", - "speed": 1 - } - ], - "original_duration": 3.265, - "original_num_samples": 52240, - "transcript": "there is this deception between me and everything" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.245, - "num_samples": 163920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0019.wav", - "speed": 1 - } - ], - "original_duration": 10.245, - "original_num_samples": 163920, - "transcript": "at that word deception spoken with such self contempt the color flashed back into hilda's face as suddenly as if she had been struck by a whiplash" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.945, - "num_samples": 79120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.945, - "original_num_samples": 79120, - "transcript": "she bit her lip and looked down at her hands which were clasped tightly in front of her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.17, - "num_samples": 130720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0021.wav", - "speed": 1 - } - ], - "original_duration": 8.17, - "original_num_samples": 130720, - "transcript": "could you could you sit down and talk about it quietly bartley as if i were a friend and not some one who had to be defied" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.76, - "num_samples": 60160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.76, - "original_num_samples": 60160, - "transcript": "he dropped back heavily into his 
chair by the fire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.81, - "num_samples": 44960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.81, - "original_num_samples": 44960, - "transcript": "i have thought about it until i am worn out" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.155, - "num_samples": 34480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.155, - "original_num_samples": 34480, - "transcript": "after the very first" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.55, - "num_samples": 88800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.55, - "original_num_samples": 88800, - "transcript": "hilda's face quivered but she whispered yes i think it must have been" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.835, - "num_samples": 77360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.835, - "original_num_samples": 77360, - "transcript": "she pressed his hand gently in gratitude weren't you happy then at all" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.19, - "num_samples": 67040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.19, - "original_num_samples": 67040, - "transcript": "something of their troubling 
sweetness came back to alexander too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.005, - "num_samples": 48080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.005, - "original_num_samples": 48080, - "transcript": "presently it stole back to his coat sleeve" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.975, - "num_samples": 63600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.975, - "original_num_samples": 63600, - "transcript": "yes hilda i know that he said simply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.705, - "num_samples": 43280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.705, - "original_num_samples": 43280, - "transcript": "i understand bartley i was wrong" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.825, - "num_samples": 77200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0031.wav", - "speed": 1 - } - ], - "original_duration": 4.825, - "original_num_samples": 77200, - "transcript": "she listened intently but she heard nothing but the creaking of his chair" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.77, - "num_samples": 44320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0032.wav", - "speed": 1 - } - ], - "original_duration": 2.77, - "original_num_samples": 44320, - "transcript": "you want me to say it she 
whispered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.18, - "num_samples": 66880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0033.wav", - "speed": 1 - } - ], - "original_duration": 4.18, - "original_num_samples": 66880, - "transcript": "bartley leaned his head in his hands and spoke through his teeth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.52, - "num_samples": 40320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0034.wav", - "speed": 1 - } - ], - "original_duration": 2.52, - "original_num_samples": 40320, - "transcript": "it's got to be a clean break hilda" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.67, - "num_samples": 42720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0035.wav", - "speed": 1 - } - ], - "original_duration": 2.67, - "original_num_samples": 42720, - "transcript": "oh bartley what am i to do" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.07, - "num_samples": 49120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0036.wav", - "speed": 1 - } - ], - "original_duration": 3.07, - "original_num_samples": 49120, - "transcript": "you ask me to stay away from you because you want me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.325, - "num_samples": 69200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0037.wav", - "speed": 1 - } - ], - "original_duration": 4.325, - "original_num_samples": 69200, - "transcript": "i will ask the least imaginable but i must have something" - }, - { 
- "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.86, - "num_samples": 77760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0038.wav", - "speed": 1 - } - ], - "original_duration": 4.86, - "original_num_samples": 77760, - "transcript": "hilda sat on the arm of it and put her hands lightly on his shoulders" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.715, - "num_samples": 75440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0039.wav", - "speed": 1 - } - ], - "original_duration": 4.715, - "original_num_samples": 75440, - "transcript": "you see loving some one as i love you makes the whole world different" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.6, - "num_samples": 73600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0040.wav", - "speed": 1 - } - ], - "original_duration": 4.6, - "original_num_samples": 73600, - "transcript": "and then you came back not caring very much but it made no difference" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.575, - "num_samples": 89200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0041.wav", - "speed": 1 - } - ], - "original_duration": 5.575, - "original_num_samples": 89200, - "transcript": "she slid to the floor beside him as if she were too tired to sit up any longer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.24, - "num_samples": 51840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1462/170142/1462-170142-0042.wav", - "speed": 1 - } - ], - "original_duration": 3.24, - "original_num_samples": 
51840, - "transcript": "don't cry don't cry he whispered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.81, - "num_samples": 156960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.81, - "original_num_samples": 156960, - "transcript": "they also found a martian calendar the year had been divided into ten more or less equal months and one of them had been doma" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.855, - "num_samples": 109680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.855, - "original_num_samples": 109680, - "transcript": "bill chandler the zoologist had been going deeper and deeper into the old sea bottom of syrtis" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.55, - "num_samples": 104800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.55, - "original_num_samples": 104800, - "transcript": "that took the center of interest away from archaeology and started a new burst of activity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.36, - "num_samples": 261760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0003.wav", - "speed": 1 - } - ], - "original_duration": 16.36, - "original_num_samples": 261760, - "transcript": "the civilian specialists in other fields and the space force people who had been holding tape lines and making sketches and snapping cameras were all flying to lower syrtis to find out how much oxygen there was and what kind of life 
it supported" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.8, - "num_samples": 396800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0004.wav", - "speed": 1 - } - ], - "original_duration": 24.8, - "original_num_samples": 396800, - "transcript": "they had four or five species of what might loosely be called birds and something that could easily be classed as a reptile and a carnivorous mammal the size of a cat with birdlike claws and a herbivore almost identical with the piglike thing in the big darfhulva mural and another like a gazelle with a single horn in the middle of its forehead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.755, - "num_samples": 92080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.755, - "original_num_samples": 92080, - "transcript": "the daily newscasts from terra showed a corresponding shift in interest at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.565, - "num_samples": 41040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.565, - "original_num_samples": 41040, - "transcript": "tony's found the martians" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.605, - "num_samples": 89680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.605, - "original_num_samples": 89680, - "transcript": "it was locked from the inside and we had to burn it down with a torch that's where they are" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.79, - "num_samples": 156640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.79, - "original_num_samples": 156640, - "transcript": "gloria standish who had dropped in for lunch was on the mezzanine fairly screaming into a radiophone extension dozen and a half of them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.625, - "num_samples": 58000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.625, - "original_num_samples": 58000, - "transcript": "well of course they're dead what a question" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.56, - "num_samples": 104960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.56, - "original_num_samples": 104960, - "transcript": "martha remembered the closed door on the first survey they hadn't attempted opening it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.445, - "num_samples": 135120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0011.wav", - "speed": 1 - } - ], - "original_duration": 8.445, - "original_num_samples": 135120, - "transcript": "now it was burned away at both sides and lay still hot along the edges on the floor of the big office room in front" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.79, - "num_samples": 140640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0012.wav", - "speed": 1 - } - ], 
- "original_duration": 8.79, - "original_num_samples": 140640, - "transcript": "a floodlight was on in the room inside and lattimer was going around looking at things while a space force officer stood by the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.68, - "num_samples": 74880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.68, - "original_num_samples": 74880, - "transcript": "mass suicide that's what it was notice what's in the corners" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.265, - "num_samples": 36240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.265, - "original_num_samples": 36240, - "transcript": "yes charcoal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.63, - "num_samples": 122080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.63, - "original_num_samples": 122080, - "transcript": "so they just came in here and lit the charcoal and sat drinking together till they all fell asleep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.255, - "num_samples": 148080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0016.wav", - "speed": 1 - } - ], - "original_duration": 9.255, - "original_num_samples": 148080, - "transcript": "the terran public wanted to hear about martians and if live martians couldn't be found a room full of dead ones was the next best thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 
16, - "duration": 15.365, - "num_samples": 245840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0017.wav", - "speed": 1 - } - ], - "original_duration": 15.365, - "original_num_samples": 245840, - "transcript": "tony lattimer the discoverer was beginning to cash in on his attentions to gloria and his ingratiation with sid he was always either making voice and image talks for telecast or listening to the news from the home planet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.875, - "num_samples": 110000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0018.wav", - "speed": 1 - } - ], - "original_duration": 6.875, - "original_num_samples": 110000, - "transcript": "without question he had become overnight the most widely known archaeologist in history" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.345, - "num_samples": 149520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0019.wav", - "speed": 1 - } - ], - "original_duration": 9.345, - "original_num_samples": 149520, - "transcript": "not that i'm interested in all this for myself he disclaimed after listening to the telecast from terra two days after his discovery" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.975, - "num_samples": 47600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.975, - "original_num_samples": 47600, - "transcript": "bring it to the public attention dramatize it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.55, - "num_samples": 104800, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/251/136532/251-136532-0021.wav", - "speed": 1 - } - ], - "original_duration": 6.55, - "original_num_samples": 104800, - "transcript": "so i believe i shall go back at least for a while and see what i can do" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.535, - "num_samples": 24560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0022.wav", - "speed": 1 - } - ], - "original_duration": 1.535, - "original_num_samples": 24560, - "transcript": "lectures" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.395, - "num_samples": 134320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/136532/251-136532-0023.wav", - "speed": 1 - } - ], - "original_duration": 8.395, - "original_num_samples": 134320, - "transcript": "the organization of a society of martian archaeology with anthony lattimer ph d the logical candidate for the chair" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.435, - "num_samples": 54960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.435, - "original_num_samples": 54960, - "transcript": "i'll be glad to try sir he replied" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.48, - "num_samples": 87680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0001.wav", - "speed": 1 - } - ], - "original_duration": 5.48, - "original_num_samples": 87680, - "transcript": "inside a secret rocket telemetering device was mounted on its test stand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.13, - "num_samples": 50080, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.13, - "original_num_samples": 50080, - "transcript": "this isn't part of your testing routine is it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.4, - "num_samples": 70400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.4, - "original_num_samples": 70400, - "transcript": "another engineer rushed toward the door to see what was happening outside" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.985062, - "num_samples": 175761, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0004.wav", - "speed": 1 - } - ], - "original_duration": 10.985062, - "original_num_samples": 175761, - "transcript": "electronic equipment cascaded from the wall shelves and a heavy duty chain hoist came loose from its overhead track plunging to the floor with a terrifying crash" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.445, - "num_samples": 87120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.445, - "original_num_samples": 87120, - "transcript": "an instant later it crashed over pinning mark faber beneath it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.975, - "num_samples": 63600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.975, - "original_num_samples": 63600, - "transcript": "bud threw up his arms to protect himself 
but too late" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.54, - "num_samples": 56640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.54, - "original_num_samples": 56640, - "transcript": "for minutes no one stirred among the wreckage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.525, - "num_samples": 104400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.525, - "original_num_samples": 104400, - "transcript": "then tom who had been stunned by some falling debris raised himself to a sitting position" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.835, - "num_samples": 93360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.835, - "original_num_samples": 93360, - "transcript": "tom's eyes focused in horror on the wreckage enveloped by still billowing dust" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.955, - "num_samples": 127280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.955, - "original_num_samples": 127280, - "transcript": "the sky was visible through several gaping holes in the roof which was sagging dangerously on its supporting trusses" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.655, - "num_samples": 90480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0011.wav", - "speed": 1 - 
} - ], - "original_duration": 5.655, - "original_num_samples": 90480, - "transcript": "the young inventor had just noticed his friend lying pinned beneath a heavy beam nearby" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.485, - "num_samples": 39760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.485, - "original_num_samples": 39760, - "transcript": "his friend's eyelids flickered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.96, - "num_samples": 79360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.96, - "original_num_samples": 79360, - "transcript": "we'd better not try to move him tom decided we'll get an ambulance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.98, - "num_samples": 79680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.98, - "original_num_samples": 79680, - "transcript": "they picked their way through the wreckage and emerged on a scene of frightful destruction" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.615, - "num_samples": 41840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.615, - "original_num_samples": 41840, - "transcript": "let's see about getting help for mister faber" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.77, - "num_samples": 108320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/251/137823/251-137823-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.77, - "original_num_samples": 108320, - "transcript": "and the only truck we had available was in that burning shed the superintendent added bitterly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.855, - "num_samples": 61680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.855, - "original_num_samples": 61680, - "transcript": "anyhow we want to help got a job for us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.505, - "num_samples": 120080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.505, - "original_num_samples": 120080, - "transcript": "within minutes tom was in charge of clearing away rubble and extricating anyone who might be trapped inside the buildings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.29, - "num_samples": 196640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0019.wav", - "speed": 1 - } - ], - "original_duration": 12.29, - "original_num_samples": 196640, - "transcript": "the telephone line was soon repaired and a steady stream of rescue vehicles began arriving from harkness fire trucks three ambulances and private cars driven by volunteers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.425, - "num_samples": 86800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.425, - "original_num_samples": 86800, - "transcript": "the two girls were as 
much upset as tom's mother tom laughed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.82, - "num_samples": 109120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0021.wav", - "speed": 1 - } - ], - "original_duration": 6.82, - "original_num_samples": 109120, - "transcript": "mister swift came into the living room just then and told tom how worried missus swift and sandy had been" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.175, - "num_samples": 114800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.175, - "original_num_samples": 114800, - "transcript": "he smiled guiltily as he added but i must admit i was more than a little concerned myself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.335, - "num_samples": 37360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.335, - "original_num_samples": 37360, - "transcript": "he's a great scientist" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.46, - "num_samples": 39360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.46, - "original_num_samples": 39360, - "transcript": "tom nodded unhappily" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.66, - "num_samples": 58560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.66, - "original_num_samples": 58560, 
- "transcript": "male or female human or animal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/137823/251-137823-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "mister swift's eyes twinkled" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.26, - "num_samples": 100160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.26, - "original_num_samples": 100160, - "transcript": "he was young no spear had touched him no poison lurked in his wine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.725, - "num_samples": 59600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.725, - "original_num_samples": 59600, - "transcript": "i tell you it is not poison she cried" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.965, - "num_samples": 111440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.965, - "original_num_samples": 111440, - "transcript": "since his birth he has been guarded so closely that the cleverest poisoners of the east could not reach him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.02, - "num_samples": 176320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0003.wav", - "speed": 1 - } - ], - "original_duration": 11.02, - 
"original_num_samples": 176320, - "transcript": "as you well know there are ten men and ten women whose sole duty is to taste his food and wine and fifty armed warriors guard his chamber as they guard it now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.325, - "num_samples": 53200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.325, - "original_num_samples": 53200, - "transcript": "a low confused moan waned from his mouth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.31, - "num_samples": 84960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.31, - "original_num_samples": 84960, - "transcript": "the man shrugged his broad shoulders and turned back into the arabesque chamber" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.965, - "num_samples": 111440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.965, - "original_num_samples": 111440, - "transcript": "this man was clad in a brown camel hair robe and sandals and a green turban was on his head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.45, - "num_samples": 87200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.45, - "original_num_samples": 87200, - "transcript": "not until the heavens were in the proper order could they perform this necromancy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.23, 
- "num_samples": 99680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0008.wav", - "speed": 1 - } - ], - "original_duration": 6.23, - "original_num_samples": 99680, - "transcript": "with a long stained fingernail he mapped the constellations on the marble tiled floor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.955, - "num_samples": 143280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.955, - "original_num_samples": 143280, - "transcript": "the slant of the moon presaged evil for the king of vendhya the stars are in turmoil the serpent in the house of the elephant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.25, - "num_samples": 52000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.25, - "original_num_samples": 52000, - "transcript": "point of contact inquired the other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.47, - "num_samples": 119520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.47, - "original_num_samples": 119520, - "transcript": "all discarded portions of the human body still remain part of it attached to it by intangible connections" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.105, - "num_samples": 177680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0012.wav", - "speed": 1 - } - ], - "original_duration": 11.105, - "original_num_samples": 177680, - "transcript": "but at the 
urgent entreaty of the princess of khosala who loved bhunda chand vainly he gave her a lock of his long black hair as a token of remembrance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.225, - "num_samples": 131600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.225, - "original_num_samples": 131600, - "transcript": "by which a soul is drawn from its body and across gulfs of echoing space returned the man on the mat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.685, - "num_samples": 106960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0014.wav", - "speed": 1 - } - ], - "original_duration": 6.685, - "original_num_samples": 106960, - "transcript": "on the dais under the golden dome the king cried out again racked by awful paroxysms" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.38, - "num_samples": 86080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.38, - "original_num_samples": 86080, - "transcript": "they seek to snap the silver cord that binds me to my dying body" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.97, - "num_samples": 143520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.97, - "original_num_samples": 143520, - "transcript": "they cluster around me their hands are taloned their eyes are red like flame burning in darkness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.885, - 
"num_samples": 46160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.885, - "original_num_samples": 46160, - "transcript": "their fingers sear me like fire" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.83, - "num_samples": 45280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.83, - "original_num_samples": 45280, - "transcript": "i know now what brings me to the pyre" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.475, - "num_samples": 183600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0019.wav", - "speed": 1 - } - ], - "original_duration": 11.475, - "original_num_samples": 183600, - "transcript": "there they strove to break the silver cord of life and thrust my soul into the body of a foul night weird their sorcery summoned up from hell ah" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.555, - "num_samples": 88880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.555, - "original_num_samples": 88880, - "transcript": "your cry and the grip of your fingers brought me back but i am going fast" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.98, - "num_samples": 63680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.98, - "original_num_samples": 63680, - "transcript": "there was the old imperious note in his failing whisper" - }, - { - "files": [ - 
{ - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.54, - "num_samples": 72640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.54, - "original_num_samples": 72640, - "transcript": "you have never disobeyed me obey my last command" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.77, - "num_samples": 44320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/251/118436/251-118436-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.77, - "original_num_samples": 44320, - "transcript": "send my soul clean to asura" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.82, - "num_samples": 61120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.82, - "original_num_samples": 61120, - "transcript": "her father is a most remarkable person to say the least" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.845, - "num_samples": 109520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.845, - "original_num_samples": 109520, - "transcript": "but it is quite plain to me that all the arrangements for my wedding are going to be made by the snellings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.45, - "num_samples": 263200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0002.wav", - "speed": 1 - } - ], - "original_duration": 16.45, - "original_num_samples": 263200, - "transcript": "i do not know when it is going to be but it 
will be either next week or the week after certainly at the earliest possible moment and i shouldn't be at all surprised to learn that all mary ann's things had been already bought and perhaps some of them marked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.145, - "num_samples": 34320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.145, - "original_num_samples": 34320, - "transcript": "it is most delightful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.47, - "num_samples": 119520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.47, - "original_num_samples": 119520, - "transcript": "it might just as well be some one else's wedding so unimportant is the part which i am set to play in it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.265, - "num_samples": 68240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.265, - "original_num_samples": 68240, - "transcript": "for instance look at their behaviour in the matter of the ring" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.665, - "num_samples": 58640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.665, - "original_num_samples": 58640, - "transcript": "the accident in question occurred upon the sunday evening" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.33, - "num_samples": 53280, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.33, - "original_num_samples": 53280, - "transcript": "the girl is fretting but you don't seem to notice it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.615, - "num_samples": 57840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.615, - "original_num_samples": 57840, - "transcript": "i gasped positively gasped" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.12, - "num_samples": 33920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.12, - "original_num_samples": 33920, - "transcript": "that's it on your account" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.63, - "num_samples": 42080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.63, - "original_num_samples": 42080, - "transcript": "from a cousin of ours who's in that line" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.115, - "num_samples": 97840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.115, - "original_num_samples": 97840, - "transcript": "i never saw people like the snellings for possessing relatives in all sorts of lines" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.89, - "num_samples": 142240, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.89, - "original_num_samples": 142240, - "transcript": "i was persuaded that somebody besides that cousin got a profit out of mary ann's engagement ring but i handed over the amount" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.84, - "num_samples": 77440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.84, - "original_num_samples": 77440, - "transcript": "it is from her action in that matter that my suspicion springs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.17, - "num_samples": 66720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.17, - "original_num_samples": 66720, - "transcript": "there she owns a cottage or it may be a pigstye for all i know" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.985, - "num_samples": 175760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0015.wav", - "speed": 1 - } - ], - "original_duration": 10.985, - "original_num_samples": 175760, - "transcript": "when she heard of my engagement with mary ann she wrote and suggested that we should spend our honeymoon in her cottage or pigstye and that i should pay her rent for it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.64, - "num_samples": 58240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0016.wav", - "speed": 1 - } - ], - "original_duration": 3.64, - "original_num_samples": 58240, - "transcript": "there were 
no signs of faltering about her flow of language" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.99, - "num_samples": 79840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.99, - "original_num_samples": 79840, - "transcript": "i found that as a woman of business she was beyond all my expectations" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.885, - "num_samples": 94160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.885, - "original_num_samples": 94160, - "transcript": "it turned out that she had a little money of her own about a hundred and thirty pounds a year" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.075, - "num_samples": 81200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.075, - "original_num_samples": 81200, - "transcript": "and of course i had my expectations and she had hers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.525, - "num_samples": 88400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.525, - "original_num_samples": 88400, - "transcript": "it was plain that together we should manage most comfortably delightfully in fact" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.875, - "num_samples": 62000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0021.wav", - "speed": 1 - 
} - ], - "original_duration": 3.875, - "original_num_samples": 62000, - "transcript": "i shall make papa give me five hundred pounds at least" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.525, - "num_samples": 88400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0022.wav", - "speed": 1 - } - ], - "original_duration": 5.525, - "original_num_samples": 88400, - "transcript": "a bird in the hand is worth two in a bush' and it will be something to have by us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.845, - "num_samples": 61520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0023.wav", - "speed": 1 - } - ], - "original_duration": 3.845, - "original_num_samples": 61520, - "transcript": "i know what mamma can afford to give and i will see she gives it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.94, - "num_samples": 79040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.94, - "original_num_samples": 79040, - "transcript": "and i will see that there is no shirking about the boys or about the girls either" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.845, - "num_samples": 157520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0025.wav", - "speed": 1 - } - ], - "original_duration": 9.845, - "original_num_samples": 157520, - "transcript": "i have drawn up a list of all the people who ought to give us a present and i shall tell them what they ought to give it won't be my fault if i don't get it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 10.205, - "num_samples": 163280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0026.wav", - "speed": 1 - } - ], - "original_duration": 10.205, - "original_num_samples": 163280, - "transcript": "of course there are some people with whom you can't be perfectly plain but i shall be as plain as i can there's a way and a manner of doing that kind of thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.32, - "num_samples": 37120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.32, - "original_num_samples": 37120, - "transcript": "hers has been prodigious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.25, - "num_samples": 148000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0028.wav", - "speed": 1 - } - ], - "original_duration": 9.25, - "original_num_samples": 148000, - "transcript": "she has a knack of getting people to do what she wishes and to give her what she wants which is a little short of miraculous" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.83, - "num_samples": 93280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0029.wav", - "speed": 1 - } - ], - "original_duration": 5.83, - "original_num_samples": 93280, - "transcript": "i notice that they are generally persons who have already tendered their offerings" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.21, - "num_samples": 147360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0030.wav", - "speed": 1 - } - ], - 
"original_duration": 9.21, - "original_num_samples": 147360, - "transcript": "the fact of having given mary ann a wedding present seems to fill them with a feeling of rancorous acidity which to me is inexplicable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.705, - "num_samples": 43280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0031.wav", - "speed": 1 - } - ], - "original_duration": 2.705, - "original_num_samples": 43280, - "transcript": "such is the selfishness of human nature" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.5, - "num_samples": 104000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0032.wav", - "speed": 1 - } - ], - "original_duration": 6.5, - "original_num_samples": 104000, - "transcript": "but why on that account they should pity me i altogether fail to understand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.82, - "num_samples": 125120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0033.wav", - "speed": 1 - } - ], - "original_duration": 7.82, - "original_num_samples": 125120, - "transcript": "we have all been giving mary ann presents and i suppose you mister whiting have been giving her something too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.45, - "num_samples": 87200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0034.wav", - "speed": 1 - } - ], - "original_duration": 5.45, - "original_num_samples": 87200, - "transcript": "that was what missus macpherson said to me only the other day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.865, - 
"num_samples": 93840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0035.wav", - "speed": 1 - } - ], - "original_duration": 5.865, - "original_num_samples": 93840, - "transcript": "and what inquired missus macpherson has mary ann given you her love" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.96, - "num_samples": 31360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0036.wav", - "speed": 1 - } - ], - "original_duration": 1.96, - "original_num_samples": 31360, - "transcript": "someone sniggered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.365, - "num_samples": 149840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0037.wav", - "speed": 1 - } - ], - "original_duration": 9.365, - "original_num_samples": 149840, - "transcript": "i cannot pretend to explain why except on the supposition that romance is dead at least in that circle of society in which the snellings move" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.305, - "num_samples": 116880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0038.wav", - "speed": 1 - } - ], - "original_duration": 7.305, - "original_num_samples": 116880, - "transcript": "as it is unless i am mistaken some of the rending will be on our side and they know it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.2, - "num_samples": 51200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0039.wav", - "speed": 1 - } - ], - "original_duration": 3.2, - "original_num_samples": 51200, - "transcript": "p s the cards are out for the wedding" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.365, - "num_samples": 69840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0040.wav", - "speed": 1 - } - ], - "original_duration": 4.365, - "original_num_samples": 69840, - "transcript": "we are going for our honeymoon to italy and the south of france" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.96, - "num_samples": 63360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0041.wav", - "speed": 1 - } - ], - "original_duration": 3.96, - "original_num_samples": 63360, - "transcript": "a second cousin of mary ann's is in the cook's tours line" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.175, - "num_samples": 178800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0042.wav", - "speed": 1 - } - ], - "original_duration": 11.175, - "original_num_samples": 178800, - "transcript": "he has given us free passes all the way to the end of our journey and all the way back again and coupons for free board and lodging at the hotel it's a wedding present" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.01, - "num_samples": 96160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83705/2428-83705-0043.wav", - "speed": 1 - } - ], - "original_duration": 6.01, - "original_num_samples": 96160, - "transcript": "besides which we can always sell the coupons and railway passes which we don't use" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.305, - "num_samples": 212880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2428/83699/2428-83699-0000.wav", - "speed": 1 - } - ], - "original_duration": 13.305, - "original_num_samples": 212880, - "transcript": "i imagine there were several kinds of old fashioned christmases but it could hardly be worse than a chop in my chambers or horror of horrors at the club or my cousin lucy's notion of what she calls the festive season" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.07, - "num_samples": 33120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.07, - "original_num_samples": 33120, - "transcript": "festive yes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.26, - "num_samples": 68160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.26, - "original_num_samples": 68160, - "transcript": "the reply was written in a sprawling feminine hand it was a little vague" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.265, - "num_samples": 100240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.265, - "original_num_samples": 100240, - "transcript": "it appeared that the terms would be five guineas but there was no mention of the length of time which that fee would cover" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.88, - "num_samples": 30080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0004.wav", - "speed": 1 - } - ], - "original_duration": 1.88, - "original_num_samples": 30080, - "transcript": "the whole thing was a trifle odd" 
- }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.09, - "num_samples": 145440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.09, - "original_num_samples": 145440, - "transcript": "there was nothing said about the sort of accommodation which would be provided nothing about the kind of establishment which was maintained or the table which was kept" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.825, - "num_samples": 109200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0006.wav", - "speed": 1 - } - ], - "original_duration": 6.825, - "original_num_samples": 109200, - "transcript": "now it is a remarkable thing that i have always had an extraordinary predilection for the name madge i do not know why" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.71, - "num_samples": 91360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.71, - "original_num_samples": 91360, - "transcript": "i have never known a madge and yet from my boyhood upward i have desired to meet one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.38, - "num_samples": 166080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.38, - "original_num_samples": 166080, - "transcript": "under such circumstances she was hardly likely to be lively herself but her name was madge and it was the accident of her christian name which decided me to go" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 1.85, - "num_samples": 29600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0009.wav", - "speed": 1 - } - ], - "original_duration": 1.85, - "original_num_samples": 29600, - "transcript": "i had no illusions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.12, - "num_samples": 49920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.12, - "original_num_samples": 49920, - "transcript": "i did not expect a princely entertainment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.575, - "num_samples": 41200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.575, - "original_num_samples": 41200, - "transcript": "all night it had been blowing and raining" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.125, - "num_samples": 130000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.125, - "original_num_samples": 130000, - "transcript": "i felt quite lively myself as i mingled with the christmas crowd looking for things which might not turn out to be absolutely preposterous" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.29, - "num_samples": 52640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.29, - "original_num_samples": 52640, - "transcript": "i even bought something for madge i mean missus wilson" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.215, - "num_samples": 35440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.215, - "original_num_samples": 35440, - "transcript": "it was a horrible journey" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.195, - "num_samples": 35120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.195, - "original_num_samples": 35120, - "transcript": "he was impervious to reason" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.515, - "num_samples": 136240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.515, - "original_num_samples": 136240, - "transcript": "it is some satisfaction for me to be able to reflect that i made it warm for the officials however cold i might have been myself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.285, - "num_samples": 212560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0017.wav", - "speed": 1 - } - ], - "original_duration": 13.285, - "original_num_samples": 212560, - "transcript": "when at last i reached crofton my journey's end it turned out that the station staff consisted of a half witted individual who was stationmaster porter and clerk combined and a hulking lad who did whatever else there was to do" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.345, - "num_samples": 229520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2428/83699/2428-83699-0018.wav", - "speed": 1 - } - ], - "original_duration": 14.345, - "original_num_samples": 229520, - "transcript": "no one had come to meet me the village was about half a mile and hangar dene the house for which my steps were bent about four miles by the road how far it was across ploughed fields my informant did not mention" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.91, - "num_samples": 78560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0019.wav", - "speed": 1 - } - ], - "original_duration": 4.91, - "original_num_samples": 78560, - "transcript": "there was a trap at the boy and blunderbuss but that required fetching" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.89, - "num_samples": 78240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.89, - "original_num_samples": 78240, - "transcript": "when the trap did appear it looked to me uncommonly like an open spring cart" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.815, - "num_samples": 45040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.815, - "original_num_samples": 45040, - "transcript": "in it i was deposited with my luggage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.975, - "num_samples": 31600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0022.wav", - "speed": 1 - } - ], - "original_duration": 1.975, - "original_num_samples": 31600, - "transcript": "i did not know what he meant" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.6, - "num_samples": 41600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.6, - "original_num_samples": 41600, - "transcript": "i did not ask i was beyond it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.25, - "num_samples": 68000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.25, - "original_num_samples": 68000, - "transcript": "i was chilled to the bone wet tired hungry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.005, - "num_samples": 128080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0025.wav", - "speed": 1 - } - ], - "original_duration": 8.005, - "original_num_samples": 128080, - "transcript": "i had long been wishing that an old fashioned christmas had been completely extinct before i had thought of adventuring in quest of one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.14, - "num_samples": 50240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0026.wav", - "speed": 1 - } - ], - "original_duration": 3.14, - "original_num_samples": 50240, - "transcript": "here we be that might be so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.965, - "num_samples": 79440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.965, - "original_num_samples": 79440, - "transcript": "there be the door in front of you you go up three steps 
if you can find em" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.0, - "num_samples": 64000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0028.wav", - "speed": 1 - } - ], - "original_duration": 4.0, - "original_num_samples": 64000, - "transcript": "there's a knocker if none of em haven't twisted it off" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.295, - "num_samples": 100720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0029.wav", - "speed": 1 - } - ], - "original_duration": 6.295, - "original_num_samples": 100720, - "transcript": "there appeared to be no knocker though whether it had been twisted off was more than i could say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.625, - "num_samples": 58000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.625, - "original_num_samples": 58000, - "transcript": "no answer though i allowed a more than decent interval" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.31, - "num_samples": 52960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0031.wav", - "speed": 1 - } - ], - "original_duration": 3.31, - "original_num_samples": 52960, - "transcript": "better ring again suggested the driver hard" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.175, - "num_samples": 50800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.175, - "original_num_samples": 50800, - 
"transcript": "maybe they're up to some of their games and wants rousing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.92, - "num_samples": 62720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0033.wav", - "speed": 1 - } - ], - "original_duration": 3.92, - "original_num_samples": 62720, - "transcript": "the bell reverberated through what seemed like an empty house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.12, - "num_samples": 129920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0034.wav", - "speed": 1 - } - ], - "original_duration": 8.12, - "original_num_samples": 129920, - "transcript": "presently feet were heard advancing along the passage several pairs it seemed and a light gleamed through the window over the door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.725, - "num_samples": 43600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0035.wav", - "speed": 1 - } - ], - "original_duration": 2.725, - "original_num_samples": 43600, - "transcript": "a voice inquired who's there" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.6, - "num_samples": 73600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0036.wav", - "speed": 1 - } - ], - "original_duration": 4.6, - "original_num_samples": 73600, - "transcript": "the information was greeted with what sounded uncommonly like a chorus of laughter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.395, - "num_samples": 102320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2428/83699/2428-83699-0037.wav", - "speed": 1 - } - ], - "original_duration": 6.395, - "original_num_samples": 102320, - "transcript": "there was a rush of retreating feet an expostulating voice then darkness again and silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.67, - "num_samples": 42720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0038.wav", - "speed": 1 - } - ], - "original_duration": 2.67, - "original_num_samples": 42720, - "transcript": "who lives here are the people mad" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.98, - "num_samples": 31680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0039.wav", - "speed": 1 - } - ], - "original_duration": 1.98, - "original_num_samples": 31680, - "transcript": "i tolled the bell again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.825, - "num_samples": 125200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0040.wav", - "speed": 1 - } - ], - "original_duration": 7.825, - "original_num_samples": 125200, - "transcript": "after a vast amount of unfastening the door was opened and on the threshold there stood a girl with a lighted candle in her hand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.07, - "num_samples": 33120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0041.wav", - "speed": 1 - } - ], - "original_duration": 2.07, - "original_num_samples": 33120, - "transcript": "i'm mister christopher from london" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.43, - "num_samples": 102880, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2428/83699/2428-83699-0042.wav", - "speed": 1 - } - ], - "original_duration": 6.43, - "original_num_samples": 102880, - "transcript": "we've lost the key of the cellar and there's nothing out except water and i don't think you'd care for that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 27.99, - "num_samples": 447840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0000.wav", - "speed": 1 - } - ], - "original_duration": 27.99, - "original_num_samples": 447840, - "transcript": "with an education which ought to have ensured me an honourable standing in the world with some intelligence wit good literary and scientific knowledge and endowed with those accidental physical qualities which are such a good passport into society i found myself at the age of twenty the mean follower of a sublime art in which if great talent is rightly admired mediocrity is as rightly despised" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 21.17, - "num_samples": 338720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0001.wav", - "speed": 1 - } - ], - "original_duration": 21.17, - "original_num_samples": 338720, - "transcript": "i was compelled by poverty to become a member of a musical band in which i could expect neither esteem nor consideration and i was well aware that i should be the laughing stock of the persons who had known me as a doctor in divinity as an ecclesiastic and as an officer in the army and had welcomed me in the highest society" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.755, - "num_samples": 316080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0002.wav", - "speed": 1 
- } - ], - "original_duration": 19.755, - "original_num_samples": 316080, - "transcript": "i felt that in my first profession as i was not blessed with the vocation necessary to it i should have succeeded only by dint of hypocrisy and i should have been despicable in my own estimation even if i had seen the purple mantle on my shoulders for the greatest dignities cannot silence a man's own conscience" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.35, - "num_samples": 309600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0003.wav", - "speed": 1 - } - ], - "original_duration": 19.35, - "original_num_samples": 309600, - "transcript": "besides i was of opinion that a man's profession whatever it might be ought to supply him with enough money to satisfy all his wants and the very poor pay of an officer would never have been sufficient to cover my expenses because my education had given me greater wants than those of officers in general" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.165, - "num_samples": 66640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.165, - "original_num_samples": 66640, - "transcript": "our scandalous proceedings often exposed us to the greatest danger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.72, - "num_samples": 139520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.72, - "original_num_samples": 139520, - "transcript": "we would very often spend the whole night rambling about the city inventing and carrying into execution the most impertinent practical jokes" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.145, - "num_samples": 114320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.145, - "original_num_samples": 114320, - "transcript": "we did the same with physicians whom we often sent half dressed to some nobleman who was enjoying excellent health" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.855, - "num_samples": 285680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0007.wav", - "speed": 1 - } - ], - "original_duration": 17.855, - "original_num_samples": 285680, - "transcript": "whenever we could contrive to get into a church tower we thought it great fun to frighten all the parish by ringing the alarm bell as if some fire had broken out but that was not all we always cut the bell ropes so that in the morning the churchwardens had no means of summoning the faithful to early mass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.83, - "num_samples": 61280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.83, - "original_num_samples": 61280, - "transcript": "this is the amusing adventure which closed our exploits" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.62, - "num_samples": 121920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0009.wav", - "speed": 1 - } - ], - "original_duration": 7.62, - "original_num_samples": 121920, - "transcript": "in every one of the seventy two parishes of the city of venice there is a large public house called magazzino" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.01, - "num_samples": 160160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.01, - "original_num_samples": 160160, - "transcript": "yet there are a few private rooms which contain a table surrounded with benches in which a respectable family or a few friends can enjoy themselves in a decent way" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.68, - "num_samples": 138880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0011.wav", - "speed": 1 - } - ], - "original_duration": 8.68, - "original_num_samples": 138880, - "transcript": "the waiter of the magazzino came to be paid and our chief gave him what was due enjoining silence under penalty of death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.985, - "num_samples": 47760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.985, - "original_num_samples": 47760, - "transcript": "we took our three prisoners to a large boat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.47, - "num_samples": 39520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.47, - "original_num_samples": 39520, - "transcript": "where is my husband" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.52, - "num_samples": 56320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0014.wav", - "speed": 1 - } - ], - 
"original_duration": 3.52, - "original_num_samples": 56320, - "transcript": "never fear you shall see him again to morrow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.21, - "num_samples": 163360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0015.wav", - "speed": 1 - } - ], - "original_duration": 10.21, - "original_num_samples": 163360, - "transcript": "my readers may imagine whether we felt inclined to laugh when the charming creature bade us good night thanking us all with perfect good faith" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.625, - "num_samples": 74000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.625, - "original_num_samples": 74000, - "transcript": "two days afterwards our nocturnal orgy began to be talked of" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.985, - "num_samples": 223760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0017.wav", - "speed": 1 - } - ], - "original_duration": 13.985, - "original_num_samples": 223760, - "transcript": "it went on to say that the two men who had carried her off had taken her to such a place where they had an hour later been met by the other six and that they had all repaired to the two swords where they had spent an hour in drinking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.51, - "num_samples": 136160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0018.wav", - "speed": 1 - } - ], - "original_duration": 8.51, - "original_num_samples": 136160, - "transcript": "there was no cowardly traitor 
amongst us although we were all poor but fear had its effect and our nocturnal pranks were not renewed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.33, - "num_samples": 85280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.33, - "original_num_samples": 85280, - "transcript": "i picked it up and coming up to him just as he was going down the steps i handed it to him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.33, - "num_samples": 117280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0020.wav", - "speed": 1 - } - ], - "original_duration": 7.33, - "original_num_samples": 117280, - "transcript": "i told him and he insisted upon my coming with him in the gondola saying that he would leave me at my house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.945, - "num_samples": 159120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0021.wav", - "speed": 1 - } - ], - "original_duration": 9.945, - "original_num_samples": 159120, - "transcript": "i rubbed it with all my strength but he told me in a sort of indistinct whisper that the numbness was spreading all along the left side and that he was dying" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.02, - "num_samples": 224320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0022.wav", - "speed": 1 - } - ], - "original_duration": 14.02, - "original_num_samples": 224320, - "transcript": "i jumped out of the gondola and found myself on the very spot where three years before i had taught razetta such a forcible lesson i 
enquired for a surgeon at the first coffee house and ran to the house that was pointed out to me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.29, - "num_samples": 196640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0023.wav", - "speed": 1 - } - ], - "original_duration": 12.29, - "original_num_samples": 196640, - "transcript": "taking everything upon myself i ordered a servant to hurry out for a physician who came in a short time and ordered the patient to be bled again thus approving the first bleeding prescribed by me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.76, - "num_samples": 108160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.76, - "original_num_samples": 108160, - "transcript": "thinking i had a right to watch the sick man i settled myself near his bed to give him every care he required" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.445, - "num_samples": 119120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0025.wav", - "speed": 1 - } - ], - "original_duration": 7.445, - "original_num_samples": 119120, - "transcript": "they did not know who i was and did not like to ask me whilst i thought it better to preserve a modest silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.07, - "num_samples": 145120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0026.wav", - "speed": 1 - } - ], - "original_duration": 9.07, - "original_num_samples": 145120, - "transcript": "he had gambled and lost a great deal and his brother was his most bitter enemy 
because he was infatuated with the idea that he had tried to poison him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.19, - "num_samples": 211040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0027.wav", - "speed": 1 - } - ], - "original_duration": 13.19, - "original_num_samples": 211040, - "transcript": "the physician who attended him was named terro he thought by some peculiar train of reasoning that he could cure him by applying a mercurial ointment to the chest to which no one raised any objection" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.9, - "num_samples": 62400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.9, - "original_num_samples": 62400, - "transcript": "delighted with such a fortunate result we lay down again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.905, - "num_samples": 46480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0029.wav", - "speed": 1 - } - ], - "original_duration": 2.905, - "original_num_samples": 46480, - "transcript": "he entreated me to tell him the truth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.375, - "num_samples": 102000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0030.wav", - "speed": 1 - } - ], - "original_duration": 6.375, - "original_num_samples": 102000, - "transcript": "what extraordinary things will sometimes occur from mere chance or from the force of circumstances" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.145, - "num_samples": 274320, 
- "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0031.wav", - "speed": 1 - } - ], - "original_duration": 17.145, - "original_num_samples": 274320, - "transcript": "unwilling to hurt his vanity by telling him that he was mistaken i took the wild resolution of informing him in the presence of his two friends that i possessed a certain numeral calculus which gave answers also in numbers to any questions i liked to put" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.77, - "num_samples": 60320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.77, - "original_num_samples": 60320, - "transcript": "i obeyed implicitly and met your excellency" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.465, - "num_samples": 39440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0033.wav", - "speed": 1 - } - ], - "original_duration": 2.465, - "original_num_samples": 39440, - "transcript": "the three friends were astounded" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.865, - "num_samples": 125840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0034.wav", - "speed": 1 - } - ], - "original_duration": 7.865, - "original_num_samples": 125840, - "transcript": "i declared myself quite willing for it was necessary to brazen it out after having ventured as far as i had done" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.9, - "num_samples": 158400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0035.wav", - "speed": 1 - } - ], - 
"original_duration": 9.9, - "original_num_samples": 158400, - "transcript": "he wrote the question and gave it to me i read it i could not understand either the subject or the meaning of the words but it did not matter i had to give an answer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.82, - "num_samples": 109120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0036.wav", - "speed": 1 - } - ], - "original_duration": 6.82, - "original_num_samples": 109120, - "transcript": "if the question was so obscure that i could not make out the sense of it it was natural that i should not understand the answer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.735, - "num_samples": 91760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0037.wav", - "speed": 1 - } - ], - "original_duration": 5.735, - "original_num_samples": 91760, - "transcript": "they all asked me how long i would require to teach them the rules of my sublime calculus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.955, - "num_samples": 223280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0038.wav", - "speed": 1 - } - ], - "original_duration": 13.955, - "original_num_samples": 223280, - "transcript": "not very long i answered and i will teach you as you wish although the hermit assured me that i would die suddenly within three days if i communicated my science to anyone but i have no faith whatever in that prediction" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.31, - "num_samples": 324960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0039.wav", - "speed": 1 - 
} - ], - "original_duration": 20.31, - "original_num_samples": 324960, - "transcript": "they believed that through me they possessed the philosopher's stone the universal panacea the intercourse with all the elementary heavenly and infernal spirits they had no doubt whatever that thanks to my sublime science they could find out the secrets of every government in europe" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.255, - "num_samples": 212080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0040.wav", - "speed": 1 - } - ], - "original_duration": 13.255, - "original_num_samples": 212080, - "transcript": "but although believing fully in my oracles they were too kind hearted to think them the work of the devil and it suited their natural goodness better to believe my answers inspired by some heavenly spirit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.025, - "num_samples": 112400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0041.wav", - "speed": 1 - } - ], - "original_duration": 7.025, - "original_num_samples": 112400, - "transcript": "they were not only good christians and faithful to the church but even real devotees and full of scruples" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.155, - "num_samples": 178480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0042.wav", - "speed": 1 - } - ], - "original_duration": 11.155, - "original_num_samples": 178480, - "transcript": "as for the eucharist transubstantiation the real presence it was all no mystery to them but palpable evidence and yet they were not jesuits" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.615, - 
"num_samples": 169840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0043.wav", - "speed": 1 - } - ], - "original_duration": 10.615, - "original_num_samples": 169840, - "transcript": "i might be told that if i had wished to follow the rules of pure morality i ought either to have declined intimate intercourse with them or to have undeceived them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.705, - "num_samples": 267280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0044.wav", - "speed": 1 - } - ], - "original_duration": 16.705, - "original_num_samples": 267280, - "transcript": "besides i found it very flattering to my vanity to become the subject of the speculative chattering of empty fools who having nothing else to do are always trying to find out the cause of every moral phenomenon they meet with which their narrow intellect cannot understand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.675, - "num_samples": 74800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0045.wav", - "speed": 1 - } - ], - "original_duration": 4.675, - "original_num_samples": 74800, - "transcript": "whoever you may be i am indebted to you for my life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.25, - "num_samples": 148000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0046.wav", - "speed": 1 - } - ], - "original_duration": 9.25, - "original_num_samples": 148000, - "transcript": "your apartment is ready you may send your clothes you shall have a servant a gondola at your orders my own table and ten sequins a month" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 16.755, - "num_samples": 268080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0047.wav", - "speed": 1 - } - ], - "original_duration": 16.755, - "original_num_samples": 268080, - "transcript": "you need not think of the future think only of enjoying yourself and take me as your adviser in everything that may happen to you in everything you may wish to undertake and you may be certain of always finding me your friend" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.245, - "num_samples": 115920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3170/137482/3170-137482-0048.wav", - "speed": 1 - } - ], - "original_duration": 7.245, - "original_num_samples": 115920, - "transcript": "i threw myself at his feet to assure him of my gratitude and embraced him calling him my father" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.13, - "num_samples": 50080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.13, - "original_num_samples": 50080, - "transcript": "shasta rambles and modoc memories" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.86, - "num_samples": 333760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0001.wav", - "speed": 1 - } - ], - "original_duration": 20.86, - "original_num_samples": 333760, - "transcript": "arctic beauty and desolation with their blessings and dangers all may be found here to test the endurance and skill of adventurous climbers but far better than climbing the mountain is going around its warm fertile base enjoying its bounties like a bee circling around a bank of flowers" - }, - { - "files": [ - { 
- "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.17, - "num_samples": 114720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.17, - "original_num_samples": 114720, - "transcript": "perhaps the profession of doing good may be full but every body should be kind at least to himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.67, - "num_samples": 74720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.67, - "original_num_samples": 74720, - "transcript": "go quietly alone no harm will befall you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.885, - "num_samples": 46160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0004.wav", - "speed": 1 - } - ], - "original_duration": 2.885, - "original_num_samples": 46160, - "transcript": "but it is far better to go afoot" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.46, - "num_samples": 135360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.46, - "original_num_samples": 135360, - "transcript": "one blanket will be enough to carry or you may forego the pleasure and burden altogether as wood for fires is everywhere abundant" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.55, - "num_samples": 40800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.55, - "original_num_samples": 40800, - 
"transcript": "only a little food will be required" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.965, - "num_samples": 111440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.965, - "original_num_samples": 111440, - "transcript": "thus one saunters on and on in the glorious radiance in utter peace and forgetfulness of time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.985, - "num_samples": 207760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0008.wav", - "speed": 1 - } - ], - "original_duration": 12.985, - "original_num_samples": 207760, - "transcript": "yet strange to say there are days even here somewhat dull looking when the mountain seems uncommunicative sending out no appreciable invitation as if not at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.975, - "num_samples": 127600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0009.wav", - "speed": 1 - } - ], - "original_duration": 7.975, - "original_num_samples": 127600, - "transcript": "at such time its height seems much less as if crouching and weary it were taking rest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.545, - "num_samples": 248720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0010.wav", - "speed": 1 - } - ], - "original_duration": 15.545, - "original_num_samples": 248720, - "transcript": "every crystal dances responsive to the touches of the sun and currents of sap in the growing cells of all the vegetation are ever in a vital whirl and rush and though many feet and wings are folded how 
many are astir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.73, - "num_samples": 171680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.73, - "original_num_samples": 171680, - "transcript": "slight rainstorms are likely to be encountered in a trip round the mountain but one may easily find shelter beneath well thatched trees that shed the rain like a roof" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.09, - "num_samples": 193440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0012.wav", - "speed": 1 - } - ], - "original_duration": 12.09, - "original_num_samples": 193440, - "transcript": "then the shining of the wet leaves is delightful and the steamy fragrance and the burst of bird song from a multitude of thrushes and finches and warblers that have nests in the chaparral" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.12, - "num_samples": 145920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0013.wav", - "speed": 1 - } - ], - "original_duration": 9.12, - "original_num_samples": 145920, - "transcript": "a thousand thousand voices are heard but so finely blended they seem a part of the night itself and make a deeper silence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.205, - "num_samples": 115280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0014.wav", - "speed": 1 - } - ], - "original_duration": 7.205, - "original_num_samples": 115280, - "transcript": "in setting out from strawberry valley by bearing off to the northwestward a few miles you may see" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.26, - "num_samples": 212160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0015.wav", - "speed": 1 - } - ], - "original_duration": 13.26, - "original_num_samples": 212160, - "transcript": "in approaching it its suspicious looking yellow spotted hood and watchful attitude will be likely to make you go cautiously through the bog where it stands as if you were approaching a dangerous snake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.75, - "num_samples": 140000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.75, - "original_num_samples": 140000, - "transcript": "it is lined with emerald algae and mosses and shaded with alder willow and thorn bushes which give it a fine setting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.085, - "num_samples": 225360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0017.wav", - "speed": 1 - } - ], - "original_duration": 14.085, - "original_num_samples": 225360, - "transcript": "it is three or four miles long and terminates at an elevation of about nine thousand five hundred feet above sea level in moraine sprinkled ice cliffs sixty feet high" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.65, - "num_samples": 90400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.65, - "original_num_samples": 90400, - "transcript": "the long gray slopes leading up to the glacier seem remarkably smooth and unbroken" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.62, - "num_samples": 281920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0019.wav", - "speed": 1 - } - ], - "original_duration": 17.62, - "original_num_samples": 281920, - "transcript": "most of the drainage of the glacier vanishes at once in the porous rocks to reappear in springs in the distant valley and it is only in time of flood that the channel carries much water then there are several fine falls in the gorge six hundred feet or more in height" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.02, - "num_samples": 256320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0020.wav", - "speed": 1 - } - ], - "original_duration": 16.02, - "original_num_samples": 256320, - "transcript": "tracing this wild changing channel gorge gully or canyon the sections will show mount shasta as a huge palimpsest containing the records layer upon layer of strangely contrasted events in its fiery icy history" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.765, - "num_samples": 204240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0021.wav", - "speed": 1 - } - ], - "original_duration": 12.765, - "original_num_samples": 204240, - "transcript": "regaining the low ground at the base of the mountain and holding on in your grand orbit you pass through a belt of juniper woods called the cedars to sheep rock at the foot of the shasta pass" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.47, - "num_samples": 103520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.47, - "original_num_samples": 
103520, - "transcript": "here you strike the old emigrant road which leads over the low divide to the eastern slopes of the mountain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.155062, - "num_samples": 82481, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0023.wav", - "speed": 1 - } - ], - "original_duration": 5.155062, - "original_num_samples": 82481, - "transcript": "mount bremer is the most noted stronghold of the sheep in the whole shasta region" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.15, - "num_samples": 226400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0024.wav", - "speed": 1 - } - ], - "original_duration": 14.15, - "original_num_samples": 226400, - "transcript": "large flocks dwell here from year to year winter and summer descending occasionally into the adjacent sage plains and lava beds to feed but ever ready to take refuge in the jagged crags of their mountain at every alarm" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.97, - "num_samples": 79520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.97, - "original_num_samples": 79520, - "transcript": "while traveling with a company of hunters i saw about fifty in one flock" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.54, - "num_samples": 40640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0026.wav", - "speed": 1 - } - ], - "original_duration": 2.54, - "original_num_samples": 40640, - "transcript": "the mule deer are nearly as heavy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 4.045, - "num_samples": 64720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.045, - "original_num_samples": 64720, - "transcript": "their long massive ears give them a very striking appearance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.48, - "num_samples": 103680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0028.wav", - "speed": 1 - } - ], - "original_duration": 6.48, - "original_num_samples": 103680, - "transcript": "but neither the glorified woods on the one hand nor the lake on the other could at first hold the eye" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.73, - "num_samples": 91680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0029.wav", - "speed": 1 - } - ], - "original_duration": 5.73, - "original_num_samples": 91680, - "transcript": "then fell the gloaming making everything still more forbidding and mysterious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.07, - "num_samples": 49120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.07, - "original_num_samples": 49120, - "transcript": "then darkness like death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.02, - "num_samples": 160320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0031.wav", - "speed": 1 - } - ], - "original_duration": 10.02, - "original_num_samples": 160320, - "transcript": "two or three miles farther on is the main stronghold of 
the modocs held by them so long and defiantly against all the soldiers that could be brought to the attack" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.9, - "num_samples": 174400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0032.wav", - "speed": 1 - } - ], - "original_duration": 10.9, - "original_num_samples": 174400, - "transcript": "the ducks less wary kept their places merely swimming in and out through openings in the rushes rippling the glassy water and raising spangles in their wake" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.37, - "num_samples": 357920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0033.wav", - "speed": 1 - } - ], - "original_duration": 22.37, - "original_num_samples": 357920, - "transcript": "they are broad rugged crevassed cloudlike masses of down grinding ice pouring forth streams of muddy water as measures of the work they are doing in sculpturing the rocks beneath them very unlike the long majestic glaciers of alaska that riverlike go winding down the valleys through the forests to the sea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.865, - "num_samples": 253840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0034.wav", - "speed": 1 - } - ], - "original_duration": 15.865, - "original_num_samples": 253840, - "transcript": "thus the shasta river issues from a large lake like spring in shasta valley and about two thirds of the volume of the mc cloud gushes forth in a grand spring on the east side of the mountain a few miles back from its immediate base" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.685, - "num_samples": 314960, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0035.wav", - "speed": 1 - } - ], - "original_duration": 19.685, - "original_num_samples": 314960, - "transcript": "should the volume of the stream where you strike it seem small then you will know that you are above the spring if large nearly equal to its volume at its confluence with the pitt river then you are below it and in either case have only to follow the river up or down until you come to it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.225, - "num_samples": 195600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0036.wav", - "speed": 1 - } - ], - "original_duration": 12.225, - "original_num_samples": 195600, - "transcript": "under certain conditions you may hear the roar of the water rushing from the rock at a distance of half a mile or even more or you may not hear it until within a few rods" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.07, - "num_samples": 161120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0037.wav", - "speed": 1 - } - ], - "original_duration": 10.07, - "original_num_samples": 161120, - "transcript": "the vivid green of the boulders beneath the water is very striking and colors the entire stream with the exception of the portions broken into foam" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.78, - "num_samples": 252480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0038.wav", - "speed": 1 - } - ], - "original_duration": 15.78, - "original_num_samples": 252480, - "transcript": "asplenium epilobium heuchera hazel dogwood and alder make a luxurious fringe and setting and the forests of douglas spruce 
along the banks are the finest i have ever seen in the sierra" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.755, - "num_samples": 76080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0039.wav", - "speed": 1 - } - ], - "original_duration": 4.755, - "original_num_samples": 76080, - "transcript": "tracing rivers to their fountains makes the most charming of travels" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.265, - "num_samples": 164240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0040.wav", - "speed": 1 - } - ], - "original_duration": 10.265, - "original_num_samples": 164240, - "transcript": "as the life blood of the landscapes the best of the wilderness comes to their banks and not one dull passage is found in all their eventful histories" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 28.57, - "num_samples": 457120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0041.wav", - "speed": 1 - } - ], - "original_duration": 28.57, - "original_num_samples": 457120, - "transcript": "tracing the mc cloud to its highest springs and over the divide to the fountains of fall river near fort crook thence down that river to its confluence with the pitt on from there to the volcanic region about lassen's butte through the big meadows among the sources of the feather river and down through forests of sugar pine to the fertile plains of chico this is a glorious saunter and imposes no hardship" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.41, - "num_samples": 102560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0042.wav", - "speed": 1 - 
} - ], - "original_duration": 6.41, - "original_num_samples": 102560, - "transcript": "the ascent of lassen's butte is an easy walk and the views from the summit are extremely telling" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.92, - "num_samples": 158720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0043.wav", - "speed": 1 - } - ], - "original_duration": 9.92, - "original_num_samples": 158720, - "transcript": "the lofty icy shasta towering high above all seems but an hour's walk from you though the distance in an air line is about sixty miles" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.935, - "num_samples": 222960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0044.wav", - "speed": 1 - } - ], - "original_duration": 13.935, - "original_num_samples": 222960, - "transcript": "the big meadows lie near the foot of lassen's butte a beautiful spacious basin set in the heart of the richly forested mountains scarcely surpassed in the grandeur of its surroundings by tahoe" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.84, - "num_samples": 205440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0045.wav", - "speed": 1 - } - ], - "original_duration": 12.84, - "original_num_samples": 205440, - "transcript": "the great wilds of our country once held to be boundless and inexhaustible are being rapidly invaded and overrun in every direction and everything destructible in them is being destroyed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.36, - "num_samples": 85760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3000/15664/3000-15664-0046.wav", - 
"speed": 1 - } - ], - "original_duration": 5.36, - "original_num_samples": 85760, - "transcript": "every landscape low and high seems doomed to be trampled and harried" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.26, - "num_samples": 36160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.26, - "original_num_samples": 36160, - "transcript": "it was established at southwark" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.2, - "num_samples": 51200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.2, - "original_num_samples": 51200, - "transcript": "the dome of saint paul's was a delight to ursus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.925, - "num_samples": 62800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.925, - "original_num_samples": 62800, - "transcript": "saint paul is a saint only with extenuating circumstances" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.145, - "num_samples": 50320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.145, - "original_num_samples": 50320, - "transcript": "he entered heaven only by the artists door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.59, - "num_samples": 41440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0004.wav", - "speed": 1 - } - 
], - "original_duration": 2.59, - "original_num_samples": 41440, - "transcript": "it might have been ordered for the green box" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.24, - "num_samples": 35840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.24, - "original_num_samples": 35840, - "transcript": "it was a theatre ready made" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.435, - "num_samples": 118960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.435, - "original_num_samples": 118960, - "transcript": "against this wall was placed the green box which they were able to draw into the yard owing to the height of the gate" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.195, - "num_samples": 131120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0007.wav", - "speed": 1 - } - ], - "original_duration": 8.195, - "original_num_samples": 131120, - "transcript": "the placard gwynplaine the laughing man taken from its nail in the green box was hung up close to the sign of the inn" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.32, - "num_samples": 165120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.32, - "original_num_samples": 165120, - "transcript": "by the side of the door was constructed off hand by means of an empty barrel a box for the money taker who was sometimes fibi and sometimes vinos" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 4.695, - "num_samples": 75120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.695, - "original_num_samples": 75120, - "transcript": "we are in london said ursus we must be prepared for the gentry" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.15, - "num_samples": 34400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.15, - "original_num_samples": 34400, - "transcript": "they began their performances" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.93, - "num_samples": 110880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.93, - "original_num_samples": 110880, - "transcript": "with that exception their success became so great that no mountebank memory could recall its parallel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.625, - "num_samples": 58000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.625, - "original_num_samples": 58000, - "transcript": "all southwark ran in crowds to admire the laughing man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.02, - "num_samples": 80320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.02, - "original_num_samples": 80320, - "transcript": "the merry andrews and mountebanks of tarrinzeau field were aghast at 
gwynplaine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.425, - "num_samples": 38800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.425, - "original_num_samples": 38800, - "transcript": "gwynplaine ate up their public" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.17, - "num_samples": 114720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0015.wav", - "speed": 1 - } - ], - "original_duration": 7.17, - "original_num_samples": 114720, - "transcript": "besides the small fry the swallowers of swords and the grimace makers real performances took place on the green" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.805, - "num_samples": 76880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.805, - "original_num_samples": 76880, - "transcript": "even this comedian of jaws and claws was eclipsed in success" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.69, - "num_samples": 75040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.69, - "original_num_samples": 75040, - "transcript": "that success was prodigious still it remained local" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.92, - "num_samples": 94720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.92, - "original_num_samples": 94720, - 
"transcript": "it took a hundred and thirty years for the name of shakespeare to penetrate from england into france" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.48, - "num_samples": 55680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.48, - "original_num_samples": 55680, - "transcript": "the glory of gwynplaine had not passed london bridge" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.255, - "num_samples": 36080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.255, - "original_num_samples": 36080, - "transcript": "these were remarkable talents" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.37, - "num_samples": 133920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0021.wav", - "speed": 1 - } - ], - "original_duration": 8.37, - "original_num_samples": 133920, - "transcript": "besides this he harangued like cicero as we have just seen sold his drugs attended sickness and even healed the sick" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.04, - "num_samples": 96640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.04, - "original_num_samples": 96640, - "transcript": "ursus was satisfied with the applause of southwark but by no means astonished" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.895, - "num_samples": 126320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/5895/34629/5895-34629-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.895, - "original_num_samples": 126320, - "transcript": "at every performance the yard of the inn transformed into a pit was filled with a ragged and enthusiastic audience" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.105, - "num_samples": 65680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.105, - "original_num_samples": 65680, - "transcript": "the emptying of tankards did not decrease their success" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.82, - "num_samples": 77120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0025.wav", - "speed": 1 - } - ], - "original_duration": 4.82, - "original_num_samples": 77120, - "transcript": "this connoisseur was suddenly fascinated and had adopted the laughing man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.515, - "num_samples": 216240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0026.wav", - "speed": 1 - } - ], - "original_duration": 13.515, - "original_num_samples": 216240, - "transcript": "he did not come every evening but when he came he led the public applause grew into acclamation success rose not to the roof for there was none but to the clouds for there were plenty of them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.1, - "num_samples": 113600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0027.wav", - "speed": 1 - } - ], - "original_duration": 7.1, - "original_num_samples": 113600, - "transcript": "which clouds seeing 
that there was no roof sometimes wept over the masterpiece of ursus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.4, - "num_samples": 86400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0028.wav", - "speed": 1 - } - ], - "original_duration": 5.4, - "original_num_samples": 86400, - "transcript": "his enthusiasm caused ursus to remark this man and gwynplaine to observe him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.08, - "num_samples": 49280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.08, - "original_num_samples": 49280, - "transcript": "they had a great friend in this unknown visitor" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.16, - "num_samples": 162560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0030.wav", - "speed": 1 - } - ], - "original_duration": 10.16, - "original_num_samples": 162560, - "transcript": "one evening ursus was in the side scene which was the kitchen door of the green box seeing master nicless standing by him showed him this man in the crowd and asked him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.57, - "num_samples": 57120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0031.wav", - "speed": 1 - } - ], - "original_duration": 3.57, - "original_num_samples": 57120, - "transcript": "what a pity that he should not be a lord" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.805, - "num_samples": 44880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/5895/34629/5895-34629-0032.wav", - "speed": 1 - } - ], - "original_duration": 2.805, - "original_num_samples": 44880, - "transcript": "he would make a famous scoundrel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.73, - "num_samples": 123680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34629/5895-34629-0033.wav", - "speed": 1 - } - ], - "original_duration": 7.73, - "original_num_samples": 123680, - "transcript": "at that hour there was no one in the fair ground except perhaps some reeling drunkard making staggering shadows in dark corners" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.37, - "num_samples": 53920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.37, - "original_num_samples": 53920, - "transcript": "what true things are told in stories" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.905, - "num_samples": 94480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0001.wav", - "speed": 1 - } - ], - "original_duration": 5.905, - "original_num_samples": 94480, - "transcript": "in gwynplaine evil thoughts never ripened and he had therefore no remorse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.91, - "num_samples": 46560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.91, - "original_num_samples": 46560, - "transcript": "what was this nothing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.08, - "num_samples": 81280, - "encoding": "Signed Integer PCM", 
- "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.08, - "original_num_samples": 81280, - "transcript": "from sixteen eighty to seventeen o four a great change had taken place" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "the wheels were all of the same size and high as wagon wheels" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.335, - "num_samples": 117360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0005.wav", - "speed": 1 - } - ], - "original_duration": 7.335, - "original_num_samples": 117360, - "transcript": "this green colour had succeeded in drawing attention to the carriage which was known in all the fair grounds as the green box" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.36, - "num_samples": 69760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.36, - "original_num_samples": 69760, - "transcript": "on the roof from a tube painted green like the rest smoke arose" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.76, - "num_samples": 76160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.76, - "original_num_samples": 76160, - "transcript": "the astonishment with which the villagers regarded this machine was overwhelming" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.82, - "num_samples": 141120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.82, - "original_num_samples": 141120, - "transcript": "this was the old establishment of ursus its proportions augmented by success and improved from a wretched booth into a theatre" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.89, - "num_samples": 174240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0009.wav", - "speed": 1 - } - ], - "original_duration": 10.89, - "original_num_samples": 174240, - "transcript": "unknown people had worked upon his face he on the other hand had worked on his mind and behind this well executed mask he had placed all that he could of thought" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.26, - "num_samples": 52160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.26, - "original_num_samples": 52160, - "transcript": "the effect of his appearance had been surprising" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.54, - "num_samples": 248640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0011.wav", - "speed": 1 - } - ], - "original_duration": 15.54, - "original_num_samples": 248640, - "transcript": "some believed it to be natural others declared it to be artificial and as conjecture was added to reality everywhere at every cross road on the journey in all the grounds of fairs and fetes the crowd ran after gwynplaine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 4.56, - "num_samples": 72960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.56, - "original_num_samples": 72960, - "transcript": "the curiosity of one place exhausted they passed on to another" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.325, - "num_samples": 229200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0013.wav", - "speed": 1 - } - ], - "original_duration": 14.325, - "original_num_samples": 229200, - "transcript": "this fortune had allowed ursus who was the administrator of gwynplaine's success to have the chariot of his dreams constructed that is to say a caravan large enough to carry a theatre and to sow science and art in the highways" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.875, - "num_samples": 94000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.875, - "original_num_samples": 94000, - "transcript": "for these read fibi and vinos that we may conform to english pronunciation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.05, - "num_samples": 48800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.05, - "original_num_samples": 48800, - "transcript": "phoebe cooked venus scrubbed the temple" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.58, - "num_samples": 41280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0016.wav", - "speed": 1 - } - ], - 
"original_duration": 2.58, - "original_num_samples": 41280, - "transcript": "ursus and homo took charge of each other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.385, - "num_samples": 134160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0017.wav", - "speed": 1 - } - ], - "original_duration": 8.385, - "original_num_samples": 134160, - "transcript": "this hut in a corner at the back to the right of the door served as bedchamber and dressing room to ursus and gwynplaine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.98, - "num_samples": 79680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0018.wav", - "speed": 1 - } - ], - "original_duration": 4.98, - "original_num_samples": 79680, - "transcript": "the caravan was divided into three compartments partitioned from each other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.455, - "num_samples": 135280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0019.wav", - "speed": 1 - } - ], - "original_duration": 8.455, - "original_num_samples": 135280, - "transcript": "a loft under the arch of the roof contained the scenes and on opening a trap door lamps appeared producing wonders of light" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.715, - "num_samples": 91440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0020.wav", - "speed": 1 - } - ], - "original_duration": 5.715, - "original_num_samples": 91440, - "transcript": "ursus was the poet of these magical representations he wrote the pieces" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 2.86, - "num_samples": 45760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.86, - "original_num_samples": 45760, - "transcript": "then i look perhaps like what i am" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.225, - "num_samples": 163600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0022.wav", - "speed": 1 - } - ], - "original_duration": 10.225, - "original_num_samples": 163600, - "transcript": "this opening looked for all the world like a mouth of hell in the words of the itinerant puritan preachers who turned away from it with horror" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.845, - "num_samples": 93520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34622/5895-34622-0023.wav", - "speed": 1 - } - ], - "original_duration": 5.845, - "original_num_samples": 93520, - "transcript": "ursus was in everything in the piece in the company in the kitchen in the orchestra" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.335, - "num_samples": 53360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0000.wav", - "speed": 1 - } - ], - "original_duration": 3.335, - "original_num_samples": 53360, - "transcript": "but is laughter a synonym of joy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.305, - "num_samples": 52880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.305, - "original_num_samples": 52880, - "transcript": "such perfect completeness is not in nature" - }, - 
{ - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.73, - "num_samples": 139680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.73, - "original_num_samples": 139680, - "transcript": "had gwynplaine when a child been so worthy of attention that his face had been subjected to transmutation why not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.605, - "num_samples": 105680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.605, - "original_num_samples": 105680, - "transcript": "according to all appearance industrious manipulators of children had worked upon his face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.95, - "num_samples": 255200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0004.wav", - "speed": 1 - } - ], - "original_duration": 15.95, - "original_num_samples": 255200, - "transcript": "it seemed evident that a mysterious and probably occult science which was to surgery what alchemy was to chemistry had chiselled his flesh evidently at a very tender age and manufactured his countenance with premeditation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.495, - "num_samples": 39920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.495, - "original_num_samples": 39920, - "transcript": "gwynplaine was a mountebank" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.525, - "num_samples": 40400, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.525, - "original_num_samples": 40400, - "transcript": "he showed himself on the platform" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.705, - "num_samples": 91280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.705, - "original_num_samples": 91280, - "transcript": "it was gwynplaine's laugh which created the laughter of others yet he did not laugh himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.055, - "num_samples": 48880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.055, - "original_num_samples": 48880, - "transcript": "the outside did not depend on the interior" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.78, - "num_samples": 44480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.78, - "original_num_samples": 44480, - "transcript": "no one could escape from this rictus" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.77, - "num_samples": 124320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.77, - "original_num_samples": 124320, - "transcript": "all his emotions whatever they might have been augmented his strange face of joy or to speak more correctly aggravated it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, 
- "duration": 2.76, - "num_samples": 44160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.76, - "original_num_samples": 44160, - "transcript": "an everlasting laugh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.335, - "num_samples": 165360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0012.wav", - "speed": 1 - } - ], - "original_duration": 10.335, - "original_num_samples": 165360, - "transcript": "the manichaeans believed the absolute occasionally gives way and that god himself sometimes abdicates for a time so also of the will" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.745, - "num_samples": 75920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.745, - "original_num_samples": 75920, - "transcript": "the whole of existence resembles a letter modified in the postscript" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.160063, - "num_samples": 82561, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.160063, - "original_num_samples": 82561, - "transcript": "with this exception gwynplaine's laugh was everlasting" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.745, - "num_samples": 139920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.745, - "original_num_samples": 139920, - "transcript": "the joyous convulsion of laughter was as a tribute paid 
they submitted to it gladly but almost mechanically" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.68, - "num_samples": 234880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0016.wav", - "speed": 1 - } - ], - "original_duration": 14.68, - "original_num_samples": 234880, - "transcript": "besides we must remember that they had in those times means of putting patients to sleep and of suppressing all suffering only then it was called magic while now it is called anaesthesia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.36, - "num_samples": 117760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.36, - "original_num_samples": 117760, - "transcript": "besides this face those who had brought him up had given him the resources of a gymnast and an athlete" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.275, - "num_samples": 36400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.275, - "original_num_samples": 36400, - "transcript": "gwynplaine had yellow hair" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.855, - "num_samples": 109680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.855, - "original_num_samples": 109680, - "transcript": "his hair having probably been dyed with some corrosive preparation had left it woolly and rough to the touch" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.53, - "num_samples": 
152480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0020.wav", - "speed": 1 - } - ], - "original_duration": 9.53, - "original_num_samples": 152480, - "transcript": "its yellow bristles rather a mane than a head of hair covered and concealed a lofty brow evidently made to contain thought" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.155, - "num_samples": 194480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5895/34615/5895-34615-0021.wav", - "speed": 1 - } - ], - "original_duration": 12.155, - "original_num_samples": 194480, - "transcript": "the operation whatever it had been which had deprived his features of harmony and put all their flesh into disorder had had no effect on the bony structure of his head" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.005, - "num_samples": 64080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0000.wav", - "speed": 1 - } - ], - "original_duration": 4.005, - "original_num_samples": 64080, - "transcript": "we remained several months but soon we were on the tramp again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.93, - "num_samples": 78880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.93, - "original_num_samples": 78880, - "transcript": "the federal army was concentrating at nashville there was no rest for the weary" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.645, - "num_samples": 42320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0002.wav", - "speed": 1 - } - ], - "original_duration": 
2.645, - "original_num_samples": 42320, - "transcript": "our army stopped at murfreesboro" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.985, - "num_samples": 63760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0003.wav", - "speed": 1 - } - ], - "original_duration": 3.985, - "original_num_samples": 63760, - "transcript": "from time to time different regiments were sent forward to do picket duty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.355, - "num_samples": 53680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.355, - "original_num_samples": 53680, - "transcript": "the yankee picket lines were not a half mile off" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.565, - "num_samples": 73040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.565, - "original_num_samples": 73040, - "transcript": "i am a videt you know the responsibility resting on me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.27, - "num_samples": 116320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.27, - "original_num_samples": 116320, - "transcript": "says he i would not trust a secesh on his word oath or bond march i say" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.585, - "num_samples": 105360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0007.wav", - "speed": 1 - } - 
], - "original_duration": 6.585, - "original_num_samples": 105360, - "transcript": "i soon found out that he had caught sight of the relief on the road and was afraid to shoot i quickly made up my mind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.02, - "num_samples": 48320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.02, - "original_num_samples": 48320, - "transcript": "my gun was at my feet and one step would get it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.16, - "num_samples": 50560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.16, - "original_num_samples": 50560, - "transcript": "i made a quick glance over my shoulder and grabbed at my gun" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.855, - "num_samples": 61680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.855, - "original_num_samples": 61680, - "transcript": "he divined my motive and fired the ball missed its aim" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.4, - "num_samples": 118400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.4, - "original_num_samples": 118400, - "transcript": "i think we must have killed a good many in the old field because we were firing all the time at the solid line as they advanced upon us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.385, - 
"num_samples": 118160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0012.wav", - "speed": 1 - } - ], - "original_duration": 7.385, - "original_num_samples": 118160, - "transcript": "we kept falling back and firing all day and were relieved by another regiment about dark we rejoined our regiment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.155, - "num_samples": 82480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.155, - "original_num_samples": 82480, - "transcript": "line of battle was formed on the north bank of stone's river on the yankee side" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.82, - "num_samples": 61120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.82, - "original_num_samples": 61120, - "transcript": "bad generalship i thought it was christmas" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.995, - "num_samples": 47920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.995, - "original_num_samples": 47920, - "transcript": "the private could but he was no general you see" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.455, - "num_samples": 167280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0016.wav", - "speed": 1 - } - ], - "original_duration": 10.455, - "original_num_samples": 167280, - "transcript": "i called lieutenant colonel frierson's attention to the yankees and he remarked 
well i don't know whether they are yankees or not but if they are they will come out of there mighty quick" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.865, - "num_samples": 45840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.865, - "original_num_samples": 45840, - "transcript": "the yankees marched over the hill out of sight" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.485, - "num_samples": 39760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.485, - "original_num_samples": 39760, - "transcript": "we were ordered forward to the attack" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.415, - "num_samples": 54640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0019.wav", - "speed": 1 - } - ], - "original_duration": 3.415, - "original_num_samples": 54640, - "transcript": "we were right upon the yankee line on the wilkerson turnpike" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.35, - "num_samples": 213600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0020.wav", - "speed": 1 - } - ], - "original_duration": 13.35, - "original_num_samples": 213600, - "transcript": "we were not twenty yards off from the yankees and they were pouring the hot shot and shells right into our ranks and every man was yelling at the top of his voice cease firing you are firing on your own men cease firing you are firing on your own men" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
7.07, - "num_samples": 113120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0021.wav", - "speed": 1 - } - ], - "original_duration": 7.07, - "original_num_samples": 113120, - "transcript": "oakley color bearer of the fourth tennessee regiment ran right up in the midst of the yankee line with his colors begging his men to follow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.9, - "num_samples": 78400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0022.wav", - "speed": 1 - } - ], - "original_duration": 4.9, - "original_num_samples": 78400, - "transcript": "the leaden hail storm swept them off the field they fell back and re formed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.2, - "num_samples": 115200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.2, - "original_num_samples": 115200, - "transcript": "we were at that time at least a hundred yards in advance of the brigade cheatham all the time calling upon the men to come on" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.765, - "num_samples": 172240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0024.wav", - "speed": 1 - } - ], - "original_duration": 10.765, - "original_num_samples": 172240, - "transcript": "i saw and felt that he was not fighting for glory but that he was fighting for his country because he loved that country and he was willing to give his life for his country and the success of our cause" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.465, - "num_samples": 39440, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.465, - "original_num_samples": 39440, - "transcript": "i thought it had been torn from my shoulder" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.82, - "num_samples": 77120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0026.wav", - "speed": 1 - } - ], - "original_duration": 4.82, - "original_num_samples": 77120, - "transcript": "as i went back to the field hospital i overtook another man walking along" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.17, - "num_samples": 66720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0027.wav", - "speed": 1 - } - ], - "original_duration": 4.17, - "original_num_samples": 66720, - "transcript": "i looked at it pretty close and i said great god" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.06, - "num_samples": 96960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0028.wav", - "speed": 1 - } - ], - "original_duration": 6.06, - "original_num_samples": 96960, - "transcript": "he was walking along when all at once he dropped down and died without a struggle or a groan" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.07, - "num_samples": 65120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0029.wav", - "speed": 1 - } - ], - "original_duration": 4.07, - "original_num_samples": 65120, - "transcript": "but i could not bear the thought of wearing dead men's shoes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.385, 
- "num_samples": 54160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0030.wav", - "speed": 1 - } - ], - "original_duration": 3.385, - "original_num_samples": 54160, - "transcript": "he was stone dead but i dropped that foot quick" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.385, - "num_samples": 102160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0031.wav", - "speed": 1 - } - ], - "original_duration": 6.385, - "original_num_samples": 102160, - "transcript": "before we arrived at the house we saw a body of yankees approaching and as we started to run back they fired upon us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.125, - "num_samples": 50000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64029/5694-64029-0032.wav", - "speed": 1 - } - ], - "original_duration": 3.125, - "original_num_samples": 50000, - "transcript": "our pickets had run in and reported a night attack" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.67, - "num_samples": 26720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0000.wav", - "speed": 1 - } - ], - "original_duration": 1.67, - "original_num_samples": 26720, - "transcript": "shiloh" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.35, - "num_samples": 69600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0001.wav", - "speed": 1 - } - ], - "original_duration": 4.35, - "original_num_samples": 69600, - "transcript": "this was the first big battle in which our regiment had ever been engaged" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - 
"bitrate": 16, - "duration": 10.96, - "num_samples": 175360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.96, - "original_num_samples": 175360, - "transcript": "i do not pretend to tell of what command distinguished itself of heroes of blood and wounds of shrieks and groans of brilliant charges of cannon captured et cetera" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.92, - "num_samples": 78720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.92, - "original_num_samples": 78720, - "transcript": "about daylight on sunday morning chalmers brigade relieved gladden's" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.695, - "num_samples": 59120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.695, - "original_num_samples": 59120, - "transcript": "as gladden rode by us a courier rode up and told him something" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.170063, - "num_samples": 130721, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.170063, - "original_num_samples": 130721, - "transcript": "on sunday morning a clear beautiful and still day the order was given for the whole army to advance and to attack immediately" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.22, - "num_samples": 51520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0006.wav", - 
"speed": 1 - } - ], - "original_duration": 3.22, - "original_num_samples": 51520, - "transcript": "we were supporting an alabama brigade" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.915, - "num_samples": 62640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.915, - "original_num_samples": 62640, - "transcript": "that's right my brave first tennessee give em hail columbia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.554937, - "num_samples": 40879, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.554937, - "original_num_samples": 40879, - "transcript": "the fact was kept from the troops" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.02, - "num_samples": 64320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.02, - "original_num_samples": 64320, - "transcript": "we had to pass over the ground where troops had been fighting all day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 21.62, - "num_samples": 345920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0010.wav", - "speed": 1 - } - ], - "original_duration": 21.62, - "original_num_samples": 345920, - "transcript": "i had heard and read of battlefields seen pictures of battlefields of horses and men of cannon and wagons all jumbled together while the ground was strewn with dead and dying and wounded but i must confess that i never realized the pomp and circumstance of the thing called glorious war until i saw 
this" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.03, - "num_samples": 112480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0011.wav", - "speed": 1 - } - ], - "original_duration": 7.03, - "original_num_samples": 112480, - "transcript": "i had been feeling mean all the morning as if i had stolen a sheep but when the order to charge was given i got happy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.875, - "num_samples": 46000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.875, - "original_num_samples": 46000, - "transcript": "officers could not curb the men to keep in line" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.44, - "num_samples": 55040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.44, - "original_num_samples": 55040, - "transcript": "on monday the tide was reversed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.875, - "num_samples": 254000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0014.wav", - "speed": 1 - } - ], - "original_duration": 15.875, - "original_num_samples": 254000, - "transcript": "but as i said before reader a private soldier is but an automaton and knows nothing of what is going on among the generals and i am only giving the chronicles of little things and events that came under my own observation as i saw them then and remember them now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.065, - "num_samples": 81040, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0015.wav", - "speed": 1 - } - ], - "original_duration": 5.065, - "original_num_samples": 81040, - "transcript": "should you desire to find out more about the battle i refer you to history" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.72, - "num_samples": 171520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0016.wav", - "speed": 1 - } - ], - "original_duration": 10.72, - "original_num_samples": 171520, - "transcript": "about the time he pulled trigger a stray ball from some direction struck him in the side and he fell off dead and his horse becoming frightened galloped off dragging him through the confederate lines" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.015, - "num_samples": 48240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.015, - "original_num_samples": 48240, - "transcript": "on monday morning i too captured me a mule" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.685, - "num_samples": 42960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0018.wav", - "speed": 1 - } - ], - "original_duration": 2.685, - "original_num_samples": 42960, - "transcript": "he was wise in his own conceit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.87, - "num_samples": 141920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0019.wav", - "speed": 1 - } - ], - "original_duration": 8.87, - "original_num_samples": 141920, - "transcript": "i frequently thought it would be pleasant 
to split the difference with that mule and i would gladly have done so if i could have gotten one half of his no" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.35, - "num_samples": 117600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0020.wav", - "speed": 1 - } - ], - "original_duration": 7.35, - "original_num_samples": 117600, - "transcript": "mule did not desire to cross while i was trying to persuade him with a big stick a rock in his ear and a twister on his nose" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.61, - "num_samples": 121760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0021.wav", - "speed": 1 - } - ], - "original_duration": 7.61, - "original_num_samples": 121760, - "transcript": "so he got a large two inch rope tied one end around the mule's neck and the other to the caisson and ordered the driver to whip up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.73, - "num_samples": 123680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0022.wav", - "speed": 1 - } - ], - "original_duration": 7.73, - "original_num_samples": 123680, - "transcript": "the rope however was stronger than the mule's no and he was finally prevailed upon by the strength of the rope to cross the creek" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.52, - "num_samples": 136320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64025/5694-64025-0023.wav", - "speed": 1 - } - ], - "original_duration": 8.52, - "original_num_samples": 136320, - "transcript": "on my taking the rope off he shook himself and seemed to say you think that you are mighty smart folks but 
you are a leetle too smart" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.595, - "num_samples": 41520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.595, - "original_num_samples": 41520, - "transcript": "advance into tennessee" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.65, - "num_samples": 58400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.65, - "original_num_samples": 58400, - "transcript": "yank says what you doing johnny" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.77, - "num_samples": 140320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0002.wav", - "speed": 1 - } - ], - "original_duration": 8.77, - "original_num_samples": 140320, - "transcript": "we passed around atlanta crossed the chattahoochee and traveled back over the same route on which we had made the arduous campaign under joe johnston" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.82, - "num_samples": 109120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.82, - "original_num_samples": 109120, - "transcript": "outside of these occasional reminders we could see no evidence of the desolation of the track of an invading army" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.58, - "num_samples": 153280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0004.wav", - "speed": 1 - } 
- ], - "original_duration": 9.58, - "original_num_samples": 153280, - "transcript": "we saw the united states flag flying from the ramparts and thought that yank would probably be asleep or catching lice or maybe engaged in a game of seven up" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.7, - "num_samples": 75200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.7, - "original_num_samples": 75200, - "transcript": "he walked up and says hello boys what is it boss" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.525, - "num_samples": 40400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.525, - "original_num_samples": 40400, - "transcript": "a yankee always says nager" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.37, - "num_samples": 37920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.37, - "original_num_samples": 37920, - "transcript": "they persuaded eloquently" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.95, - "num_samples": 31200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0008.wav", - "speed": 1 - } - ], - "original_duration": 1.95, - "original_num_samples": 31200, - "transcript": "a man in the well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.385, - "num_samples": 38160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0009.wav", - 
"speed": 1 - } - ], - "original_duration": 2.385, - "original_num_samples": 38160, - "transcript": "the voice appeared to be overhead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.38, - "num_samples": 150080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0010.wav", - "speed": 1 - } - ], - "original_duration": 9.38, - "original_num_samples": 150080, - "transcript": "right before me i saw the long dry grass all bending toward a common center and i knew that it was an old well and that my comrade had fallen in it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.995, - "num_samples": 47920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.995, - "original_num_samples": 47920, - "transcript": "but how to get him out was the unsolved problem" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.805, - "num_samples": 44880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.805, - "original_num_samples": 44880, - "transcript": "the poor fellow stayed in that well all night" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.305, - "num_samples": 52880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.305, - "original_num_samples": 52880, - "transcript": "we looked all around and thought that the coast was clear" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0014.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "i don't think his gun was loaded though because we did not hear the ball whistle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.77, - "num_samples": 108320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0015.wav", - "speed": 1 - } - ], - "original_duration": 6.77, - "original_num_samples": 108320, - "transcript": "we walked over this floating bridge and soon found ourselves on the tennessee side of tennessee river" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.585, - "num_samples": 41360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.585, - "original_num_samples": 41360, - "transcript": "we had beef for supper that night" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.33, - "num_samples": 245280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0017.wav", - "speed": 1 - } - ], - "original_duration": 15.33, - "original_num_samples": 245280, - "transcript": "how every pulse did beat and leap and how every heart did throb with emotions of joy which seemed nearly akin to heaven when we received the glad intelligence of our onward march toward the land of promise and of our loved ones" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.795, - "num_samples": 188720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0018.wav", - "speed": 1 - } - ], - "original_duration": 11.795, - "original_num_samples": 
188720, - "transcript": "we were inured to privations and hardships had been upon every march in every battle in every skirmish in every advance in every retreat in every victory in every defeat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.255, - "num_samples": 100080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.255, - "original_num_samples": 100080, - "transcript": "he wanted to go by home and tell his wife and children good bye and to get his clothes it was no go" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.84, - "num_samples": 77440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.84, - "original_num_samples": 77440, - "transcript": "but after awhile jim says gentlemen ay ganny the law" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.33, - "num_samples": 53280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0021.wav", - "speed": 1 - } - ], - "original_duration": 3.33, - "original_num_samples": 53280, - "transcript": "you see jim knowed the law" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.77, - "num_samples": 252320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0022.wav", - "speed": 1 - } - ], - "original_duration": 15.77, - "original_num_samples": 252320, - "transcript": "those old soldiers had long long ago forgotten about that old law of the long gone past but jim had treasured it up in his memory lo these many years and he thought it would serve him now as it had no doubt frequently done in the 
past" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.795, - "num_samples": 76720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.795, - "original_num_samples": 76720, - "transcript": "the third day it was reported that the yankees had taken position on the murfreesboro pike" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.52, - "num_samples": 56320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0024.wav", - "speed": 1 - } - ], - "original_duration": 3.52, - "original_num_samples": 56320, - "transcript": "a regiment was sent to the attack it was jim's regiment" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.48, - "num_samples": 103680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/5694/64038/5694-64038-0025.wav", - "speed": 1 - } - ], - "original_duration": 6.48, - "original_num_samples": 103680, - "transcript": "he hadn't seen anything to shoot at but he blazed away he loaded and fired the second time when they were ordered to retreat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.915, - "num_samples": 158640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.915, - "original_num_samples": 158640, - "transcript": "shortly after passing one of these chapels we came suddenly upon a village which started up out of the mist and i was alarmed lest i should be made an object of curiosity or dislike" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.75, - "num_samples": 156000, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.75, - "original_num_samples": 156000, - "transcript": "my guides however were well known and the natural politeness of the people prevented them from putting me to any inconvenience but they could not help eyeing me nor i them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.055, - "num_samples": 64880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.055, - "original_num_samples": 64880, - "transcript": "the streets were narrow and unpaved but very fairly clean" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.15, - "num_samples": 130400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.15, - "original_num_samples": 130400, - "transcript": "the vine grew outside many of the houses and there were some with sign boards on which was painted a bottle and a glass that made me feel much at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.41, - "num_samples": 166560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0004.wav", - "speed": 1 - } - ], - "original_duration": 10.41, - "original_num_samples": 166560, - "transcript": "even on this ledge of human society there was a stunted growth of shoplets which had taken root and vegetated somehow though as in an air mercantile of the bleakest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.845, - "num_samples": 93520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2412/153954/2412-153954-0005.wav", - "speed": 1 - } - ], - "original_duration": 5.845, - "original_num_samples": 93520, - "transcript": "each feature was finished eyelids eyelashes and ears being almost invariably perfect" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.22, - "num_samples": 163520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0006.wav", - "speed": 1 - } - ], - "original_duration": 10.22, - "original_num_samples": 163520, - "transcript": "their expression was divine and as they glanced at me timidly but with parted lips in great bewilderment i forgot all thoughts of their conversion in feelings that were far more earthly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.43, - "num_samples": 134880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0007.wav", - "speed": 1 - } - ], - "original_duration": 8.43, - "original_num_samples": 134880, - "transcript": "even in middle age they were still comely and the old grey haired women at their cottage doors had a dignity not to say majesty of their own" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.735, - "num_samples": 43760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.735, - "original_num_samples": 43760, - "transcript": "the men were as handsome as the women beautiful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.42, - "num_samples": 166720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0009.wav", - "speed": 1 - } - ], - "original_duration": 10.42, - "original_num_samples": 166720, 
- "transcript": "i have always delighted in and reverenced beauty but i felt simply abashed in the presence of such a splendid type a compound of all that is best in egyptian greek and italian" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.94, - "num_samples": 127040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0010.wav", - "speed": 1 - } - ], - "original_duration": 7.94, - "original_num_samples": 127040, - "transcript": "the children were infinite in number and exceedingly merry i need hardly say that they came in for their full share of the prevailing beauty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.935, - "num_samples": 94960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.935, - "original_num_samples": 94960, - "transcript": "i expressed by signs my admiration and pleasure to my guides and they were greatly pleased" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.12, - "num_samples": 129920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0012.wav", - "speed": 1 - } - ], - "original_duration": 8.12, - "original_num_samples": 129920, - "transcript": "the country was highly cultivated every ledge being planted with chestnuts walnuts and apple trees from which the apples were now gathering" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.915, - "num_samples": 62640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0013.wav", - "speed": 1 - } - ], - "original_duration": 3.915, - "original_num_samples": 62640, - "transcript": "i saw a few sheep with rounded 
noses and enormous tails" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.615, - "num_samples": 281840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0014.wav", - "speed": 1 - } - ], - "original_duration": 17.615, - "original_num_samples": 281840, - "transcript": "in about four hours of walking from the time we started and after passing two or three more villages we came upon a considerable town and my guides made many attempts to make me understand something but i gathered no inkling of their meaning except that i need be under no apprehension of danger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.39, - "num_samples": 182240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0015.wav", - "speed": 1 - } - ], - "original_duration": 11.39, - "original_num_samples": 182240, - "transcript": "suffice it that i found myself taken before the chief magistrate and by his orders was placed in an apartment with two other people who were the first i had seen looking anything but well and handsome" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.44, - "num_samples": 135040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0016.wav", - "speed": 1 - } - ], - "original_duration": 8.44, - "original_num_samples": 135040, - "transcript": "in fact one of them was plainly very much out of health and coughed violently from time to time in spite of manifest efforts to suppress it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.99, - "num_samples": 111840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0017.wav", - "speed": 1 - } - ], - 
"original_duration": 6.99, - "original_num_samples": 111840, - "transcript": "the other looked pale and ill but he was marvellously self contained and it was impossible to say what was the matter with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.105, - "num_samples": 193680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0018.wav", - "speed": 1 - } - ], - "original_duration": 12.105, - "original_num_samples": 193680, - "transcript": "they felt my pulse they looked at my tongue they listened at my chest they felt all my muscles and at the end of each operation they looked at the chief and nodded and said something in a tone quite pleasant as though i were all right" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.485, - "num_samples": 119760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0019.wav", - "speed": 1 - } - ], - "original_duration": 7.485, - "original_num_samples": 119760, - "transcript": "but by and by they came to my watch which i had hidden away in the inmost pocket that i had and had forgotten when they began their search" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.975, - "num_samples": 127600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0020.wav", - "speed": 1 - } - ], - "original_duration": 7.975, - "original_num_samples": 127600, - "transcript": "again there was a very old carriage whose wheels in spite of rust and decay i could see had been designed originally for iron rails" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.11, - "num_samples": 97760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2412/153954/2412-153954-0021.wav", - "speed": 1 - } - ], - "original_duration": 6.11, - "original_num_samples": 97760, - "transcript": "we passed many cases and at last came to one in which there were several clocks and two or three old watches" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.54, - "num_samples": 56640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.54, - "original_num_samples": 56640, - "transcript": "the design was different but the thing was clearly the same" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.65, - "num_samples": 42400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.65, - "original_num_samples": 42400, - "transcript": "this had some effect in calming him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.19, - "num_samples": 67040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153954/2412-153954-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.19, - "original_num_samples": 67040, - "transcript": "he began presently to relent and spoke to me in a kinder manner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.66, - "num_samples": 186560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.66, - "original_num_samples": 186560, - "transcript": "if the reader will excuse me i will say nothing of my antecedents nor of the circumstances which led me to leave my native country the narrative would be tedious to him and painful to 
myself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.51, - "num_samples": 168160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.51, - "original_num_samples": 168160, - "transcript": "it will be seen that i did not succeed in my design and that however much i may have met with that was new and strange i have been unable to reap any pecuniary advantage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.72, - "num_samples": 59520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.72, - "original_num_samples": 59520, - "transcript": "no one who is himself honest will doubt my being so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.515, - "num_samples": 136240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.515, - "original_num_samples": 136240, - "transcript": "i reached my destination in one of the last months of eighteen sixty eight but i dare not mention the season lest the reader should gather in which hemisphere i was" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.68, - "num_samples": 362880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0004.wav", - "speed": 1 - } - ], - "original_duration": 22.68, - "original_num_samples": 362880, - "transcript": "sheep and cattle were introduced and bred with extreme rapidity men took up their fifty thousand or one hundred thousand acres of country going inland one behind the other till in a few years there was 
not an acre between the sea and the front ranges which was not taken up and stations either for sheep or cattle were spotted about at intervals of some twenty or thirty miles over the whole country" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.14, - "num_samples": 50240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0005.wav", - "speed": 1 - } - ], - "original_duration": 3.14, - "original_num_samples": 50240, - "transcript": "i was delighted with the country and the manner of life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.615, - "num_samples": 265840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0006.wav", - "speed": 1 - } - ], - "original_duration": 16.615, - "original_num_samples": 265840, - "transcript": "i was to see the sheep not necessarily close at hand nor to get them in a single mob but to see enough of them here and there to feel easy that nothing had gone wrong this was no difficult matter for there were not above eight hundred of them and being all breeding ewes they were pretty quiet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.37, - "num_samples": 149920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.37, - "original_num_samples": 149920, - "transcript": "there were a good many sheep which i knew as two or three black ewes and a black lamb or two and several others which had some distinguishing mark whereby i could tell them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.815, - "num_samples": 93040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2412/153948/2412-153948-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.815, - "original_num_samples": 93040, - "transcript": "it is surprising how soon the eye becomes accustomed to missing twenty sheep out of two or three hundred" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.995, - "num_samples": 95920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.995, - "original_num_samples": 95920, - "transcript": "it was a monotonous life but it was very healthy and one does not much mind anything when one is well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.915, - "num_samples": 46640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.915, - "original_num_samples": 46640, - "transcript": "the country was the grandest that can be imagined" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.745062, - "num_samples": 155921, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0011.wav", - "speed": 1 - } - ], - "original_duration": 9.745062, - "original_num_samples": 155921, - "transcript": "so lonely and so solemn with the sad grey clouds above and no sound save a lost lamb bleating upon the mountain side as though its little heart were breaking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.17, - "num_samples": 98720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0012.wav", - "speed": 1 - } - ], - "original_duration": 6.17, - "original_num_samples": 98720, - "transcript": "each must cry louder and 
wander farther yet may luck be with them both that they may find their own at nightfall" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.54, - "num_samples": 136640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.54, - "original_num_samples": 136640, - "transcript": "i had no money but if i could only find workable country i might stock it with borrowed capital and consider myself a made man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.705, - "num_samples": 155280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0014.wav", - "speed": 1 - } - ], - "original_duration": 9.705, - "original_num_samples": 155280, - "transcript": "there was no one in the whole world who had the smallest idea save those who were themselves on the other side of it if indeed there was any one at all could i hope to cross it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.465, - "num_samples": 55440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153948/2412-153948-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.465, - "original_num_samples": 55440, - "transcript": "i would try the nearer range and see how far i could go" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.55, - "num_samples": 40800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.55, - "original_num_samples": 40800, - "transcript": "preface to second edition" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.225, - "num_samples": 
51600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.225, - "original_num_samples": 51600, - "transcript": "this is a mistake though a perfectly natural one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.595, - "num_samples": 105520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.595, - "original_num_samples": 105520, - "transcript": "on my return i purposely avoided looking into it until i had sent back my last revises to the printer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.515, - "num_samples": 152240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.515, - "original_num_samples": 152240, - "transcript": "then i had much pleasure in reading it but was indeed surprised at the many little points of similarity between the two books in spite of their entire independence to one another" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.72, - "num_samples": 139520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0004.wav", - "speed": 1 - } - ], - "original_duration": 8.72, - "original_num_samples": 139520, - "transcript": "i regret that reviewers have in some cases been inclined to treat the chapters on machines as an attempt to reduce mister darwin's theory to an absurdity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.68, - "num_samples": 218880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/2412/153947/2412-153947-0005.wav", - "speed": 1 - } - ], - "original_duration": 13.68, - "original_num_samples": 218880, - "transcript": "i am surprised however that the book at which such an example of the specious misuse of analogy would seem most naturally levelled should have occurred to no reviewer neither shall i mention the name of the book here though i should fancy that the hint given will suffice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.68, - "num_samples": 186880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0006.wav", - "speed": 1 - } - ], - "original_duration": 11.68, - "original_num_samples": 186880, - "transcript": "but this had an effect of which i have little reason to complain for i was allowed almost to call them life long self deceivers to their faces and they said it was quite true but that it did not matter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.735, - "num_samples": 139760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0007.wav", - "speed": 1 - } - ], - "original_duration": 8.735, - "original_num_samples": 139760, - "transcript": "i must not conclude without expressing my most sincere thanks to my critics and to the public for the leniency and consideration with which they have treated my adventures" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.84, - "num_samples": 173440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0008.wav", - "speed": 1 - } - ], - "original_duration": 10.84, - "original_num_samples": 173440, - "transcript": "it was written in the upper rangitata district of the canterbury province as it then was of new zealand and appeared at christchurch in the 
press newspaper june thirteenth eighteen sixty three" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.72, - "num_samples": 139520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0009.wav", - "speed": 1 - } - ], - "original_duration": 8.72, - "original_num_samples": 139520, - "transcript": "i also wrote about this time the substance of what ultimately became the musical banks and the trial of a man for being in a consumption" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.195062, - "num_samples": 275121, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0010.wav", - "speed": 1 - } - ], - "original_duration": 17.195062, - "original_num_samples": 275121, - "transcript": "i see from my second preface that i took the book to messrs chapman and hall may first eighteen seventy one and on their rejection of it under the advice of one who has attained the highest rank among living writers i let it sleep till i took it to mister trubner early in eighteen seventy two" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.13, - "num_samples": 162080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.13, - "original_num_samples": 162080, - "transcript": "i attribute its unlooked for success mainly to two early favourable reviews the first in the pall mall gazette of april twelfth and the second in the spectator of april twentieth" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.98, - "num_samples": 31680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0012.wav", - "speed": 1 - } - ], - 
"original_duration": 1.98, - "original_num_samples": 31680, - "transcript": "there was also another cause" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.42, - "num_samples": 134720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.42, - "original_num_samples": 134720, - "transcript": "the first edition of erewhon sold in about three weeks i had not taken moulds and as the demand was strong it was set up again immediately" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.795, - "num_samples": 204720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0014.wav", - "speed": 1 - } - ], - "original_duration": 12.795, - "original_num_samples": 204720, - "transcript": "i made a few further very trifling alterations before moulds were taken but since the summer of eighteen seventy two as new editions were from time to time wanted they have been printed from stereos then made" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.03, - "num_samples": 176480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0015.wav", - "speed": 1 - } - ], - "original_duration": 11.03, - "original_num_samples": 176480, - "transcript": "i am still fairly well satisfied with those parts of erewhon that were repeatedly rewritten but from those that had only a single writing i would gladly cut out some forty or fifty pages if i could" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.185, - "num_samples": 82960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/2412/153947/2412-153947-0016.wav", - "speed": 1 - } - ], - 
"original_duration": 5.185, - "original_num_samples": 82960, - "transcript": "this however may not be for the copyright will probably expire in a little over twelve years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.755, - "num_samples": 172080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0000.wav", - "speed": 1 - } - ], - "original_duration": 10.755, - "original_num_samples": 172080, - "transcript": "in the last communication which he had received from lady milborough she had scolded him in terms that were for her severe because he had not returned to his wife and taken her off with him to naples" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.915, - "num_samples": 142640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.915, - "original_num_samples": 142640, - "transcript": "and now it had come to pass that his sole remaining ally mister samuel bozzle the ex policeman was becoming weary of his service" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.45, - "num_samples": 151200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0002.wav", - "speed": 1 - } - ], - "original_duration": 9.45, - "original_num_samples": 151200, - "transcript": "at last he sent word to say that he himself would be in england before the end of march and would see that the majesty of the law should be vindicated in his favour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.94, - "num_samples": 127040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0003.wav", - "speed": 1 - } - ], - "original_duration": 
7.94, - "original_num_samples": 127040, - "transcript": "in making this he had expected no success though from the energetic nature of his disposition he had made the attempt with some zeal" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.625, - "num_samples": 234000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0004.wav", - "speed": 1 - } - ], - "original_duration": 14.625, - "original_num_samples": 234000, - "transcript": "missus bozzle was disposed to think that ladies of quality among whom madame t was entitled in her estimation to take rank were seldom better than they ought to be and she was quite willing that her husband should earn his bread by watching the lady or the lady's lover" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.115, - "num_samples": 193840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0005.wav", - "speed": 1 - } - ], - "original_duration": 12.115, - "original_num_samples": 193840, - "transcript": "and had the case been brought before the judge ordinary by means of her husband's exertions she would have taken pleasure in reading every word of the evidence even though her husband should have been ever so roughly handled by the lawyers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.795, - "num_samples": 140720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0006.wav", - "speed": 1 - } - ], - "original_duration": 8.795, - "original_num_samples": 140720, - "transcript": "a distinct promise of a hundred pounds was made to him if he would have the child ready to hand over to trevelyan on trevelyan's arrival in england" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
2.335, - "num_samples": 37360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.335, - "original_num_samples": 37360, - "transcript": "of course it ain't said missus bozzle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.01, - "num_samples": 80160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.01, - "original_num_samples": 80160, - "transcript": "the paternal parent has a right to his infants no doubt that was bozzle's law" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.005, - "num_samples": 32080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0009.wav", - "speed": 1 - } - ], - "original_duration": 2.005, - "original_num_samples": 32080, - "transcript": "he can't suckle em can he" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.5, - "num_samples": 152000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0010.wav", - "speed": 1 - } - ], - "original_duration": 9.5, - "original_num_samples": 152000, - "transcript": "when a married woman has followers and the husband don't go the wrong side of the post too or it ain't proved again him that he do they'll never let her have nothing to do with the children" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.735, - "num_samples": 139760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0011.wav", - "speed": 1 - } - ], - "original_duration": 8.735, - "original_num_samples": 139760, - "transcript": "i'll tell you what it is b exclaimed 
missus bozzle it's my belief as he ain't quite right up here and missus bozzle touched her forehead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.785, - "num_samples": 60560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.785, - "original_num_samples": 60560, - "transcript": "drat em all what is it they wants they don't know what they wants" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.87, - "num_samples": 45920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0013.wav", - "speed": 1 - } - ], - "original_duration": 2.87, - "original_num_samples": 45920, - "transcript": "it's that as makes em i won't say what" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0014.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "but as for this here child b" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.15, - "num_samples": 178400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0015.wav", - "speed": 1 - } - ], - "original_duration": 11.15, - "original_num_samples": 178400, - "transcript": "trevelyan had followed his letter quicker than he had intended when it was written and was now with his prime minister before his prime minister had been able to take any action on the last instruction received" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.74, - "num_samples": 75840, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.74, - "original_num_samples": 75840, - "transcript": "does one mister samuel bozzle live here asked trevelyan" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.255, - "num_samples": 52080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.255, - "original_num_samples": 52080, - "transcript": "then bozzle came forward and introduced his wife" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.385, - "num_samples": 150160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0018.wav", - "speed": 1 - } - ], - "original_duration": 9.385, - "original_num_samples": 150160, - "transcript": "but trevelyan was of a different opinion and he was disgusted and revolted most unreasonably by the appearance of his minister's domestic arrangements" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.33, - "num_samples": 85280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.33, - "original_num_samples": 85280, - "transcript": "bozzle had always waited upon him with a decent coat and a well brushed hat and clean shoes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.615, - "num_samples": 105840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.615, - "original_num_samples": 105840, - "transcript": "it is very much easier for such men as mister bozzle to carry decency of appearance about 
with them than to keep it at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.985, - "num_samples": 79760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.985, - "original_num_samples": 79760, - "transcript": "perhaps you could put on your coat and walk out with me for a few minutes said trevelyan" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.1, - "num_samples": 145600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0022.wav", - "speed": 1 - } - ], - "original_duration": 9.1, - "original_num_samples": 145600, - "transcript": "missus bozzle who well understood that business was business and that wives were not business felt no anger at this and handed her husband his best coat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.395062, - "num_samples": 182321, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0023.wav", - "speed": 1 - } - ], - "original_duration": 11.395062, - "original_num_samples": 182321, - "transcript": "bozzle away from his own home out on business with his coat buttoned over his breast and his best hat in his hand was aware that he commanded respect and he could carry himself accordingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.425, - "num_samples": 102800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.425, - "original_num_samples": 102800, - "transcript": "i do not suppose that anybody will question my right to have the care of my own child said trevelyan" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.83, - "num_samples": 61280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0025.wav", - "speed": 1 - } - ], - "original_duration": 3.83, - "original_num_samples": 61280, - "transcript": "if you would have gone to mister skint sir suggested bozzle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.7, - "num_samples": 155200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0026.wav", - "speed": 1 - } - ], - "original_duration": 9.7, - "original_num_samples": 155200, - "transcript": "as he went about his eyes were ever cast downwards and he walked with a quick shuffling gait and he suspected others feeling that he himself was suspected" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0027.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "and all work had ceased with him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.265, - "num_samples": 52240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.265, - "original_num_samples": 52240, - "transcript": "he's up in town sir a minding of his parliamentary duties" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.355, - "num_samples": 53680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.355, - "original_num_samples": 53680, - "transcript": 
"i've watched as sharp as watching can go pretty near" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.05, - "num_samples": 64800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0030.wav", - "speed": 1 - } - ], - "original_duration": 4.05, - "original_num_samples": 64800, - "transcript": "but if you ask me my opinion why in course they've been together somewhere" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.25, - "num_samples": 52000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0031.wav", - "speed": 1 - } - ], - "original_duration": 3.25, - "original_num_samples": 52000, - "transcript": "and bozzle as he said this smiled almost aloud" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.16, - "num_samples": 66560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/8226/3536-8226-0032.wav", - "speed": 1 - } - ], - "original_duration": 4.16, - "original_num_samples": 66560, - "transcript": "and he did go away leaving bozzle standing in the middle of stony walk" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.89, - "num_samples": 318240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0000.wav", - "speed": 1 - } - ], - "original_duration": 19.89, - "original_num_samples": 318240, - "transcript": "sir edward not wholly discouraged by the denial with which dorriforth had with delicacy acquainted him still hoped for a kind reception and was so often at the house of missus horton that lord frederick's jealousy was excited and the tortures he suffered in consequence convinced him beyond a doubt of the sincerity of his affection" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.905, - "num_samples": 254480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0001.wav", - "speed": 1 - } - ], - "original_duration": 15.905, - "original_num_samples": 254480, - "transcript": "every time he beheld the object of his passion for he still continued his visits though not so frequently as heretofore he pleaded his cause with such ardour that miss woodley who was sometimes present and ever compassionate could not resist wishing him success" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.495, - "num_samples": 167920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.495, - "original_num_samples": 167920, - "transcript": "yet did the watchful miss woodley oftentimes hear a sigh escape from her unknown to herself till she was reminded of it and then a sudden blush would instantly overspread her face" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.995, - "num_samples": 127920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.995, - "original_num_samples": 127920, - "transcript": "night after night his sleep had been disturbed by fears for her when abroad morning after morning it had been broken by the clamour of her return" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.13, - "num_samples": 50080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.13, - "original_num_samples": 50080, - "transcript": "i hope miss milner you pass this evening 
at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.93, - "num_samples": 78880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.93, - "original_num_samples": 78880, - "transcript": "i thought miss milner you gave me your word that you would pass this evening at home" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.3, - "num_samples": 84800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.3, - "original_num_samples": 84800, - "transcript": "yes indeed and i believe it is right that i should keep my first promise is it not" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.21, - "num_samples": 147360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0007.wav", - "speed": 1 - } - ], - "original_duration": 9.21, - "original_num_samples": 147360, - "transcript": "missus horton rose from her seat moved the decanters and fruit round the table stirred the fire and came back to her seat again before another word was uttered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.29, - "num_samples": 148640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.29, - "original_num_samples": 148640, - "transcript": "nor had this good woman's officious labours taken the least from the awkwardness of the silence which as soon as the bustle she had made was over returned in its full force" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 
4.315, - "num_samples": 69040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.315, - "original_num_samples": 69040, - "transcript": "miss milner you shall not leave the house this evening sir" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.905, - "num_samples": 62480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.905, - "original_num_samples": 62480, - "transcript": "and he walked immediately out of the apartment by another door" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.795, - "num_samples": 204720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0011.wav", - "speed": 1 - } - ], - "original_duration": 12.795, - "original_num_samples": 204720, - "transcript": "her hand fell motionless from that which she held she appeared motionless herself till missus horton beseeching her not to be uneasy at the treatment she had received made her tears flow as if her heart was breaking" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.255, - "num_samples": 116080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0012.wav", - "speed": 1 - } - ], - "original_duration": 7.255, - "original_num_samples": 116080, - "transcript": "it was not from any real cause of grief that she wept but there was a magnetic quality in tears which always attracted her's" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.025, - "num_samples": 128400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3536/23268/3536-23268-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.025, - "original_num_samples": 128400, - "transcript": "do you think i would go answered miss milner with an eagerness that for a time suppressed her tears in contradiction to his will" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.5, - "num_samples": 56000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.5, - "original_num_samples": 56000, - "transcript": "if you think so madam i see nothing that should prevent me now" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.95, - "num_samples": 63200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.95, - "original_num_samples": 63200, - "transcript": "niece i command you not to stir out of this room this evening" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.32, - "num_samples": 181120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0016.wav", - "speed": 1 - } - ], - "original_duration": 11.32, - "original_num_samples": 181120, - "transcript": "miss woodley obediently sat down and though her thoughts and heart were in the chamber of her friend she never marked by one impertinent word or by one line of her face the restraint she suffered" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.98, - "num_samples": 271680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0017.wav", - "speed": 1 - } - ], - "original_duration": 16.98, - "original_num_samples": 271680, - "transcript": "at the 
usual hour mister dorriforth and his ward were summoned to tea he entered with a countenance which evinced the remains of anger his eye gave testimony of his absent thoughts and though he took up a pamphlet affecting to read it was plain to discern that he scarcely knew he held it in his hand" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.585, - "num_samples": 121360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.585, - "original_num_samples": 121360, - "transcript": "miss woodley thought it her duty to be mute and now the gingle of a tea spoon was like a deep toned bell all was so quiet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.165, - "num_samples": 162640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0019.wav", - "speed": 1 - } - ], - "original_duration": 10.165, - "original_num_samples": 162640, - "transcript": "missus horton too in the self approving reflection that she was not in a quarrel or altercation of any kind felt herself at this moment remarkably peaceful and charitable" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.27, - "num_samples": 164320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0020.wav", - "speed": 1 - } - ], - "original_duration": 10.27, - "original_num_samples": 164320, - "transcript": "miss woodley did not recollect herself so but was so in reality in her peace and charity were instinctive virtues accident could not increase them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.9, - "num_samples": 222400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3536/23268/3536-23268-0021.wav", - "speed": 1 - } - ], - "original_duration": 13.9, - "original_num_samples": 222400, - "transcript": "he coughed drank his tea endeavoured to talk but found it difficult sometimes read and in this manner near two hours were passed away when miss milner came into the room not dressed for a ball but as she had risen from dinner" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.115, - "num_samples": 97840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.115, - "original_num_samples": 97840, - "transcript": "dorriforth read on and seemed afraid of looking up lest he should see what he could not have pardoned" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.545, - "num_samples": 200720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0023.wav", - "speed": 1 - } - ], - "original_duration": 12.545, - "original_num_samples": 200720, - "transcript": "after a few minutes pause and some little embarrassment on the part of missus horton at the disappointment she had to encounter from this unexpected dutiful conduct she asked miss milner if she would now have any tea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.35, - "num_samples": 101600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0024.wav", - "speed": 1 - } - ], - "original_duration": 6.35, - "original_num_samples": 101600, - "transcript": "dorriforth then laid the book out of his hand and by the time the servant had left the room thus began" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.015, - "num_samples": 80240, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0025.wav", - "speed": 1 - } - ], - "original_duration": 5.015, - "original_num_samples": 80240, - "transcript": "it is often the ungrateful task of a friend to be troublesome sometimes unmannerly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.825, - "num_samples": 141200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0026.wav", - "speed": 1 - } - ], - "original_duration": 8.825, - "original_num_samples": 141200, - "transcript": "forgive the duties of my office and believe that no one is half so much concerned if it robs you of any degree of happiness as i myself am" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.315, - "num_samples": 165040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0027.wav", - "speed": 1 - } - ], - "original_duration": 10.315, - "original_num_samples": 165040, - "transcript": "what he said he looked with so much sincerity that had she been burning with rage at his late behaviour she must have forgiven him for the regret which he so forcibly exprest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.345, - "num_samples": 133520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0028.wav", - "speed": 1 - } - ], - "original_duration": 8.345, - "original_num_samples": 133520, - "transcript": "she was going to reply but found she could not without accompanying her words with tears therefore after the first attempt she desisted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.455, - "num_samples": 119280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3536/23268/3536-23268-0029.wav", - "speed": 1 - } - ], - "original_duration": 7.455, - "original_num_samples": 119280, - "transcript": "on this he rose from his chair and going to her said once more shew your submission by obeying me a second time to day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.475, - "num_samples": 135600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3536/23268/3536-23268-0030.wav", - "speed": 1 - } - ], - "original_duration": 8.475, - "original_num_samples": 135600, - "transcript": "keep your appointment and be assured that i shall issue my commands with more circumspection for the future as i find how strictly they are complied with" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.14, - "num_samples": 226240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0000.wav", - "speed": 1 - } - ], - "original_duration": 14.14, - "original_num_samples": 226240, - "transcript": "master and man dismounted from their beasts and as soon as they had settled themselves at the foot of the trees sancho who had had a good noontide meal that day let himself without more ado pass the gates of sleep" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.78, - "num_samples": 172480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.78, - "original_num_samples": 172480, - "transcript": "but don quixote whom his thoughts far more than hunger kept awake could not close an eye and roamed in fancy to and fro through all sorts of places" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.33, - "num_samples": 357280, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0002.wav", - "speed": 1 - } - ], - "original_duration": 22.33, - "original_num_samples": 357280, - "transcript": "at one moment it seemed to him that he was in the cave of montesinos and saw dulcinea transformed into a country wench skipping and mounting upon her she ass again that the words of the sage merlin were sounding in his ears setting forth the conditions to be observed and the exertions to be made for the disenchantment of dulcinea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.51, - "num_samples": 40160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0003.wav", - "speed": 1 - } - ], - "original_duration": 2.51, - "original_num_samples": 40160, - "transcript": "who is touching me and untrussing me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.56, - "num_samples": 264960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0004.wav", - "speed": 1 - } - ], - "original_duration": 16.56, - "original_num_samples": 264960, - "transcript": "dulcinea is perishing thou art living on regardless i am dying of hope deferred therefore untruss thyself with a good will for mine it is here in this retired spot to give thee at least two thousand lashes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.655, - "num_samples": 266480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0005.wav", - "speed": 1 - } - ], - "original_duration": 16.655, - "original_num_samples": 266480, - "transcript": "seeing this sancho got up and grappling with his master he gripped him with all his might in his arms giving him a trip with the heel stretched him on the ground on his back 
and pressing his right knee on his chest held his hands in his own so that he could neither move nor breathe" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.945, - "num_samples": 63120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.945, - "original_num_samples": 63120, - "transcript": "how now traitor exclaimed don quixote" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.55, - "num_samples": 56800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.55, - "original_num_samples": 56800, - "transcript": "dost thou revolt against thy master and natural lord" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.86, - "num_samples": 61760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.86, - "original_num_samples": 61760, - "transcript": "dost thou rise against him who gives thee his bread" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.485, - "num_samples": 231760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0009.wav", - "speed": 1 - } - ], - "original_duration": 14.485, - "original_num_samples": 231760, - "transcript": "don quixote gave his promise and swore by the life of his thoughts not to touch so much as a hair of his garments and to leave him entirely free and to his own discretion to whip himself whenever he pleased" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.735, - "num_samples": 251760, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0010.wav", - "speed": 1 - } - ], - "original_duration": 15.735, - "original_num_samples": 251760, - "transcript": "sancho rose and removed some distance from the spot but as he was about to place himself leaning against another tree he felt something touch his head and putting up his hands encountered somebody's two feet with shoes and stockings on them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.28, - "num_samples": 180480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0011.wav", - "speed": 1 - } - ], - "original_duration": 11.28, - "original_num_samples": 180480, - "transcript": "he trembled with fear and made for another tree where the very same thing happened to him and he fell a shouting calling upon don quixote to come and protect him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.48, - "num_samples": 87680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.48, - "original_num_samples": 87680, - "transcript": "don quixote did so and asked him what had happened to him and what he was afraid of" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.715, - "num_samples": 75440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.715, - "original_num_samples": 75440, - "transcript": "sancho replied that all the trees were full of men's feet and legs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.13, - "num_samples": 290080, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0014.wav", - "speed": 1 - } - ], - "original_duration": 18.13, - "original_num_samples": 290080, - "transcript": "don quixote was on foot with his horse unbridled and his lance leaning against a tree and in short completely defenceless he thought it best therefore to fold his arms and bow his head and reserve himself for a more favourable occasion and opportunity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.185, - "num_samples": 146960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0015.wav", - "speed": 1 - } - ], - "original_duration": 9.185, - "original_num_samples": 146960, - "transcript": "he was mounted upon a powerful horse and had on a coat of mail with four of the pistols they call petronels in that country at his waist" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.94, - "num_samples": 191040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0016.wav", - "speed": 1 - } - ], - "original_duration": 11.94, - "original_num_samples": 191040, - "transcript": "he saw that his squires for so they call those who follow that trade were about to rifle sancho panza but he ordered them to desist and was at once obeyed so the girdle escaped" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.325, - "num_samples": 277200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0017.wav", - "speed": 1 - } - ], - "original_duration": 17.325, - "original_num_samples": 277200, - "transcript": "he saw me he paid court to me i listened to him and unknown to my father i loved him for there is no woman however secluded she may live or close she may be kept who will not have opportunities 
and to spare for following her headlong impulses" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.32, - "num_samples": 117120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.32, - "original_num_samples": 117120, - "transcript": "in a word he pledged himself to be mine and i promised to be his without carrying matters any further" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.935, - "num_samples": 158960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0019.wav", - "speed": 1 - } - ], - "original_duration": 9.935, - "original_num_samples": 158960, - "transcript": "give me my horse and arms and wait for me here i will go in quest of this knight and dead or alive i will make him keep his word plighted to so great beauty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.52, - "num_samples": 392320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0020.wav", - "speed": 1 - } - ], - "original_duration": 24.52, - "original_num_samples": 392320, - "transcript": "nobody need have any doubt about that said sancho for my master has a very happy knack of matchmaking it's not many days since he forced another man to marry who in the same way backed out of his promise to another maiden and if it had not been for his persecutors the enchanters changing the man's proper shape into a lacquey's the said maiden would not be one this minute" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.18, - "num_samples": 98880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0021.wav", - "speed": 1 - } 
- ], - "original_duration": 6.18, - "original_num_samples": 98880, - "transcript": "they made haste to overtake them which as the party moved slowly they were able to do with ease" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 26.9, - "num_samples": 430400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0022.wav", - "speed": 1 - } - ], - "original_duration": 26.9, - "original_num_samples": 430400, - "transcript": "the wounded gentleman opened his all but closed eyes and recognising claudia said i see clearly fair and mistaken lady that it is thou that hast slain me a punishment not merited or deserved by my feelings towards thee for never did i mean to nor could i wrong thee in thought or deed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.07, - "num_samples": 177120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0023.wav", - "speed": 1 - } - ], - "original_duration": 11.07, - "original_num_samples": 177120, - "transcript": "it is not true then said claudia that thou wert going this morning to marry leonora the daughter of the rich balvastro" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.99, - "num_samples": 367840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0024.wav", - "speed": 1 - } - ], - "original_duration": 22.99, - "original_num_samples": 367840, - "transcript": "on perceiving this claudia when she had convinced herself that her beloved husband was no more rent the air with her sighs and made the heavens ring with her lamentations she tore her hair and scattered it to the winds she beat her face with her hands and showed all the signs of grief and sorrow that could be conceived to come from an afflicted heart" - }, - 
{ - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.525, - "num_samples": 120400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0025.wav", - "speed": 1 - } - ], - "original_duration": 7.525, - "original_num_samples": 120400, - "transcript": "cruel reckless woman she cried how easily wert thou moved to carry out a thought so wicked" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.13, - "num_samples": 130080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0026.wav", - "speed": 1 - } - ], - "original_duration": 8.13, - "original_num_samples": 130080, - "transcript": "o husband whose unhappy fate in being mine hath borne thee from the marriage bed to the grave" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.395, - "num_samples": 166320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0027.wav", - "speed": 1 - } - ], - "original_duration": 10.395, - "original_num_samples": 166320, - "transcript": "the servants wept claudia swooned away again and again and the whole place seemed a field of sorrow and an abode of misfortune" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.46, - "num_samples": 167360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0028.wav", - "speed": 1 - } - ], - "original_duration": 10.46, - "original_num_samples": 167360, - "transcript": "claudia told him she meant to go to a monastery of which an aunt of hers was abbess where she intended to pass her life with a better and everlasting spouse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.06, - "num_samples": 
160960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0029.wav", - "speed": 1 - } - ], - "original_duration": 10.06, - "original_num_samples": 160960, - "transcript": "claudia would not on any account allow him to accompany her and thanking him for his offers as well as she could took leave of him in tears" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.905, - "num_samples": 110480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0030.wav", - "speed": 1 - } - ], - "original_duration": 6.905, - "original_num_samples": 110480, - "transcript": "sancho said they had but that three kerchiefs that were worth three cities were missing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.814938, - "num_samples": 45039, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0031.wav", - "speed": 1 - } - ], - "original_duration": 2.814938, - "original_num_samples": 45039, - "transcript": "what are you talking about man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.585063, - "num_samples": 89361, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0032.wav", - "speed": 1 - } - ], - "original_duration": 5.585063, - "original_num_samples": 89361, - "transcript": "said one of the bystanders i have got them and they are not worth three reals" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.78, - "num_samples": 284480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0033.wav", - "speed": 1 - } - ], - "original_duration": 17.78, - "original_num_samples": 284480, - "transcript": "at this instant one 
or two of those squires who were posted as sentinels on the roads to watch who came along them and report what passed to their chief came up and said senor there is a great troop of people not far off coming along the road to barcelona" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.065, - "num_samples": 129040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0034.wav", - "speed": 1 - } - ], - "original_duration": 8.065, - "original_num_samples": 129040, - "transcript": "what led me into it was a certain thirst for vengeance which is strong enough to disturb the quietest hearts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.725, - "num_samples": 331600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0035.wav", - "speed": 1 - } - ], - "original_duration": 20.725, - "original_num_samples": 331600, - "transcript": "and if you have any desire to shorten the journey and put yourself easily in the way of salvation come with me and i will show you how to become a knight errant a calling wherein so many hardships and mishaps are encountered that if they be taken as penances they will lodge you in heaven in a trice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.29, - "num_samples": 308640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0036.wav", - "speed": 1 - } - ], - "original_duration": 19.29, - "original_num_samples": 308640, - "transcript": "and now the squires despatched to make the prize came up bringing with them two gentlemen on horseback two pilgrims on foot and a coach full of women with some six servants on foot and on horseback in attendance on them and a couple of muleteers whom the gentlemen had with them" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.36, - "num_samples": 165760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0037.wav", - "speed": 1 - } - ], - "original_duration": 10.36, - "original_num_samples": 165760, - "transcript": "the captains showed plainly the concern they felt the regent's lady was downcast and the pilgrims did not at all enjoy seeing their property confiscated" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.65, - "num_samples": 154400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0038.wav", - "speed": 1 - } - ], - "original_duration": 9.65, - "original_num_samples": 154400, - "transcript": "the regent's lady ordered one of her servants to give the eighty crowns that had been assessed as her share at once for the captains had already paid down their sixty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.15, - "num_samples": 226400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0039.wav", - "speed": 1 - } - ], - "original_duration": 14.15, - "original_num_samples": 226400, - "transcript": "one of the squires observed in his mixture of gascon and catalan this captain of ours would make a better friar than highwayman if he wants to be so generous another time let it be with his own property and not ours" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.69, - "num_samples": 107040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3576/138058/3576-138058-0040.wav", - "speed": 1 - } - ], - "original_duration": 6.69, - "original_num_samples": 107040, - "transcript": "they were all taken aback and not one of them dared to utter a word 
such deference did they pay him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.27, - "num_samples": 148320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.27, - "original_num_samples": 148320, - "transcript": "but this power of discernment was denied them and only in after years with the loved ones of their own firesides close about them was the whole picture revealed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.105, - "num_samples": 129680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.105, - "original_num_samples": 129680, - "transcript": "one dark night at the head of a score of his tribe he fell upon wabigoon's camp his object being the abduction of the princess" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.195, - "num_samples": 67120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.195, - "original_num_samples": 67120, - "transcript": "while the attack was successful in a way its main purpose failed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.485, - "num_samples": 87760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.485, - "original_num_samples": 87760, - "transcript": "a counter attack was made upon woonga and he was driven deep into the wilderness with great loss" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.845, - 
"num_samples": 77520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.845, - "original_num_samples": 77520, - "transcript": "three days later minnetaki became newsome's wife at the hudson bay post" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.81, - "num_samples": 172960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.81, - "original_num_samples": 172960, - "transcript": "from that hour dated one of the most sanguinary feuds in the history of the great trading company a feud which as we shall see was destined to live even unto the second generation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.795, - "num_samples": 92720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.795, - "original_num_samples": 92720, - "transcript": "meanwhile two children came to bless the happy union of newsome and his lovely indian wife" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.115, - "num_samples": 81840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.115, - "original_num_samples": 81840, - "transcript": "the other was a girl three years younger and newsome insisted that she be called minnetaki" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.8, - "num_samples": 284800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0008.wav", - "speed": 1 - } - ], - 
"original_duration": 17.8, - "original_num_samples": 284800, - "transcript": "curiously enough the blood of wabi ran almost pure to his indian forefathers while minnetaki as she became older developed less of the wild beauty of her mother and more of the softer loveliness of the white race her wealth of soft jet black hair and her great dark eyes contrasting with the lighter skin of her father's blood" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.145, - "num_samples": 178320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0009.wav", - "speed": 1 - } - ], - "original_duration": 11.145, - "original_num_samples": 178320, - "transcript": "wabi on the other hand was an indian in appearance from his moccasins to the crown of his head swarthy sinewy as agile as a lynx and with every instinct in him crying for the life of the wild" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.4, - "num_samples": 150400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0010.wav", - "speed": 1 - } - ], - "original_duration": 9.4, - "original_num_samples": 150400, - "transcript": "one of newsome's chief pleasures in life had been the educating of his woodland bride and it was the ambition of both that the little minnetaki and her brother be reared in the ways of white children" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.725, - "num_samples": 171600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.725, - "original_num_samples": 171600, - "transcript": "consequently both mother and father began their education at the post they were sent to the factor's school and two winters were passed in port arthur 
that they might have the advantage of thoroughly equipped schools" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.63, - "num_samples": 170080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0012.wav", - "speed": 1 - } - ], - "original_duration": 10.63, - "original_num_samples": 170080, - "transcript": "the children proved themselves unusually bright pupils and by the time wabi was sixteen and minnetaki twelve one would not have known from their manner of speech that indian blood ran in their veins" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.18, - "num_samples": 82880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.18, - "original_num_samples": 82880, - "transcript": "it was at about this time in their lives that the woongas became especially daring in their depredations" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.695, - "num_samples": 139120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.695, - "original_num_samples": 139120, - "transcript": "at last so daring did he become that the provincial government placed a price upon his head and upon those of a number of his most notorious followers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.62, - "num_samples": 73920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.62, - "original_num_samples": 73920, - "transcript": "but each week added to his loneliness and his longings for minnetaki and his forests" 
- }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.05, - "num_samples": 80800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.05, - "original_num_samples": 80800, - "transcript": "necessity had become his grim master and the following week he was going to work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.555, - "num_samples": 104880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.555, - "original_num_samples": 104880, - "transcript": "a thousand plans were made a thousand adventures pictured and the mother would smile and laugh and plan with them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.325, - "num_samples": 117200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.325, - "original_num_samples": 117200, - "transcript": "but in time the end of it all came and wabi went back to the princess mother to minnetaki and to his forests" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.315, - "num_samples": 101040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.315, - "original_num_samples": 101040, - "transcript": "there were tears in the boys eyes when they parted and the mother cried for the indian boy who was returning to his people" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.905, - "num_samples": 46480, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0020.wav", - "speed": 1 - } - ], - "original_duration": 2.905, - "original_num_samples": 46480, - "transcript": "spring came and passed and then summer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.16, - "num_samples": 66560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.16, - "original_num_samples": 66560, - "transcript": "we shall make more money up here this winter than you could earn in detroit in three years" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.32, - "num_samples": 101120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.32, - "original_num_samples": 101120, - "transcript": "we will hunt wolves the country is alive with them and the government gives a bounty of fifteen dollars for every scalp taken" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.665, - "num_samples": 42640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0023.wav", - "speed": 1 - } - ], - "original_duration": 2.665, - "original_num_samples": 42640, - "transcript": "three weeks later came wabigoon's reply" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.175, - "num_samples": 66800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0024.wav", - "speed": 1 - } - ], - "original_duration": 4.175, - "original_num_samples": 66800, - "transcript": "on the tenth of october he would meet rod at sprucewood on the black sturgeon river" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.305, - "num_samples": 164880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/66616/6241-66616-0025.wav", - "speed": 1 - } - ], - "original_duration": 10.305, - "original_num_samples": 164880, - "transcript": "there was little time to lose in making preparations and the fourth day following the receipt of wabi's letter found rod and his mother waiting for the train which was to whirl the boy into his new life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.235, - "num_samples": 99760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.235, - "original_num_samples": 99760, - "transcript": "i began to enjoy the exhilarating delight of traveling a life of desire gratification and liberty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.05, - "num_samples": 96800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.05, - "original_num_samples": 96800, - "transcript": "hans our extraordinary guide went first walking with a steady rapid unvarying step" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.485, - "num_samples": 87760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0002.wav", - "speed": 1 - } - ], - "original_duration": 5.485, - "original_num_samples": 87760, - "transcript": "our two horses with the luggage followed of their own accord without requiring whip or spur" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.44, - "num_samples": 135040, - "encoding": "Signed Integer PCM", - 
"silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.44, - "original_num_samples": 135040, - "transcript": "geographers have divided it into four parts and we had to cross the southwest quarter which in the vernacular is called sudvestr fjordungr" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.515, - "num_samples": 88240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.515, - "original_num_samples": 88240, - "transcript": "we took our way through poor and sparse meadows which made a desperate effort every year to show a little green" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.95, - "num_samples": 47200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.95, - "original_num_samples": 47200, - "transcript": "they very rarely succeed in a good show of yellow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.93, - "num_samples": 126880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.93, - "original_num_samples": 126880, - "transcript": "i could not help smiling to see him look so big on his little horse his long legs now and then touching the ground made him look like a six footed centaur" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.02, - "num_samples": 96320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.02, - "original_num_samples": 
96320, - "transcript": "snow tempest impracticable roads rocks icebergs nothing stops him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.285, - "num_samples": 68560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.285, - "original_num_samples": 68560, - "transcript": "we may do so was my reply but what about our worthy guide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "i should have a violent attack of the cramp if i were not to have some sort of exercise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.21, - "num_samples": 51360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.21, - "original_num_samples": 51360, - "transcript": "my arms are right but my legs are getting a little stiff" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.105063, - "num_samples": 161681, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0011.wav", - "speed": 1 - } - ], - "original_duration": 10.105063, - "original_num_samples": 161681, - "transcript": "here and there could be seen an isolated farm some solitary bur or icelandic house built of wood earth fragments of lava looking like beggars on the highway of life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.975, - "num_samples": 47600, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0012.wav", - "speed": 1 - } - ], - "original_duration": 2.975, - "original_num_samples": 47600, - "transcript": "a few stray cows and sheep were only seen occasionally" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.62, - "num_samples": 153920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0013.wav", - "speed": 1 - } - ], - "original_duration": 9.62, - "original_num_samples": 153920, - "transcript": "little did i expect however the spectacle which awaited us when we reached the peninsula of sneffels where agglomerations of nature's ruins form a kind of terrible chaos" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.345, - "num_samples": 85520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.345, - "original_num_samples": 85520, - "transcript": "it consists simply of a few houses not what in england or germany we should call a hamlet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.97, - "num_samples": 63520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0015.wav", - "speed": 1 - } - ], - "original_duration": 3.97, - "original_num_samples": 63520, - "transcript": "i took occasion to consult the map to see where gardar was to be found" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.33, - "num_samples": 101280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0016.wav", - "speed": 1 - } - ], - "original_duration": 6.33, - "original_num_samples": 101280, - "transcript": "these 
sacred edifices are however very much like these people who do without watches and never miss them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.455, - "num_samples": 71280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0017.wav", - "speed": 1 - } - ], - "original_duration": 4.455, - "original_num_samples": 71280, - "transcript": "to ride over salt water upon the back of a little horse seemed to me absurd" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.905, - "num_samples": 62480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0018.wav", - "speed": 1 - } - ], - "original_duration": 3.905, - "original_num_samples": 62480, - "transcript": "in any case i shall trust rather to my own intelligence than theirs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.27, - "num_samples": 36320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.27, - "original_num_samples": 36320, - "transcript": "but my uncle was in no humor to wait" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.955, - "num_samples": 191280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0020.wav", - "speed": 1 - } - ], - "original_duration": 11.955, - "original_num_samples": 191280, - "transcript": "at length the sturdy little pony spreading out his legs in a stiff and ludicrous attitude got from under the professor's legs and left him standing with both feet on a separate stone like the colossus of rhodes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.455, - 
"num_samples": 71280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.455, - "original_num_samples": 71280, - "transcript": "he says tide replied my uncle translating the danish word for my information" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.275, - "num_samples": 132400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0022.wav", - "speed": 1 - } - ], - "original_duration": 8.275, - "original_num_samples": 132400, - "transcript": "i thoroughly understood and appreciated the necessity for waiting before crossing the fjord for that moment when the sea at its highest point is in a state of slack water" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.585, - "num_samples": 105360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61946/6241-61946-0023.wav", - "speed": 1 - } - ], - "original_duration": 6.585, - "original_num_samples": 105360, - "transcript": "accustomed as i had been to the steam ferry boats of the elbe i found the long oars of the boatmen but sorry means of locomotion" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.95, - "num_samples": 111200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.95, - "original_num_samples": 111200, - "transcript": "on the second of the month at two in the morning our precious cargo of luggage was taken on board the good ship valkyrie" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.22, - "num_samples": 51520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/6241/61943/6241-61943-0001.wav", - "speed": 1 - } - ], - "original_duration": 3.22, - "original_num_samples": 51520, - "transcript": "but in the cause of science men are expected to suffer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.735, - "num_samples": 43760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0002.wav", - "speed": 1 - } - ], - "original_duration": 2.735, - "original_num_samples": 43760, - "transcript": "well and have we a fair wind" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.825, - "num_samples": 125200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.825, - "original_num_samples": 125200, - "transcript": "my uncle was delighted for myself moody and dissatisfied i appeared almost to expect a glimpse of the ghost of hamlet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.41, - "num_samples": 54560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0004.wav", - "speed": 1 - } - ], - "original_duration": 3.41, - "original_num_samples": 54560, - "transcript": "but no ghost or anything else appeared upon the ancient walls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.9, - "num_samples": 78400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.9, - "original_num_samples": 78400, - "transcript": "the fact is the castle is much later than the time of the heroic prince of denmark" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.83, - 
"num_samples": 61280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0006.wav", - "speed": 1 - } - ], - "original_duration": 3.83, - "original_num_samples": 61280, - "transcript": "no mister hardwigg said the captain no fear of that" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.64, - "num_samples": 42240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0007.wav", - "speed": 1 - } - ], - "original_duration": 2.64, - "original_num_samples": 42240, - "transcript": "at all events we shall get there some day" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.110062, - "num_samples": 129761, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.110062, - "original_num_samples": 129761, - "transcript": "on the eleventh day we sighted cape portland over which towered mount myrdals yokul which the weather being clear we made out very readily" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.845, - "num_samples": 61520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.845, - "original_num_samples": 61520, - "transcript": "the valkyrie kept off the coast steering to the westward" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.53, - "num_samples": 72480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0010.wav", - "speed": 1 - } - ], - "original_duration": 4.53, - "original_num_samples": 72480, - "transcript": "on all sides were to be seen whole schools of whales and sharks" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.9, - "num_samples": 62400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.9, - "original_num_samples": 62400, - "transcript": "nearly the whole population of the town was on foot to see us land" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.86, - "num_samples": 93760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.86, - "original_num_samples": 93760, - "transcript": "the fact was that scarcely any one of them but expected some goods by the periodical vessel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.97, - "num_samples": 127520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0013.wav", - "speed": 1 - } - ], - "original_duration": 7.97, - "original_num_samples": 127520, - "transcript": "then without further remark he put his finger to his lips frowned darkly and descended into the small boat which awaited us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.74, - "num_samples": 107840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0014.wav", - "speed": 1 - } - ], - "original_duration": 6.74, - "original_num_samples": 107840, - "transcript": "he was however but a civil servant a magistrate the governor of the island baron trampe" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.49, - "num_samples": 39840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0015.wav", - 
"speed": 1 - } - ], - "original_duration": 2.49, - "original_num_samples": 39840, - "transcript": "the professor knew whom he had to deal with" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.665, - "num_samples": 74640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0016.wav", - "speed": 1 - } - ], - "original_duration": 4.665, - "original_num_samples": 74640, - "transcript": "this modest scholar spoke no languages save icelandic and latin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.09, - "num_samples": 97440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0017.wav", - "speed": 1 - } - ], - "original_duration": 6.09, - "original_num_samples": 97440, - "transcript": "when therefore he addressed himself to me in the language of horace we at once came to understand one another" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.19, - "num_samples": 115040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.19, - "original_num_samples": 115040, - "transcript": "now harry said my uncle rubbing his hands an goes well the worse difficulty is now over" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.905, - "num_samples": 46480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.905, - "original_num_samples": 46480, - "transcript": "in the meantime there is not an hour to lose" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.395, - "num_samples": 70320, - "encoding": "Signed 
Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.395, - "original_num_samples": 70320, - "transcript": "very likely i may find there some manuscripts from the hand of saknussemm" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.52, - "num_samples": 40320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0021.wav", - "speed": 1 - } - ], - "original_duration": 2.52, - "original_num_samples": 40320, - "transcript": "i shall be glad to consult them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.02, - "num_samples": 48320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0022.wav", - "speed": 1 - } - ], - "original_duration": 3.02, - "original_num_samples": 48320, - "transcript": "they were now however absent on duty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.355, - "num_samples": 69680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.355, - "original_num_samples": 69680, - "transcript": "though not very large it appeared not likely to be filled for centuries" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.36, - "num_samples": 117760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0024.wav", - "speed": 1 - } - ], - "original_duration": 7.36, - "original_num_samples": 117760, - "transcript": "i have not the slightest doubt that in high winds its red tiles were blown out to the great annoyance of the pastor and congregation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, 
- "bitrate": 16, - "duration": 6.26, - "num_samples": 100160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0025.wav", - "speed": 1 - } - ], - "original_duration": 6.26, - "original_num_samples": 100160, - "transcript": "thanks to the heat of these residences grass grows on the roof which grass is carefully cut for hay" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.105, - "num_samples": 145680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0026.wav", - "speed": 1 - } - ], - "original_duration": 9.105, - "original_num_samples": 145680, - "transcript": "i saw but few inhabitants during my excursion but i met a crowd on the beach drying salting and loading codfish the principal article of exportation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.87, - "num_samples": 285920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/6241/61943/6241-61943-0027.wav", - "speed": 1 - } - ], - "original_duration": 17.87, - "original_num_samples": 285920, - "transcript": "the men appeared robust but heavy fair haired like germans but of pensive mien exiles of a higher scale in the ladder of humanity than the eskimos but i thought much more unhappy since with superior perceptions they are compelled to live within the limits of the polar circle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.195, - "num_samples": 115120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0000.wav", - "speed": 1 - } - ], - "original_duration": 7.195, - "original_num_samples": 115120, - "transcript": "san francisco's care free spirit was fully exemplified before the ashes of the great fire of nineteen o six were cold" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.855, - "num_samples": 173680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0001.wav", - "speed": 1 - } - ], - "original_duration": 10.855, - "original_num_samples": 173680, - "transcript": "thompson opened a large restaurant in o'farrell street just above fillmore and for two years or more did a thriving business his place being noted for its good cooking and its splendid service" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.885, - "num_samples": 222160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0002.wav", - "speed": 1 - } - ], - "original_duration": 13.885, - "original_num_samples": 222160, - "transcript": "one of his waiters phil tyson was one of the earlier ones to go back into the burned district to begin business and he opened a restaurant called the del monte in powell street near market but it was too early for success and closed after a short career" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.365, - "num_samples": 101840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.365, - "original_num_samples": 101840, - "transcript": "here as well as in a number of other places one can well appreciate the colloquial definition of cabaret" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.085, - "num_samples": 241360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0004.wav", - "speed": 1 - } - ], - "original_duration": 15.085, - "original_num_samples": 241360, - "transcript": "here there is always good music and food well cooked and well served 
and always a lively crowd during the luncheon dinner and after theatre hours the room is not large but its dimensions are greatly magnified owing to the covering of mirrors which line the walls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.445, - "num_samples": 167120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.445, - "original_num_samples": 167120, - "transcript": "this garish display of mirrors and elaborate decoration of ceiling and pillars gives it the appearance of the abode of saturnalia but decorum is the rule among the patrons" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.405, - "num_samples": 118480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0006.wav", - "speed": 1 - } - ], - "original_duration": 7.405, - "original_num_samples": 118480, - "transcript": "john tait is the presiding spirit here he having made reputation as club manager and then as manager of the cliff house" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.85, - "num_samples": 77600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.85, - "original_num_samples": 77600, - "transcript": "the poodle dog has a hotel attachment where one may get rooms or full apartments" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.305, - "num_samples": 148880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.305, - "original_num_samples": 148880, - "transcript": "if you know how to order and do 
not care to count the cost when you order probably the best dinner at these restaurants can be had at either blanco's or the poodle dog" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.835, - "num_samples": 61360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0009.wav", - "speed": 1 - } - ], - "original_duration": 3.835, - "original_num_samples": 61360, - "transcript": "the cuisine is of the best and the chefs rank at the top of their art" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.85, - "num_samples": 109600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.85, - "original_num_samples": 109600, - "transcript": "at the two mentioned one pays for the surroundings as well as for the food and sometimes this is worth paying for" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.925, - "num_samples": 302800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0011.wav", - "speed": 1 - } - ], - "original_duration": 18.925, - "original_num_samples": 302800, - "transcript": "the restaurants of the present day that approach nearest the old bohemian restaurants of pre fire days of the french class are jack's in sacramento street between montgomery and kearny felix in montgomery street between clay and washington and the poodle dog bergez franks in bush street between kearny and grant avenue" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.8, - "num_samples": 92800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.8, - 
"original_num_samples": 92800, - "transcript": "in either of these restaurants you will be served with the best the market affords cooked the right way" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.335, - "num_samples": 85360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.335, - "original_num_samples": 85360, - "transcript": "in this same district is the mint in commercial street between montgomery and kearny streets" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.3, - "num_samples": 84800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0014.wav", - "speed": 1 - } - ], - "original_duration": 5.3, - "original_num_samples": 84800, - "transcript": "it has changed from what it was in the old days but is still an excellent place to dine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.35, - "num_samples": 149600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0015.wav", - "speed": 1 - } - ], - "original_duration": 9.35, - "original_num_samples": 149600, - "transcript": "it is an idea that is worth while but unfortunately the proprietors depend too much on the decorative feature and too little on the food and how they serve it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.41, - "num_samples": 166560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0016.wav", - "speed": 1 - } - ], - "original_duration": 10.41, - "original_num_samples": 166560, - "transcript": "the fly trap and charlie's fashion the first in sutter street near kearny and the other in market near sutter serve 
well cooked foods especially soup salads and fish" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.855, - "num_samples": 93680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0017.wav", - "speed": 1 - } - ], - "original_duration": 5.855, - "original_num_samples": 93680, - "transcript": "of course these are not the entire menus but of all the well prepared dishes these are their best" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.26, - "num_samples": 52160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0018.wav", - "speed": 1 - } - ], - "original_duration": 3.26, - "original_num_samples": 52160, - "transcript": "both serve good spanish dinners at reasonable prices" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.105, - "num_samples": 97680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0019.wav", - "speed": 1 - } - ], - "original_duration": 6.105, - "original_num_samples": 97680, - "transcript": "his prices are moderate and his cooking and viands of the best and will satisfy the most critical of the gourmets" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.68, - "num_samples": 106880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.68, - "original_num_samples": 106880, - "transcript": "at the corner of market and eddy streets is the odeon down in a basement with decorations of most garish order" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.385, - "num_samples": 182160, - "encoding": "Signed Integer PCM", - "silent": 
false, - "fname": "dev-clean-wav/652/130726/652-130726-0021.wav", - "speed": 1 - } - ], - "original_duration": 11.385, - "original_num_samples": 182160, - "transcript": "one can almost imagine himself in one of the famous rathskellers of old heidelberg not at the schloss of course for here you cannot look down on the weiser as it flows beneath the windows of the great wine stube on the hill" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.365, - "num_samples": 197840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0022.wav", - "speed": 1 - } - ], - "original_duration": 12.365, - "original_num_samples": 197840, - "transcript": "but if you really love good music music that has melody and rhythm and soothing cadences go to the heidelberg inn and listen to the concert which is a feature of the place every evening" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.75, - "num_samples": 108000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0023.wav", - "speed": 1 - } - ], - "original_duration": 6.75, - "original_num_samples": 108000, - "transcript": "we finally got him to select the one prized above all others and this is what chef scheiler gave us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.735, - "num_samples": 171760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0024.wav", - "speed": 1 - } - ], - "original_duration": 10.735, - "original_num_samples": 171760, - "transcript": "to the pickle add two large onions cut in quarters two fresh carrots and about one ounce of mixed whole allspice black peppers cloves and bay leaves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.63, - 
"num_samples": 42080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0025.wav", - "speed": 1 - } - ], - "original_duration": 2.63, - "original_num_samples": 42080, - "transcript": "put in the oven and brown to a golden color" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.135, - "num_samples": 98160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0026.wav", - "speed": 1 - } - ], - "original_duration": 6.135, - "original_num_samples": 98160, - "transcript": "then take it out of the roasting pan and put it into a casserole after sprinkling it with two ounces of flour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.235, - "num_samples": 83760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0027.wav", - "speed": 1 - } - ], - "original_duration": 5.235, - "original_num_samples": 83760, - "transcript": "put into the oven again and cook for half an hour basting frequently with the original brine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.02, - "num_samples": 48320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.02, - "original_num_samples": 48320, - "transcript": "when done take the meat out of the sauce" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.775, - "num_samples": 124400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0029.wav", - "speed": 1 - } - ], - "original_duration": 7.775, - "original_num_samples": 124400, - "transcript": "strain the sauce through a fine collander and add a few raisins a piece of honey 
cake or ginger snaps and the meat of one fresh tomato" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.845, - "num_samples": 45520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.845, - "original_num_samples": 45520, - "transcript": "season with salt and pepper and a little sugar to taste" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.745, - "num_samples": 91920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0031.wav", - "speed": 1 - } - ], - "original_duration": 5.745, - "original_num_samples": 91920, - "transcript": "the hof brau however is less distinctively german as the greater number of its patrons are americans" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.99, - "num_samples": 95840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0032.wav", - "speed": 1 - } - ], - "original_duration": 5.99, - "original_num_samples": 95840, - "transcript": "the specialty of the hof brau is abalone's and they have as a feature this shell fish cooked in several ways" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.8, - "num_samples": 172800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0033.wav", - "speed": 1 - } - ], - "original_duration": 10.8, - "original_num_samples": 172800, - "transcript": "they also have as the chef in charge of the abalone dishes herbert formerly chef for one of the yacht clubs of the coast who claims to have the only proper recipe for making abalone's tender" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - 
"duration": 11.155, - "num_samples": 178480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0034.wav", - "speed": 1 - } - ], - "original_duration": 11.155, - "original_num_samples": 178480, - "transcript": "under ordinary circumstances the abalone is tough and unpalatable but after the deft manipulation of herbert they are tender and make a fine dish either fried as chowder or a la newberg" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.09, - "num_samples": 81440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130726/652-130726-0035.wav", - "speed": 1 - } - ], - "original_duration": 5.09, - "original_num_samples": 81440, - "transcript": "in addition to abalone's the hof brau makes a specialty of little oregon crawfish" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.99, - "num_samples": 159840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.99, - "original_num_samples": 159840, - "transcript": "never drink any hard liquors such as whisky brandy gin or cocktails with oysters or clams as it is liable to upset you for the rest of the evening" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.24, - "num_samples": 99840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0001.wav", - "speed": 1 - } - ], - "original_duration": 6.24, - "original_num_samples": 99840, - "transcript": "with soup and fish serve white wines such as rhein wine sauterne or white burgundy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.57, - "num_samples": 121120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/652/130737/652-130737-0002.wav", - "speed": 1 - } - ], - "original_duration": 7.57, - "original_num_samples": 121120, - "transcript": "with entrees serve clarets or other red wines such as swiss bordeaux hungarian or italian wines" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.97, - "num_samples": 127520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.97, - "original_num_samples": 127520, - "transcript": "austrian burgundy is one of the finest wines possessing rich flavor and fine perfume other burgundies are" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.99, - "num_samples": 79840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0004.wav", - "speed": 1 - } - ], - "original_duration": 4.99, - "original_num_samples": 79840, - "transcript": "chablis a white burgundy dry and of agreeable aroma" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.135, - "num_samples": 66160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0005.wav", - "speed": 1 - } - ], - "original_duration": 4.135, - "original_num_samples": 66160, - "transcript": "clarets are valued for their flavor and for their tonic properties" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.86, - "num_samples": 77760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0006.wav", - "speed": 1 - } - ], - "original_duration": 4.86, - "original_num_samples": 77760, - "transcript": "german wines are of lighter character and are generally termed rhein wines" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 
16000.0, - "bitrate": 16, - "duration": 4.05, - "num_samples": 64800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.05, - "original_num_samples": 64800, - "transcript": "hochheimer a light pleasing and wholesome wine" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.385, - "num_samples": 38160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0008.wav", - "speed": 1 - } - ], - "original_duration": 2.385, - "original_num_samples": 38160, - "transcript": "dry and of magnificent bouquet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.74, - "num_samples": 75840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.74, - "original_num_samples": 75840, - "transcript": "lacrima christi a still wine of excellent flavor and bouquet" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.925, - "num_samples": 110800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0010.wav", - "speed": 1 - } - ], - "original_duration": 6.925, - "original_num_samples": 110800, - "transcript": "sauterne is a white bordeaux a strong luscious wine the best known varieties being" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.965, - "num_samples": 63440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.965, - "original_num_samples": 63440, - "transcript": "vintage years have much to do with the quality of wines" - }, - { - "files": [ - { - "channels": 1, 
- "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.8, - "num_samples": 60800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0012.wav", - "speed": 1 - } - ], - "original_duration": 3.8, - "original_num_samples": 60800, - "transcript": "rhein and moselle eighteen ninety three" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.67, - "num_samples": 74720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/130737/652-130737-0013.wav", - "speed": 1 - } - ], - "original_duration": 4.67, - "original_num_samples": 74720, - "transcript": "claret eighteen ninety eight and nineteen o four" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.025, - "num_samples": 96400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0000.wav", - "speed": 1 - } - ], - "original_duration": 6.025, - "original_num_samples": 96400, - "transcript": "asparagus salad cook the asparagus in salted water drain and chill" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.66, - "num_samples": 138560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.66, - "original_num_samples": 138560, - "transcript": "birds nest salad have ready as many crisp leaves of lettuce as may be required to make a dainty little nest for each person" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0002.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "serve with french dressing hidden 
under the leaves of the nest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.18, - "num_samples": 146880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0003.wav", - "speed": 1 - } - ], - "original_duration": 9.18, - "original_num_samples": 146880, - "transcript": "cabbage salad chop or shave fine half a medium size head of cabbage that has been left in cold water until crisp then drain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.12, - "num_samples": 113920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.12, - "original_num_samples": 113920, - "transcript": "add two tablespoons thick sour cream two tablespoons sugar a sprinkle of mustard and half cup of vinegar" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.015, - "num_samples": 96240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0005.wav", - "speed": 1 - } - ], - "original_duration": 6.015, - "original_num_samples": 96240, - "transcript": "beat until thoroughly mixed pour over the cabbage and toss lightly until uniformly seasoned" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.115, - "num_samples": 145840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0006.wav", - "speed": 1 - } - ], - "original_duration": 9.115, - "original_num_samples": 145840, - "transcript": "cauliflower mayonnaise take cold boiled cauliflower break into branches adding salt pepper and vinegar to season" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.885, - "num_samples": 78160, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.885, - "original_num_samples": 78160, - "transcript": "surround with a garnish of cooked and diced carrots turnips green peas" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.43, - "num_samples": 54880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0008.wav", - "speed": 1 - } - ], - "original_duration": 3.43, - "original_num_samples": 54880, - "transcript": "pour mayonnaise over all chill and serve" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.97, - "num_samples": 223520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0009.wav", - "speed": 1 - } - ], - "original_duration": 13.97, - "original_num_samples": 223520, - "transcript": "celery and nut salad cut enough celery fine to measure two cups add one cup of finely shredded or shaved cabbage and one and one half cups of walnut meats broken in small pieces but not chopped" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.4, - "num_samples": 54400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0010.wav", - "speed": 1 - } - ], - "original_duration": 3.4, - "original_num_samples": 54400, - "transcript": "stir the soaked gelatin in while the cucumber is hot" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.28, - "num_samples": 52480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.28, - "original_num_samples": 52480, - "transcript": "set into a cold place to chill and 
become firm" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.285, - "num_samples": 148560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0012.wav", - "speed": 1 - } - ], - "original_duration": 9.285, - "original_num_samples": 148560, - "transcript": "salad two cups of apples cut into small pieces one cup celery cut into small pieces one cup english walnuts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.6, - "num_samples": 89600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0013.wav", - "speed": 1 - } - ], - "original_duration": 5.6, - "original_num_samples": 89600, - "transcript": "serve on a lettuce leaf with mayonnaise dressing made without mustard and thinned with cream" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.945, - "num_samples": 63120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0014.wav", - "speed": 1 - } - ], - "original_duration": 3.945, - "original_num_samples": 63120, - "transcript": "garnish dish that dressing is made in with a little garlic" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.805, - "num_samples": 188880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0015.wav", - "speed": 1 - } - ], - "original_duration": 11.805, - "original_num_samples": 188880, - "transcript": "put the pulp into a basin with two ounces of melted butter two tablespoonfuls of lemon juice half a pound of chestnuts boiled and grated and seasoning of salt and white pepper to taste" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.99, - "num_samples": 255840, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0016.wav", - "speed": 1 - } - ], - "original_duration": 15.99, - "original_num_samples": 255840, - "transcript": "tomato baskets tomato baskets are charming accessories for holding vegetable salad chicken shrimps cold beans asparagus tips shredded celery cucumbers cut in cubes and minced peppers" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.405, - "num_samples": 54480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0017.wav", - "speed": 1 - } - ], - "original_duration": 3.405, - "original_num_samples": 54480, - "transcript": "handles of watercress may be attached to these baskets" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.48, - "num_samples": 87680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0018.wav", - "speed": 1 - } - ], - "original_duration": 5.48, - "original_num_samples": 87680, - "transcript": "this dressing should stand in the ice box four or five hours to become seasoned" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.785, - "num_samples": 44560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0019.wav", - "speed": 1 - } - ], - "original_duration": 2.785, - "original_num_samples": 44560, - "transcript": "when thickened strain and cool" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.505, - "num_samples": 72080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/652/129742/652-129742-0020.wav", - "speed": 1 - } - ], - "original_duration": 4.505, - "original_num_samples": 72080, - "transcript": "strain and bottle and put in ice box shake before using 
each time" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.22, - "num_samples": 323520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0000.wav", - "speed": 1 - } - ], - "original_duration": 20.22, - "original_num_samples": 323520, - "transcript": "every elevation of the type man has hitherto been the work of an aristocratic society and so it will always be a society believing in a long scale of gradations of rank and differences of worth among human beings and requiring slavery in some form or other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.455, - "num_samples": 199280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0001.wav", - "speed": 1 - } - ], - "original_duration": 12.455, - "original_num_samples": 199280, - "transcript": "the distinctions of moral values have either originated in a ruling caste pleasantly conscious of being different from the ruled or among the ruled class the slaves and dependents of all sorts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.475, - "num_samples": 71600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0002.wav", - "speed": 1 - } - ], - "original_duration": 4.475, - "original_num_samples": 71600, - "transcript": "we truthful ones the nobility in ancient greece called themselves" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.955, - "num_samples": 303280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0003.wav", - "speed": 1 - } - ], - "original_duration": 18.955, - "original_num_samples": 303280, - "transcript": "it is obvious that everywhere the designations of moral value were at 
first applied to men and were only derivatively and at a later period applied to actions it is a gross mistake therefore when historians of morals start with questions like why have sympathetic actions been praised" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.985, - "num_samples": 111760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0004.wav", - "speed": 1 - } - ], - "original_duration": 6.985, - "original_num_samples": 111760, - "transcript": "he honours whatever he recognizes in himself such morality equals self glorification" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.765, - "num_samples": 156240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0005.wav", - "speed": 1 - } - ], - "original_duration": 9.765, - "original_num_samples": 156240, - "transcript": "probably a pessimistic suspicion with regard to the entire situation of man will find expression perhaps a condemnation of man together with his situation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.295, - "num_samples": 228720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0006.wav", - "speed": 1 - } - ], - "original_duration": 14.295, - "original_num_samples": 228720, - "transcript": "here is the seat of the origin of the famous antithesis good and evil power and dangerousness are assumed to reside in the evil a certain dreadfulness subtlety and strength which do not admit of being despised" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.93, - "num_samples": 238880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0007.wav", - "speed": 1 - } - ], - 
"original_duration": 14.93, - "original_num_samples": 238880, - "transcript": "according to slave morality therefore the evil man arouses fear according to master morality it is precisely the good man who arouses fear and seeks to arouse it while the bad man is regarded as the despicable being" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.3, - "num_samples": 148800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0008.wav", - "speed": 1 - } - ], - "original_duration": 9.3, - "original_num_samples": 148800, - "transcript": "everywhere that slave morality gains the ascendancy language shows a tendency to approximate the significations of the words good and stupid" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.365, - "num_samples": 389840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0009.wav", - "speed": 1 - } - ], - "original_duration": 24.365, - "original_num_samples": 389840, - "transcript": "or he will even say for many reasons i can delight in the good opinion of others perhaps because i love and honour them and rejoice in all their joys perhaps also because their good opinion endorses and strengthens my belief in my own good opinion perhaps because the good opinion of others even in cases where i do not share it is useful to me or gives promise of usefulness all this however is not vanity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.44, - "num_samples": 519040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0010.wav", - "speed": 1 - } - ], - "original_duration": 32.44, - "original_num_samples": 519040, - "transcript": "in fact conformably to the slow rise of the democratic social order and its cause the blending of the blood 
of masters and slaves the originally noble and rare impulse of the masters to assign a value to themselves and to think well of themselves will now be more and more encouraged and extended but it has at all times an older ampler and more radically ingrained propensity opposed to it and in the phenomenon of vanity this older propensity overmasters the younger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.225, - "num_samples": 275600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0011.wav", - "speed": 1 - } - ], - "original_duration": 17.225, - "original_num_samples": 275600, - "transcript": "the most varied experience teaches it what are the qualities to which it principally owes the fact that it still exists in spite of all gods and men and has hitherto been victorious these qualities it calls virtues and these virtues alone it develops to maturity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.7, - "num_samples": 251200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0012.wav", - "speed": 1 - } - ], - "original_duration": 15.7, - "original_num_samples": 251200, - "transcript": "variations whether they be deviations into the higher finer and rarer or deteriorations and monstrosities appear suddenly on the scene in the greatest exuberance and splendour the individual dares to be individual and detach himself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 32.645, - "num_samples": 522320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0013.wav", - "speed": 1 - } - ], - "original_duration": 32.645, - "original_num_samples": 522320, - "transcript": "at this turning point of history there manifest themselves side by side and often mixed 
and entangled together a magnificent manifold virgin forest like up growth and up striving a kind of tropical tempo in the rivalry of growth and an extraordinary decay and self destruction owing to the savagely opposing and seemingly exploding egoisms which strive with one another for sun and light and can no longer assign any limit restraint or forbearance for themselves by means of the hitherto existing morality" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 28.795, - "num_samples": 460720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0014.wav", - "speed": 1 - } - ], - "original_duration": 28.795, - "original_num_samples": 460720, - "transcript": "nothing but new whys nothing but new hows no common formulas any longer misunderstanding and disregard in league with each other decay deterioration and the loftiest desires frightfully entangled the genius of the race overflowing from all the cornucopias of good and bad a portentous simultaneousness of spring and autumn full of new charms and mysteries peculiar to the fresh still inexhausted still unwearied corruption" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.465, - "num_samples": 279440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0015.wav", - "speed": 1 - } - ], - "original_duration": 17.465, - "original_num_samples": 279440, - "transcript": "danger is again present the mother of morality great danger this time shifted into the individual into the neighbour and friend into the street into their own child into their own heart into all the most personal and secret recesses of their desires and volitions" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.015, - "num_samples": 80240, - "encoding": "Signed Integer PCM", - "silent": false, - 
"fname": "dev-clean-wav/422/122949/422-122949-0016.wav", - "speed": 1 - } - ], - "original_duration": 5.015, - "original_num_samples": 80240, - "transcript": "what will the moral philosophers who appear at this time have to preach" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.19, - "num_samples": 35040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0017.wav", - "speed": 1 - } - ], - "original_duration": 2.19, - "original_num_samples": 35040, - "transcript": "this is the problem of race" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.565, - "num_samples": 233040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0018.wav", - "speed": 1 - } - ], - "original_duration": 14.565, - "original_num_samples": 233040, - "transcript": "in our very democratic or rather very plebeian age education and culture must be essentially the art of deceiving deceiving with regard to origin with regard to the inherited plebeianism in body and soul" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.115, - "num_samples": 305840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0019.wav", - "speed": 1 - } - ], - "original_duration": 19.115, - "original_num_samples": 305840, - "transcript": "the noble soul accepts the fact of his egoism without question and also without consciousness of harshness constraint or arbitrariness therein but rather as something that may have its basis in the primary law of things if he sought a designation for it he would say it is justice itself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.41, - "num_samples": 374560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/422/122949/422-122949-0020.wav", - "speed": 1 - } - ], - "original_duration": 23.41, - "original_num_samples": 374560, - "transcript": "on this account the people of one nation understand one another better than those belonging to different nations even when they use the same language or rather when people have lived long together under similar conditions of climate soil danger requirement toil there originates therefrom an entity that understands itself namely a nation" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.69, - "num_samples": 203040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0021.wav", - "speed": 1 - } - ], - "original_duration": 12.69, - "original_num_samples": 203040, - "transcript": "the greater the danger the greater is the need of agreeing quickly and readily about what is necessary not to misunderstand one another in danger that is what cannot at all be dispensed with in intercourse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.285, - "num_samples": 260560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0022.wav", - "speed": 1 - } - ], - "original_duration": 16.285, - "original_num_samples": 260560, - "transcript": "also in all loves and friendships one has the experience that nothing of the kind continues when the discovery has been made that in using the same words one of the two parties has feelings thoughts intuitions wishes or fears different from those of the other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.94, - "num_samples": 239040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0023.wav", - "speed": 1 - } - ], - "original_duration": 14.94, - "original_num_samples": 239040, - 
"transcript": "whichever groups of sensations within a soul awaken most readily begin to speak and give the word of command these decide as to the general order of rank of its values and determine ultimately its list of desirable things" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.74, - "num_samples": 155840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0024.wav", - "speed": 1 - } - ], - "original_duration": 9.74, - "original_num_samples": 155840, - "transcript": "a man's estimates of value betray something of the structure of his soul and wherein it sees its conditions of life its intrinsic needs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.85, - "num_samples": 285600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0025.wav", - "speed": 1 - } - ], - "original_duration": 17.85, - "original_num_samples": 285600, - "transcript": "profound suffering makes noble it separates one of the most refined forms of disguise is epicurism along with a certain ostentatious boldness of taste which takes suffering lightly and puts itself on the defensive against all that is sorrowful and profound" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.11, - "num_samples": 209760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0026.wav", - "speed": 1 - } - ], - "original_duration": 13.11, - "original_num_samples": 209760, - "transcript": "the highest instinct for purity places him who is affected with it in the most extraordinary and dangerous isolation as a saint for it is just holiness the highest spiritualization of the instinct in question" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 19.12, - 
"num_samples": 305920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0027.wav", - "speed": 1 - } - ], - "original_duration": 19.12, - "original_num_samples": 305920, - "transcript": "occasionally too the waking call comes too late the chance which gives permission to take action when their best youth and strength for action have been used up in sitting still and how many a one just as he sprang up has found with horror that his limbs are benumbed and his spirits are now too heavy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.665, - "num_samples": 58640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0028.wav", - "speed": 1 - } - ], - "original_duration": 3.665, - "original_num_samples": 58640, - "transcript": "and whoever thou art what is it that now pleases thee" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.915, - "num_samples": 46640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0029.wav", - "speed": 1 - } - ], - "original_duration": 2.915, - "original_num_samples": 46640, - "transcript": "only name it whatever i have i offer thee" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.94, - "num_samples": 47040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0030.wav", - "speed": 1 - } - ], - "original_duration": 2.94, - "original_num_samples": 47040, - "transcript": "but you misunderstand him when you complain about it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.86, - "num_samples": 109760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0031.wav", - "speed": 1 - } - 
], - "original_duration": 6.86, - "original_num_samples": 109760, - "transcript": "there must be a sort of repugnance in me to believe anything definite about myself is there perhaps some enigma therein" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.36, - "num_samples": 213760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0032.wav", - "speed": 1 - } - ], - "original_duration": 13.36, - "original_num_samples": 213760, - "transcript": "probably but fortunately nothing for my own teeth perhaps it betrays the species to which i belong but not to myself as is sufficiently agreeable to me but what has happened to you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.555, - "num_samples": 88880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0033.wav", - "speed": 1 - } - ], - "original_duration": 5.555, - "original_num_samples": 88880, - "transcript": "i do not know he said hesitatingly perhaps the harpies have flown over my table" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.985, - "num_samples": 239760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0034.wav", - "speed": 1 - } - ], - "original_duration": 14.985, - "original_num_samples": 239760, - "transcript": "to suffocate with his memories to him who has the desires of a lofty and dainty soul and only seldom finds his table laid and his food prepared the danger will always be great nowadays however it is extraordinarily so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.695, - "num_samples": 75120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/422/122949/422-122949-0035.wav", - "speed": 1 - } - ], 
- "original_duration": 4.695, - "original_num_samples": 75120, - "transcript": "and to choose for company that roguish and cheerful vice politeness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.71, - "num_samples": 155360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.71, - "original_num_samples": 155360, - "transcript": "he lives thy loss he dies from every limb mangled by thee lightnings of godhead shine from which thy darkness hath not where to hide" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.7, - "num_samples": 43200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0001.wav", - "speed": 1 - } - ], - "original_duration": 2.7, - "original_num_samples": 43200, - "transcript": "quinci impara a stupirti" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 31.7, - "num_samples": 507200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0002.wav", - "speed": 1 - } - ], - "original_duration": 31.7, - "original_num_samples": 507200, - "transcript": "in flesh was raimented how he was killed and buried from the dead how he arose to life with victory and reigned in heaven how all of us shall be glorious like him whose hearts to his are wed how they who die for love of reason give hypocrites tyrants sophists all who sell their neighbours ill for holiness to hell how the dead saint condemns the bad who live how all he does becomes a law for men how he at last to judge shall come again" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.845, - "num_samples": 125520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/8842/304647/8842-304647-0003.wav", - "speed": 1 - } - ], - "original_duration": 7.845, - "original_num_samples": 125520, - "transcript": "this world's thick vapours whelm your eyes unworthy of that glorious show blind to his splendour bent upon his shame" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.155, - "num_samples": 162480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0004.wav", - "speed": 1 - } - ], - "original_duration": 10.155, - "original_num_samples": 162480, - "transcript": "money is false and light unless it be bought by a man's own worthy qualities and blood is such that its corrupt disease and ignorant pretence are foul to see" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.22, - "num_samples": 35520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0005.wav", - "speed": 1 - } - ], - "original_duration": 2.22, - "original_num_samples": 35520, - "transcript": "il popolo e una bestia" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 22.96, - "num_samples": 367360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0006.wav", - "speed": 1 - } - ], - "original_duration": 22.96, - "original_num_samples": 367360, - "transcript": "the people is a beast of muddy brain that knows not its own force and therefore stands loaded with wood and stone the powerless hands of a mere child guide it with bit and rein one kick would be enough to break the chain but the beast fears and what the child demands it does nor its own terror understands confused and stupefied by bugbears vain" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.73, - "num_samples": 27680, - "encoding": 
"Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0007.wav", - "speed": 1 - } - ], - "original_duration": 1.73, - "original_num_samples": 27680, - "transcript": "most wonderful" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.28, - "num_samples": 132480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0008.wav", - "speed": 1 - } - ], - "original_duration": 8.28, - "original_num_samples": 132480, - "transcript": "that penance hath no blame which magdalen found sweet purging our shame self punishment is virtue all men know" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.955, - "num_samples": 191280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0009.wav", - "speed": 1 - } - ], - "original_duration": 11.955, - "original_num_samples": 191280, - "transcript": "organ of rut not reason is the lord who from the body politic doth drain lust for himself instead of toil and pain leaving us lean as crickets on dry sward" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.195, - "num_samples": 195120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0010.wav", - "speed": 1 - } - ], - "original_duration": 12.195, - "original_num_samples": 195120, - "transcript": "well too if he like love would filch our hoard with pleasure to ourselves sluicing our vein and vigour to perpetuate the strain of life by spilth of life within us stored" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.755, - "num_samples": 140080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0011.wav", - "speed": 1 - } - ], - 
"original_duration": 8.755, - "original_num_samples": 140080, - "transcript": "heaven help that body which a little mind housed in a head lacking ears tongue and eyes and senseless but for smell can tyrannise" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.325, - "num_samples": 69200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.325, - "original_num_samples": 69200, - "transcript": "due to thee their praise of maiden pure of teeming motherhood" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.875, - "num_samples": 142000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/304647/8842-304647-0013.wav", - "speed": 1 - } - ], - "original_duration": 8.875, - "original_num_samples": 142000, - "transcript": "thou like arcturus steadfast in the skies with tardy sense guidest thy kingdom fair bearing alone the load of liberty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.65, - "num_samples": 234400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0000.wav", - "speed": 1 - } - ], - "original_duration": 14.65, - "original_num_samples": 234400, - "transcript": "he translated at an early age chiefly between eighteen forty five and eighteen forty nine a great number of poems by the italians contemporary with dante or preceding him and among other things he made a version of the whole vita nuova prose and verse" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.35, - "num_samples": 149600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.35, - 
"original_num_samples": 149600, - "transcript": "this book in its original form was received with favour and settled the claim of rossetti to rank as a poetic translator or indeed as a poet in his own right" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.655, - "num_samples": 106480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0002.wav", - "speed": 1 - } - ], - "original_duration": 6.655, - "original_num_samples": 106480, - "transcript": "the life blood of rhythmical translation is this commandment that a good poem shall not be turned into a bad one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.86, - "num_samples": 141760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.86, - "original_num_samples": 141760, - "transcript": "the only true motive for putting poetry into a fresh language must be to endow a fresh nation as far as possible with one more possession of beauty" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.74, - "num_samples": 91840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.74, - "original_num_samples": 91840, - "transcript": "poetry not being an exact science literality of rendering is altogether secondary to this chief law" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 23.445, - "num_samples": 375120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0005.wav", - "speed": 1 - } - ], - "original_duration": 23.445, - "original_num_samples": 375120, - "transcript": "often would he avail 
himself of any special grace of his own idiom and epoch if only his will belonged to him often would some cadence serve him but for his author's structure some structure but for his author's cadence often the beautiful turn of a stanza must be weakened to adopt some rhyme which will tally and he sees the poet revelling in abundance of language where himself is scantily supplied" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.365, - "num_samples": 85840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.365, - "original_num_samples": 85840, - "transcript": "and if you have time it would be a great service to translate the analyses of the poems which i omitted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.215, - "num_samples": 99440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0007.wav", - "speed": 1 - } - ], - "original_duration": 6.215, - "original_num_samples": 99440, - "transcript": "on january twenty fifth he wrote many and many thanks for a most essential service most thoroughly performed" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.31, - "num_samples": 84960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.31, - "original_num_samples": 84960, - "transcript": "my notes which you have taken the trouble of revising are of course quite paltry and useless" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.055, - "num_samples": 112880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0009.wav", - "speed": 1 - 
} - ], - "original_duration": 7.055, - "original_num_samples": 112880, - "transcript": "it is therefore and on all accounts unnecessary to say much more of the work here than it says for itself" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.335, - "num_samples": 133360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0010.wav", - "speed": 1 - } - ], - "original_duration": 8.335, - "original_num_samples": 133360, - "transcript": "throughout the vita nuova there is a strain like the first falling murmur which reaches the ear in some remote meadow and prepares us to look upon the sea" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.705, - "num_samples": 59280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0011.wav", - "speed": 1 - } - ], - "original_duration": 3.705, - "original_num_samples": 59280, - "transcript": "a word should be said here of the title of dante's autobiography" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.63, - "num_samples": 90080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302196/8842-302196-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.63, - "original_num_samples": 90080, - "transcript": "this has induced some editors of the vita nuova to explain the title as meaning early life" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.5, - "num_samples": 152000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.5, - "original_num_samples": 152000, - "transcript": "thereafter this sonnet bred in me desire to write down in verse four other things touching my 
condition the which things it seemed to me that i had not yet made manifest" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.44, - "num_samples": 151040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.44, - "original_num_samples": 151040, - "transcript": "which thing being thus there came a day when certain ladies to whom it was well known they having been with me at divers times in my trouble were met together for the pleasure of gentle company" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.335, - "num_samples": 165360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.335, - "original_num_samples": 165360, - "transcript": "but when i still spake not one of them who before had been talking with another addressed me by my name saying to what end lovest thou this lady seeing that thou canst not support her presence" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.265, - "num_samples": 132240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0003.wav", - "speed": 1 - } - ], - "original_duration": 8.265, - "original_num_samples": 132240, - "transcript": "and now that it hath pleased her to deny me this love my master of his great goodness hath placed all my beatitude there where my hope will not fail me" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.59, - "num_samples": 121440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0004.wav", - "speed": 1 - } - ], - "original_duration": 7.59, - "original_num_samples": 
121440, - "transcript": "then those ladies began to talk closely together and as i have seen snow fall among the rain so was their talk mingled with sighs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.095, - "num_samples": 161520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.095, - "original_num_samples": 161520, - "transcript": "and i declare that when i speak thereof love sheds such perfect sweetness over me that if my courage failed not certainly to him my listeners must be all resign'd" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.865, - "num_samples": 157840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0006.wav", - "speed": 1 - } - ], - "original_duration": 9.865, - "original_num_samples": 157840, - "transcript": "whatever her sweet eyes are turned upon spirits of love do issue thence in flame which through their eyes who then may look on them pierce to the heart's deep chamber every one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.855, - "num_samples": 77680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0007.wav", - "speed": 1 - } - ], - "original_duration": 4.855, - "original_num_samples": 77680, - "transcript": "to her i wend along in whose much strength my weakness is made strong" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.97, - "num_samples": 95520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.97, - "original_num_samples": 95520, - "transcript": "so to the road thou shalt be 
reconciled and find the lady and with the lady love" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.915, - "num_samples": 78640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0009.wav", - "speed": 1 - } - ], - "original_duration": 4.915, - "original_num_samples": 78640, - "transcript": "the second begins here an angel the third here dear song i know" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.475, - "num_samples": 39600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0010.wav", - "speed": 1 - } - ], - "original_duration": 2.475, - "original_num_samples": 39600, - "transcript": "the first part is divided into four" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.22, - "num_samples": 83520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0011.wav", - "speed": 1 - } - ], - "original_duration": 5.22, - "original_num_samples": 83520, - "transcript": "in the third i say what it is i purpose to speak so as not to be impeded by faintheartedness" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.805, - "num_samples": 76880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0012.wav", - "speed": 1 - } - ], - "original_duration": 4.805, - "original_num_samples": 76880, - "transcript": "in the fourth repeating to whom i purpose speaking i tell the reason why i speak to them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.535, - "num_samples": 88560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0013.wav", - "speed": 1 - } - 
], - "original_duration": 5.535, - "original_num_samples": 88560, - "transcript": "in the second i tell what is understood of her on earth here my lady is desired" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.62, - "num_samples": 249920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0014.wav", - "speed": 1 - } - ], - "original_duration": 15.62, - "original_num_samples": 249920, - "transcript": "this second part is divided into two for in the first i speak of her as regards the nobleness of her soul relating some of her virtues proceeding from her soul in the second i speak of her as regards the nobleness of her body narrating some of her beauties here love saith concerning her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.985, - "num_samples": 143760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302201/8842-302201-0015.wav", - "speed": 1 - } - ], - "original_duration": 8.985, - "original_num_samples": 143760, - "transcript": "this second part is divided into two for in the one i speak of the eyes which are the beginning of love in the second i speak of the mouth which is the end of love" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.24, - "num_samples": 179840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0000.wav", - "speed": 1 - } - ], - "original_duration": 11.24, - "original_num_samples": 179840, - "transcript": "and at the first it seemed to me that i saw certain faces of women with their hair loosened which called out to me thou shalt surely die after the which other terrible and unknown appearances said unto me thou art dead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.485, - 
"num_samples": 151760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0001.wav", - "speed": 1 - } - ], - "original_duration": 9.485, - "original_num_samples": 151760, - "transcript": "and so strong was my phantasy that i wept again in very truth and said with my true voice o excellent soul how blessed is he that now looketh upon thee" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.77, - "num_samples": 252320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0002.wav", - "speed": 1 - } - ], - "original_duration": 15.77, - "original_num_samples": 252320, - "transcript": "whereby other ladies who were about the room becoming aware of my discomfort by reason of the moan that she made who indeed was of my very near kindred led her away from where i was and then set themselves to awaken me thinking that i dreamed and saying sleep no longer and be not disquieted" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0003.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "when being aroused i opened mine eyes and knew that it had been a deception" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.045, - "num_samples": 208720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0004.wav", - "speed": 1 - } - ], - "original_duration": 13.045, - "original_num_samples": 208720, - "transcript": "and my hue was such that they look'd at each other and thought of death saying under their breath most tenderly o let us comfort him then unto me what dream was thine that it hath 
shaken thee so much" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.27, - "num_samples": 132320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0005.wav", - "speed": 1 - } - ], - "original_duration": 8.27, - "original_num_samples": 132320, - "transcript": "and therewithal such a bewilderment possess'd me that i shut mine eyes for peace and in my brain did cease order of thought and every healthful thing" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.28, - "num_samples": 84480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0006.wav", - "speed": 1 - } - ], - "original_duration": 5.28, - "original_num_samples": 84480, - "transcript": "then saw i many broken hinted sights in the uncertain state i stepp'd into" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.7, - "num_samples": 59200, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0007.wav", - "speed": 1 - } - ], - "original_duration": 3.7, - "original_num_samples": 59200, - "transcript": "these wildering phantasies then carried me to see my lady dead" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.415, - "num_samples": 70640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0008.wav", - "speed": 1 - } - ], - "original_duration": 4.415, - "original_num_samples": 70640, - "transcript": "the second part begins here i was a thinking the first part divides into two" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.095, - "num_samples": 193520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/8842/302203/8842-302203-0009.wav", - "speed": 1 - } - ], - "original_duration": 12.095, - "original_num_samples": 193520, - "transcript": "this lady's right name was joan but because of her comeliness or at least it was so imagined she was called of many primavera spring and went by that name among them" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.395, - "num_samples": 230320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0010.wav", - "speed": 1 - } - ], - "original_duration": 14.395, - "original_num_samples": 230320, - "transcript": "and in his speech he laugh'd and laugh'd again then while it was his pleasure to remain i chanced to look the way he had drawn near and saw the ladies joan and beatrice approach me this the other following one and a second marvel instantly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.72, - "num_samples": 107520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/8842/302203/8842-302203-0011.wav", - "speed": 1 - } - ], - "original_duration": 6.72, - "original_num_samples": 107520, - "transcript": "the second part begins here saying be now the third here then while it was his pleasure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.66, - "num_samples": 42560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0000.wav", - "speed": 1 - } - ], - "original_duration": 2.66, - "original_num_samples": 42560, - "transcript": "illustration long pepper" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.05, - "num_samples": 176800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0001.wav", - "speed": 1 - } - ], - 
"original_duration": 11.05, - "original_num_samples": 176800, - "transcript": "long pepper this is the produce of a different plant from that which produces the black it consisting of the half ripe flower heads of what naturalists call piper longum and chaba" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.26, - "num_samples": 164160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0002.wav", - "speed": 1 - } - ], - "original_duration": 10.26, - "original_num_samples": 164160, - "transcript": "originally the most valuable of these were found in the spice islands or moluccas of the indian ocean and were highly prized by the nations of antiquity" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.405, - "num_samples": 86480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0003.wav", - "speed": 1 - } - ], - "original_duration": 5.405, - "original_num_samples": 86480, - "transcript": "the long pepper is less aromatic than the black but its oil is more pungent" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.17, - "num_samples": 130720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0004.wav", - "speed": 1 - } - ], - "original_duration": 8.17, - "original_num_samples": 130720, - "transcript": "then add the yolks of the eggs well beaten stir them to the sauce but do not allow it to boil and serve very hot" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.86, - "num_samples": 301760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0005.wav", - "speed": 1 - } - ], - "original_duration": 18.86, - "original_num_samples": 301760, - 
"transcript": "mode pare and slice the cucumbers as for the table sprinkle well with salt and let them remain for twenty four hours strain off the liquor pack in jars a thick layer of cucumbers and salt alternately tie down closely and when wanted for use take out the quantity required" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.8, - "num_samples": 44800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0006.wav", - "speed": 1 - } - ], - "original_duration": 2.8, - "original_num_samples": 44800, - "transcript": "illustration the cucumber" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 26.625, - "num_samples": 426000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0007.wav", - "speed": 1 - } - ], - "original_duration": 26.625, - "original_num_samples": 426000, - "transcript": "mode choose the greenest cucumbers and those that are most free from seeds put them in strong salt and water with a cabbage leaf to keep them down tie a paper over them and put them in a warm place till they are yellow then wash them and set them over the fire in fresh water with a very little salt and another cabbage leaf over them cover very closely but take care they do not boil" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.285, - "num_samples": 324560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0008.wav", - "speed": 1 - } - ], - "original_duration": 20.285, - "original_num_samples": 324560, - "transcript": "put the sugar with one quarter pint of water in a saucepan over the fire remove the scum as it rises and add the lemon peel and ginger with the outside scraped off when the syrup is tolerably thick take it off the fire and when cold wipe the 
cucumbers dry and put them in" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.84, - "num_samples": 93440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.84, - "original_num_samples": 93440, - "transcript": "seasonable this recipe should be used in june july or august" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.505, - "num_samples": 184080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0010.wav", - "speed": 1 - } - ], - "original_duration": 11.505, - "original_num_samples": 184080, - "transcript": "solid rocks of salt are also found in various parts of the world and the county of chester contains many of these mines and it is from there that much of our salt comes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.0, - "num_samples": 224000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0011.wav", - "speed": 1 - } - ], - "original_duration": 14.0, - "original_num_samples": 224000, - "transcript": "some springs are so highly impregnated with salt as to have received the name of brine springs and are supposed to have become so by passing through the salt rocks below ground and thus dissolving a portion of this mineral substance" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.405, - "num_samples": 86480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0012.wav", - "speed": 1 - } - ], - "original_duration": 5.405, - "original_num_samples": 86480, - "transcript": "mode put the milk in a very clean saucepan and let it boil" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.01, - "num_samples": 96160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.01, - "original_num_samples": 96160, - "transcript": "beat the eggs stir to them the milk and pounded sugar and put the mixture into a jug" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.13, - "num_samples": 130080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0014.wav", - "speed": 1 - } - ], - "original_duration": 8.13, - "original_num_samples": 130080, - "transcript": "place the jug in a saucepan of boiling water keep stirring well until it thickens but do not allow it to boil or it will curdle" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.29, - "num_samples": 68640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0015.wav", - "speed": 1 - } - ], - "original_duration": 4.29, - "original_num_samples": 68640, - "transcript": "when it is sufficiently thick take it off as it should not boil" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.78, - "num_samples": 44480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0016.wav", - "speed": 1 - } - ], - "original_duration": 2.78, - "original_num_samples": 44480, - "transcript": "illustration the lemon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.105, - "num_samples": 113680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0017.wav", - "speed": 1 - } - ], - "original_duration": 7.105, - "original_num_samples": 113680, - "transcript": 
"the lemon this fruit is a native of asia and is mentioned by virgil as an antidote to poison" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.06, - "num_samples": 112960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0018.wav", - "speed": 1 - } - ], - "original_duration": 7.06, - "original_num_samples": 112960, - "transcript": "it is hardier than the orange and as one of the citron tribe was brought into europe by the arabians" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.38, - "num_samples": 134080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0019.wav", - "speed": 1 - } - ], - "original_duration": 8.38, - "original_num_samples": 134080, - "transcript": "the lemon was first cultivated in england in the beginning of the seventeenth century and is now often to be found in our green houses" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.34, - "num_samples": 149440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0020.wav", - "speed": 1 - } - ], - "original_duration": 9.34, - "original_num_samples": 149440, - "transcript": "this juice which is called citric acid may be preserved in bottles for a considerable time by covering it with a thin stratum of oil" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.69, - "num_samples": 27040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0021.wav", - "speed": 1 - } - ], - "original_duration": 1.69, - "original_num_samples": 27040, - "transcript": "to pickle eggs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.77, - "num_samples": 
108320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0022.wav", - "speed": 1 - } - ], - "original_duration": 6.77, - "original_num_samples": 108320, - "transcript": "seasonable this should be made about easter as at this time eggs are plentiful and cheap" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.24, - "num_samples": 115840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0023.wav", - "speed": 1 - } - ], - "original_duration": 7.24, - "original_num_samples": 115840, - "transcript": "a store of pickled eggs will be found very useful and ornamental in serving with many first and second course dishes" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.86, - "num_samples": 45760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0024.wav", - "speed": 1 - } - ], - "original_duration": 2.86, - "original_num_samples": 45760, - "transcript": "illustration ginger" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.005, - "num_samples": 128080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0025.wav", - "speed": 1 - } - ], - "original_duration": 8.005, - "original_num_samples": 128080, - "transcript": "the ginger plant known to naturalists as zingiber officinale is a native of the east and west indies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.655, - "num_samples": 90480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0026.wav", - "speed": 1 - } - ], - "original_duration": 5.655, - "original_num_samples": 90480, - "transcript": "in jamaica it flowers about august or 
september fading about the end of the year" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.535, - "num_samples": 152560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0027.wav", - "speed": 1 - } - ], - "original_duration": 9.535, - "original_num_samples": 152560, - "transcript": "beat the yolks of the other two eggs add them with a little flour and salt to those pounded mix all well together and roll into balls" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.16, - "num_samples": 82560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0028.wav", - "speed": 1 - } - ], - "original_duration": 5.16, - "original_num_samples": 82560, - "transcript": "boil them before they are put into the soup or other dish they may be intended for" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.625, - "num_samples": 42000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0029.wav", - "speed": 1 - } - ], - "original_duration": 2.625, - "original_num_samples": 42000, - "transcript": "lemon juice may be added at pleasure" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.37, - "num_samples": 149920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0030.wav", - "speed": 1 - } - ], - "original_duration": 9.37, - "original_num_samples": 149920, - "transcript": "mode put the whole of the ingredients into a bottle and let it remain for a fortnight in a warm place occasionally shaking up the contents" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.155, - "num_samples": 82480, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0031.wav", - "speed": 1 - } - ], - "original_duration": 5.155, - "original_num_samples": 82480, - "transcript": "they ought to be taken up in the autumn and when dried in the house will keep till spring" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.92, - "num_samples": 94720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0032.wav", - "speed": 1 - } - ], - "original_duration": 5.92, - "original_num_samples": 94720, - "transcript": "add the wine and if necessary a seasoning of cayenne when it will be ready to serve" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.71, - "num_samples": 123360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0033.wav", - "speed": 1 - } - ], - "original_duration": 7.71, - "original_num_samples": 123360, - "transcript": "note the wine in this sauce may be omitted and an onion sliced and fried of a nice brown substituted for it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.68, - "num_samples": 58880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0034.wav", - "speed": 1 - } - ], - "original_duration": 3.68, - "original_num_samples": 58880, - "transcript": "simmer for a minute or two and serve in a tureen" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.61, - "num_samples": 57760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0035.wav", - "speed": 1 - } - ], - "original_duration": 3.61, - "original_num_samples": 57760, - "transcript": "sufficient to serve with five or six mackerel" - }, - { - "files": [ - { - "channels": 
1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.875, - "num_samples": 222000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0036.wav", - "speed": 1 - } - ], - "original_duration": 13.875, - "original_num_samples": 222000, - "transcript": "various dishes are frequently ornamented and garnished with its graceful leaves and these are sometimes boiled in soups although it is more usually confined in english cookery to the mackerel sauce as here given" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.97, - "num_samples": 47520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0037.wav", - "speed": 1 - } - ], - "original_duration": 2.97, - "original_num_samples": 47520, - "transcript": "forcemeat for cold savoury pies" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.905, - "num_samples": 94480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0038.wav", - "speed": 1 - } - ], - "original_duration": 5.905, - "original_num_samples": 94480, - "transcript": "pound well and bind with one or two eggs which have been previously beaten and strained" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.88, - "num_samples": 46080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0039.wav", - "speed": 1 - } - ], - "original_duration": 2.88, - "original_num_samples": 46080, - "transcript": "illustration marjoram" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.92, - "num_samples": 126720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0040.wav", - "speed": 1 - } - ], - 
"original_duration": 7.92, - "original_num_samples": 126720, - "transcript": "it is a native of portugal and when its leaves are used as a seasoning herb they have an agreeable aromatic flavour" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 12.15, - "num_samples": 194400, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0041.wav", - "speed": 1 - } - ], - "original_duration": 12.15, - "original_num_samples": 194400, - "transcript": "mode mix all the ingredients well together carefully mincing them very finely beat up the egg moisten with it and work the whole very smoothly together" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.87, - "num_samples": 61920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0042.wav", - "speed": 1 - } - ], - "original_duration": 3.87, - "original_num_samples": 61920, - "transcript": "sufficient for a moderate sized haddock or pike" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.085, - "num_samples": 113360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0043.wav", - "speed": 1 - } - ], - "original_duration": 7.085, - "original_num_samples": 113360, - "transcript": "now beat and strain the eggs work these up with the other ingredients and the forcemeat will be ready for use" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.77, - "num_samples": 92320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0044.wav", - "speed": 1 - } - ], - "original_duration": 5.77, - "original_num_samples": 92320, - "transcript": "boil for five minutes mince it very small and mix it with the other ingredients" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.11, - "num_samples": 97760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0045.wav", - "speed": 1 - } - ], - "original_duration": 6.11, - "original_num_samples": 97760, - "transcript": "if it should be in an unsound state it must be on no account made use of" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.635, - "num_samples": 42160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0046.wav", - "speed": 1 - } - ], - "original_duration": 2.635, - "original_num_samples": 42160, - "transcript": "illustration basil" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.665, - "num_samples": 282640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0047.wav", - "speed": 1 - } - ], - "original_duration": 17.665, - "original_num_samples": 282640, - "transcript": "other sweet herbs are cultivated for purposes of medicine and perfumery they are most grateful both to the organs of taste and smelling and to the aroma derived from them is due in a great measure the sweet and exhilarating fragrance of our flowery meads" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 1.84, - "num_samples": 29440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0048.wav", - "speed": 1 - } - ], - "original_duration": 1.84, - "original_num_samples": 29440, - "transcript": "french forcemeat" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.835, - "num_samples": 253360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/1919/142785/1919-142785-0049.wav", - "speed": 1 - } - ], - "original_duration": 15.835, - "original_num_samples": 253360, - "transcript": "it will be well to state in the beginning of this recipe that french forcemeat or quenelles consist of the blending of three separate processes namely panada udder and whatever meat you intend using panada" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.79, - "num_samples": 140640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0050.wav", - "speed": 1 - } - ], - "original_duration": 8.79, - "original_num_samples": 140640, - "transcript": "place it over the fire keep constantly stirring to prevent its burning and when quite dry put in a small piece of butter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.29, - "num_samples": 148640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0051.wav", - "speed": 1 - } - ], - "original_duration": 9.29, - "original_num_samples": 148640, - "transcript": "put the udder into a stewpan with sufficient water to cover it let it stew gently till quite done when take it out to cool" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.1, - "num_samples": 49600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0052.wav", - "speed": 1 - } - ], - "original_duration": 3.1, - "original_num_samples": 49600, - "transcript": "illustration pestle and mortar" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.06, - "num_samples": 176960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0053.wav", - "speed": 1 - } - ], - "original_duration": 11.06, - 
"original_num_samples": 176960, - "transcript": "when the three ingredients are properly prepared pound them altogether in a mortar for some time for the more quenelles are pounded the more delicate they are" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.725, - "num_samples": 123600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0054.wav", - "speed": 1 - } - ], - "original_duration": 7.725, - "original_num_samples": 123600, - "transcript": "if the quenelles are not firm enough add the yolk of another egg but omit the white which only makes them hollow and puffy inside" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.51, - "num_samples": 152160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0055.wav", - "speed": 1 - } - ], - "original_duration": 9.51, - "original_num_samples": 152160, - "transcript": "any one with the slightest pretensions to refined cookery must in this particular implicitly follow the example of our friends across the channel" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.1, - "num_samples": 33600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0056.wav", - "speed": 1 - } - ], - "original_duration": 2.1, - "original_num_samples": 33600, - "transcript": "fried bread crumbs" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.31, - "num_samples": 132960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0057.wav", - "speed": 1 - } - ], - "original_duration": 8.31, - "original_num_samples": 132960, - "transcript": "the fat they are fried in should be clear and the crumbs should not have the slightest appearance 
or taste of having been in the least degree burnt" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.36, - "num_samples": 37760, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0058.wav", - "speed": 1 - } - ], - "original_duration": 2.36, - "original_num_samples": 37760, - "transcript": "fried bread for borders" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.745, - "num_samples": 139920, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0059.wav", - "speed": 1 - } - ], - "original_duration": 8.745, - "original_num_samples": 139920, - "transcript": "when quite crisp dip one side of the sippet into the beaten white of an egg mixed with a little flour and place it on the edge of the dish" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.405, - "num_samples": 118480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0060.wav", - "speed": 1 - } - ], - "original_duration": 7.405, - "original_num_samples": 118480, - "transcript": "continue in this manner till the border is completed arranging the sippets a pale and a dark one alternately" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.43, - "num_samples": 278880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0061.wav", - "speed": 1 - } - ], - "original_duration": 17.43, - "original_num_samples": 278880, - "transcript": "mode cut up the onion and carrot into small rings and put them into a stewpan with the herbs mushrooms bay leaf cloves and mace add the butter and simmer the whole very gently over a slow fire until the onion is quite tender" - }, - { - "files": [ - { - "channels": 1, - 
"sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.14, - "num_samples": 66240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0062.wav", - "speed": 1 - } - ], - "original_duration": 4.14, - "original_num_samples": 66240, - "transcript": "sufficient half this quantity for two slices of salmon" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.8, - "num_samples": 44800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/1919/142785/1919-142785-0063.wav", - "speed": 1 - } - ], - "original_duration": 2.8, - "original_num_samples": 44800, - "transcript": "illustration sage" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.48, - "num_samples": 151680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0000.wav", - "speed": 1 - } - ], - "original_duration": 9.48, - "original_num_samples": 151680, - "transcript": "her meeting with letty was indescribably tender and the days that followed were pretty equally divided between her and her brother in nursing the one and loving the other" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.66, - "num_samples": 138560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0001.wav", - "speed": 1 - } - ], - "original_duration": 8.66, - "original_num_samples": 138560, - "transcript": "but even while she enjoyed every hour of life and begrudged the time given to sleep she felt as if the dream was too beautiful to last and often said" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.665, - "num_samples": 122640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0002.wav", - 
"speed": 1 - } - ], - "original_duration": 7.665, - "original_num_samples": 122640, - "transcript": "so christie turned a deaf ear to her prophetic soul and gave herself up to the blissful holiday that had come at last" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.85, - "num_samples": 109600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0003.wav", - "speed": 1 - } - ], - "original_duration": 6.85, - "original_num_samples": 109600, - "transcript": "nothing can surprise me now i'm prepared for any thing even the sight of my quakerish lover dancing a jig" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.85, - "num_samples": 93600, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0004.wav", - "speed": 1 - } - ], - "original_duration": 5.85, - "original_num_samples": 93600, - "transcript": "i feel like a boy out of school or rather a man out of prison and must enjoy my liberty in some way" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.67, - "num_samples": 170720, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0005.wav", - "speed": 1 - } - ], - "original_duration": 10.67, - "original_num_samples": 170720, - "transcript": "i'm not a talker you know and as the laws of gravitation forbid my soaring aloft anywhere i can only express my joyfully uplifted state of mind by prancing as you call it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.095, - "num_samples": 145520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0006.wav", - "speed": 1 - } - ], - "original_duration": 9.095, - "original_num_samples": 145520, - "transcript": "i 
don't want you to i love to see you so young and happy only you are not the old david and i've got to get acquainted with the new one" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.499937, - "num_samples": 87999, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0007.wav", - "speed": 1 - } - ], - "original_duration": 5.499937, - "original_num_samples": 87999, - "transcript": "i hope you'll like him better than the frost bitten old david you first knew and were kind enough to love" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.850062, - "num_samples": 93601, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0008.wav", - "speed": 1 - } - ], - "original_duration": 5.850062, - "original_num_samples": 93601, - "transcript": "mother says i've gone back to the time before we lost letty and i sometimes feel as if i had" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.79, - "num_samples": 92640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0009.wav", - "speed": 1 - } - ], - "original_duration": 5.79, - "original_num_samples": 92640, - "transcript": "in that case you will find me a proud impetuous ambitious fellow christie and how will that suit" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.18, - "num_samples": 162880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0010.wav", - "speed": 1 - } - ], - "original_duration": 10.18, - "original_num_samples": 162880, - "transcript": "excellently i like pride of your sort impetuosity becomes you for you have learned to control it if need be and the ambition is best of all" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.26, - "num_samples": 36160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0011.wav", - "speed": 1 - } - ], - "original_duration": 2.26, - "original_num_samples": 36160, - "transcript": "i shall wait for time to show" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.77, - "num_samples": 236320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0012.wav", - "speed": 1 - } - ], - "original_duration": 14.77, - "original_num_samples": 236320, - "transcript": "then they went back to their work little dreaming as they tied roses and twined smilax wreaths how near that other chance was how soon they were to be called upon to keep their promise and how well each was to perform the part given them in life and death" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.115, - "num_samples": 97840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0013.wav", - "speed": 1 - } - ], - "original_duration": 6.115, - "original_num_samples": 97840, - "transcript": "to no home in the land did the great trouble bring a more sudden change than the little cottage in the lane" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.94, - "num_samples": 175040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0014.wav", - "speed": 1 - } - ], - "original_duration": 10.94, - "original_num_samples": 175040, - "transcript": "david was sober enough now and went about his work with a grim set to his lips and a spark in his eyes that made the three women look at one another pale with unspoken apprehension" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.3, - "num_samples": 36800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0015.wav", - "speed": 1 - } - ], - "original_duration": 2.3, - "original_num_samples": 36800, - "transcript": "it is terrible and yet glorious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 15.47, - "num_samples": 247520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0016.wav", - "speed": 1 - } - ], - "original_duration": 15.47, - "original_num_samples": 247520, - "transcript": "david held it close in both of his saying gratefully thank you mother then fixing his eyes on the younger yet not dearer women he added with a ring in his voice that made their hearts answer with a prompt ay ay in spite of love or fear" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.895, - "num_samples": 190320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0017.wav", - "speed": 1 - } - ], - "original_duration": 11.895, - "original_num_samples": 190320, - "transcript": "the boys bless their brave hearts have done nobly but older men are needed now we cannot sacrifice all the gallant lads and we who have more to lose than they must take our turn and try to do as well" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 10.305, - "num_samples": 164880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0018.wav", - "speed": 1 - } - ], - "original_duration": 10.305, - "original_num_samples": 164880, - "transcript": "yes david sister and sweetheart answered bravely forgetting in the fervor of the moment what heavy consequences god might see fit to send good" - }, - { - 
"files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.8, - "num_samples": 92800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0019.wav", - "speed": 1 - } - ], - "original_duration": 5.8, - "original_num_samples": 92800, - "transcript": "bennet will take the garden and green house off my hands this autumn for a year or longer if i like" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.07, - "num_samples": 97120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0020.wav", - "speed": 1 - } - ], - "original_duration": 6.07, - "original_num_samples": 97120, - "transcript": "he's a kind neighborly man and his boy will take my place about the house and protect you faithfully" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.5, - "num_samples": 72000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0021.wav", - "speed": 1 - } - ], - "original_duration": 4.5, - "original_num_samples": 72000, - "transcript": "i knew you would go i saw you getting ready and i made up my mind to follow" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.505, - "num_samples": 136080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0022.wav", - "speed": 1 - } - ], - "original_duration": 8.505, - "original_num_samples": 136080, - "transcript": "you will let me do it and in return i will marry you whenever you ask me answered christie sealing the promise with a kiss that silenced him" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.98, - "num_samples": 79680, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3853/163249/3853-163249-0023.wav", - "speed": 1 - } - ], - "original_duration": 4.98, - "original_num_samples": 79680, - "transcript": "you've something to tell me i see it in your face dear i must go" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 14.68, - "num_samples": 234880, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0024.wav", - "speed": 1 - } - ], - "original_duration": 14.68, - "original_num_samples": 234880, - "transcript": "next evening as missus sterling sat alone in the twilight a tall man in army blue entered quietly stood watching the tranquil figure for a moment then went and knelt down beside it saying with a most unsoldierly choke in the voice" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.095, - "num_samples": 97520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0025.wav", - "speed": 1 - } - ], - "original_duration": 6.095, - "original_num_samples": 97520, - "transcript": "the loyal frenzy fell upon the three quiet women and they could not do too much for their country" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.835, - "num_samples": 93360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0026.wav", - "speed": 1 - } - ], - "original_duration": 5.835, - "original_num_samples": 93360, - "transcript": "it would have taken many knapsacks to hold all the gifts showered upon him by his friends and neighbors" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.58, - "num_samples": 137280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0027.wav", - "speed": 1 - } - ], - "original_duration": 8.58, 
- "original_num_samples": 137280, - "transcript": "finding that lisha showed little enthusiasm on the subject she tried to rouse him by patriotic appeals of various sorts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 9.57, - "num_samples": 153120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0028.wav", - "speed": 1 - } - ], - "original_duration": 9.57, - "original_num_samples": 153120, - "transcript": "very well said missus wilkins resolutely to herself ef i can't make no impression on his soul i will on his stommick and see how that'll work" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.945, - "num_samples": 63120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0029.wav", - "speed": 1 - } - ], - "original_duration": 3.945, - "original_num_samples": 63120, - "transcript": "we can't afford no nice vittles now when our men are sufferin so" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 16.445, - "num_samples": 263120, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0030.wav", - "speed": 1 - } - ], - "original_duration": 16.445, - "original_num_samples": 263120, - "transcript": "he was not as unmoved as he seemed by the general excitement and had felt sundry manly impulses to up and at em when his comrades in the shop discussed the crisis with ireful brandishing of awls and vengeful pounding of sole leather as if the rebels were under the hammer" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.83, - "num_samples": 141280, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0031.wav", - "speed": 1 - } - ], - "original_duration": 
8.83, - "original_num_samples": 141280, - "transcript": "to say that the fish rose at once and swallowed the bait hook and all but feebly expresses the justice done to the cakes by that long suffering man" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.34, - "num_samples": 85440, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0032.wav", - "speed": 1 - } - ], - "original_duration": 5.34, - "original_num_samples": 85440, - "transcript": "can you remember what hepsey told us and call them poor long sufferin creeters names" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.875, - "num_samples": 62000, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0033.wav", - "speed": 1 - } - ], - "original_duration": 3.875, - "original_num_samples": 62000, - "transcript": "no he ain't it's a trainer added ann lizy" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 2.765, - "num_samples": 44240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0034.wav", - "speed": 1 - } - ], - "original_duration": 2.765, - "original_num_samples": 44240, - "transcript": "now cynthy be you satisfied" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.755, - "num_samples": 76080, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0035.wav", - "speed": 1 - } - ], - "original_duration": 4.755, - "original_num_samples": 76080, - "transcript": "and the inconsistent woman fell upon his buttony breast weeping copiously" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 24.125, - "num_samples": 386000, - "encoding": "Signed Integer 
PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0036.wav", - "speed": 1 - } - ], - "original_duration": 24.125, - "original_num_samples": 386000, - "transcript": "his wife fed him with the fat of the land regardless of consequences his children revolved about him with tireless curiosity and wonder his neighbors flocked in to applaud advise and admire every one treated him with a respect most grateful to his feelings he was an object of interest and with every hour his importance increased so that by night he felt like a commander in chief and bore himself accordingly" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.665, - "num_samples": 106640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0037.wav", - "speed": 1 - } - ], - "original_duration": 6.665, - "original_num_samples": 106640, - "transcript": "then the good soul openly shouldered the burden she had borne so long in secret and bravely trudged on alone" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 6.24, - "num_samples": 99840, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0038.wav", - "speed": 1 - } - ], - "original_duration": 6.24, - "original_num_samples": 99840, - "transcript": "the women dropped their work to look and listen for his visits were few and short and every instant was precious" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.79, - "num_samples": 76640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0039.wav", - "speed": 1 - } - ], - "original_duration": 4.79, - "original_num_samples": 76640, - "transcript": "they knew what it was without a word missus sterling clasped her hands and bowed her head" - }, - { - "files": [ - { - 
"channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.27, - "num_samples": 52320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0040.wav", - "speed": 1 - } - ], - "original_duration": 3.27, - "original_num_samples": 52320, - "transcript": "now let's be brave and enjoy every minute of it" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.14, - "num_samples": 114240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0041.wav", - "speed": 1 - } - ], - "original_duration": 7.14, - "original_num_samples": 114240, - "transcript": "we will what can i do for you davy asked christie wonderfully supported by the thought that she was going too" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.185, - "num_samples": 210960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0042.wav", - "speed": 1 - } - ], - "original_duration": 13.185, - "original_num_samples": 210960, - "transcript": "as a married woman you will get on better as my wife you will be allowed to come to me if i need you and as my he stopped there for he could not add as my widow you will have my pension to support you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.66, - "num_samples": 90560, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0043.wav", - "speed": 1 - } - ], - "original_duration": 5.66, - "original_num_samples": 90560, - "transcript": "nothing can part us any more not even death for love like ours will last for ever" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.505, - "num_samples": 56080, - "encoding": "Signed Integer PCM", - "silent": false, 
- "fname": "dev-clean-wav/3853/163249/3853-163249-0044.wav", - "speed": 1 - } - ], - "original_duration": 3.505, - "original_num_samples": 56080, - "transcript": "not one david that's true love christie" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 7.29, - "num_samples": 116640, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0045.wav", - "speed": 1 - } - ], - "original_duration": 7.29, - "original_num_samples": 116640, - "transcript": "then they stood quite still for a time and in the silence the two hearts talked together in the sweet language no tongue can utter" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 17.425, - "num_samples": 278800, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0046.wav", - "speed": 1 - } - ], - "original_duration": 17.425, - "original_num_samples": 278800, - "transcript": "surely i shall if i give you and myself to the cause and i do it gladly though i know that my heart has got to ache as it never has ached yet when my courage fails as it will by and by and my selfish soul counts the cost of my offering after the excitement is over" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.935, - "num_samples": 190960, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0047.wav", - "speed": 1 - } - ], - "original_duration": 11.935, - "original_num_samples": 190960, - "transcript": "david caught the exaltation and gave no further thought to any thing but the duty of the hour finding himself stronger and braver for that long look into the illuminated face of the woman he loved" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 8.565, - "num_samples": 137040, - 
"encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0048.wav", - "speed": 1 - } - ], - "original_duration": 8.565, - "original_num_samples": 137040, - "transcript": "the roses are for they remind me of poor helen and the first work i did with david was arranging flowers like these for a dead baby's little coffin" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 13.565, - "num_samples": 217040, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0049.wav", - "speed": 1 - } - ], - "original_duration": 13.565, - "original_num_samples": 217040, - "transcript": "but i think few brides dress with a braver happier heart than mine though i do choose a sober wedding gown answered christie smiling again as she took from a half packed trunk her new hospital suit of soft gray woollen stuff" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.335, - "num_samples": 69360, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0050.wav", - "speed": 1 - } - ], - "original_duration": 4.335, - "original_num_samples": 69360, - "transcript": "mister power is waiting are you ready love quite ready" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 5.76, - "num_samples": 92160, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0051.wav", - "speed": 1 - } - ], - "original_duration": 5.76, - "original_num_samples": 92160, - "transcript": "you young folks take a wedding trip to the green house while we see how well we can get on without you" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 11.97, - "num_samples": 191520, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": 
"dev-clean-wav/3853/163249/3853-163249-0052.wav", - "speed": 1 - } - ], - "original_duration": 11.97, - "original_num_samples": 191520, - "transcript": "david and christie went smiling away together and if they shed any tears over the brief happiness no one saw them but the flowers and they loyally kept the secret folded up in their tender hearts" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 20.64, - "num_samples": 330240, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0053.wav", - "speed": 1 - } - ], - "original_duration": 20.64, - "original_num_samples": 330240, - "transcript": "a very simple little marriage feast but more love good will and tender wishes adorned the plain table than is often found at wedding breakfasts and better than any speech or song was letty's broken whisper as she folded her arms round david's empty chair when no one saw her heaven bless and keep and bring him back to us" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 18.545063, - "num_samples": 296721, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0054.wav", - "speed": 1 - } - ], - "original_duration": 18.545063, - "original_num_samples": 296721, - "transcript": "all watched with quickened breath and proud souls that living wave blue below and bright with a steely glitter above as it flowed down the street and away to join the sea of dauntless hearts that for months had rolled up against the south and ebbed back reddened with the blood of men like these" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 3.895, - "num_samples": 62320, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0055.wav", - "speed": 1 - } - ], - "original_duration": 3.895, - 
"original_num_samples": 62320, - "transcript": "then she saw david and the regiment became one man to her" - }, - { - "files": [ - { - "channels": 1, - "sample_rate": 16000.0, - "bitrate": 16, - "duration": 4.155, - "num_samples": 66480, - "encoding": "Signed Integer PCM", - "silent": false, - "fname": "dev-clean-wav/3853/163249/3853-163249-0056.wav", - "speed": 1 - } - ], - "original_duration": 4.155, - "original_num_samples": 66480, - "transcript": "i could not love thee dear so much loved i not honor more" - } -] \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/environment.yml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/environment.yml deleted file mode 100644 index 4958247b7ff..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/environment.yml +++ /dev/null @@ -1,128 +0,0 @@ -name: mlperf-rnnt -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - absl-py=0.9.0=py36_0 - - blas=1.0=mkl - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2020.4.5.1=hecc5488_0 - - certifi=2020.4.5.1=py36h9f0ad1d_0 - - cffi=1.14.0=py36h2e261b9_0 - - cmake=3.14.0=h52cb24c_0 - - cudatoolkit=10.1.243=h6bb024c_0 - - cudatoolkit-dev=10.1.243=h516909a_3 - - expat=2.2.6=he6710b0_0 - - freetype=2.9.1=h8a8886c_1 - - gdb=8.3.1=py36h497da48_1 - - intel-openmp=2020.0=166 - - jpeg=9b=h024ee3a_2 - - krb5=1.17.1=h173b8e3_0 - - lame=3.100=h14c3975_1001 - - ld_impl_linux-64=2.33.1=h53a641e_7 - - libcurl=7.69.1=h20c2e04_0 - - libedit=3.1.20181209=hc058e9b_0 - - libffi=3.2.1=hd88cf55_4 - - libgcc-ng=9.1.0=hdf63c60_0 - - libgfortran-ng=7.3.0=hdf63c60_0 - - libpng=1.6.37=hbc83047_0 - - libssh2=1.9.0=h1ba5d50_1 - - libstdcxx-ng=9.1.0=hdf63c60_0 - - libtiff=4.1.0=h2733197_0 - - mad=0.15.1b=he1b5a44_0 - - mkl=2020.0=166 - - mkl-include=2020.0=166 - - mkl-service=2.3.0=py36he904b0f_0 - - mkl_fft=1.0.15=py36ha843d7b_0 - - 
mkl_random=1.1.0=py36hd6b4f25_0 - - ncurses=6.1=hf484d3e_1002 - - ninja=1.9.0=py36hfd86e86_0 - - numpy=1.18.1=py36h4f9e942_0 - - numpy-base=1.18.1=py36hde5b4d6_1 - - olefile=0.46=py_0 - - openssl=1.1.1g=h516909a_0 - - pillow=7.0.0=py36hb39fc2d_0 - - pip=20.0.2=py36_1 - - pycparser=2.20=py_0 - - python=3.6.10=h0371630_0 - - python_abi=3.6=1_cp36m - - pytorch=1.5.0=py3.6_cuda10.1.243_cudnn7.6.3_0 - - pyyaml=5.3.1=py36h7b6447c_0 - - readline=7.0=hf8c457e_1001 - - rhash=1.3.8=h1ba5d50_0 - - setuptools=46.1.3=py36_0 - - six=1.14.0=py36_0 - - sqlite=3.31.1=h7b6447c_0 - - tk=8.6.8=hbc83047_0 - - torchvision=0.6.0=py36_cu101 - - wheel=0.34.2=py36_0 - - xz=5.2.4=h14c3975_4 - - yaml=0.1.7=had09818_2 - - zlib=1.2.11=h7b6447c_3 - - zstd=1.3.7=h0b5b093_0 - - pip: - - ascii-graph==1.5.1 - - attrs==19.3.0 - - audioread==2.1.8 - - autopep8==1.5.1 - - backcall==0.1.0 - - chardet==3.0.4 - - coverage==5.0.4 - - decorator==4.4.2 - - entrypoints==0.3 - - flake8==3.7.9 - - grpcio==1.28.1 - - idna==2.9 - - importlib-metadata==1.6.0 - - inflect==4.1.0 - - ipdb==0.13.2 - - ipython==7.13.0 - - ipython-genutils==0.2.0 - - jedi==0.16.0 - - joblib==0.14.1 - - librosa==0.7.2 - - llvmlite==0.31.0 - - markdown==3.2.1 - - mccabe==0.6.1 - - more-itertools==8.2.0 - - numba==0.48.0 - - onnx==1.6.0 - - onnxruntime==1.2.0 - - packaging==20.3 - - pandas==0.24.2 - - parso==0.6.2 - - pexpect==4.8.0 - - pickleshare==0.7.5 - - pluggy==0.13.1 - - prompt-toolkit==3.0.5 - - protobuf==3.11.3 - - ptyprocess==0.6.0 - - py==1.8.1 - - pycodestyle==2.5.0 - - pyflakes==2.1.1 - - pygments==2.6.1 - - pyparsing==2.4.7 - - pytest==5.4.2 - - python-dateutil==2.8.1 - - pytz==2019.3 - - requests==2.23.0 - - resampy==0.2.2 - - scikit-learn==0.22.2.post1 - - scipy==1.4.1 - - soundfile==0.10.3.post1 - - sox==1.3.7 - - tensorboard==2.0.0 - - toml==0.10.0 - - tqdm==4.31.1 - - traitlets==4.3.3 - - typing-extensions==3.7.4.2 - - unidecode==1.1.1 - - urllib3==1.25.8 - - wcwidth==0.1.9 - - werkzeug==1.0.1 - - wrapt==1.10.11 - - 
zipp==3.1.0 -prefix: /cb/home/daniel/ws/miniconda3/envs/mlperf-rnnt - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/mlperf.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/mlperf.conf deleted file mode 100644 index 7f5b55b58e2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/mlperf.conf +++ /dev/null @@ -1,65 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. In that case the value applies to all models. -# All times are in milli seconds - -# Set performance_sample_count for each model. -# User can optionally set this to higher values in user.conf. -mobilenet.*.performance_sample_count_override = 1024 -gnmt.*.performance_sample_count_override = 3903900 -resnet50.*.performance_sample_count_override = 1024 -ssd-mobilenet.*.performance_sample_count_override = 256 -ssd-resnet34.*.performance_sample_count_override = 64 -bert.*.performance_sample_count_override = 10833 -dlrm.*.performance_sample_count_override = 204800 -rnnt.*.performance_sample_count_override = 2513 -3d-unet.*.performance_sample_count_override = 16 - -# Set seeds. The seeds will be distributed two weeks before the submission. 
-*.*.qsl_rng_seed = 12786827339337101903 -*.*.sample_index_rng_seed = 12640797754436136668 -*.*.schedule_rng_seed = 3135815929913719677 - -*.SingleStream.target_latency_percentile = 90 -*.SingleStream.min_duration = 60000 -*.SingleStream.min_query_count = 1024 - -*.MultiStream.target_qps = 20 -*.MultiStream.target_latency_percentile = 99 -*.MultiStream.max_async_queries = 1 -*.MultiStream.target_latency = 50 -*.MultiStream.min_duration = 60000 -*.MultiStream.min_query_count = 270336 -ssd-resnet34.MultiStream.target_qps = 15 -ssd-resnet34.MultiStream.target_latency = 66 -gnmt.MultiStream.min_query_count = 90112 -gnmt.MultiStream.target_latency = 100 -gnmt.MultiStream.target_qps = 10 -gnmt.MultiStream.target_latency_percentile = 97 - -*.Server.target_latency = 10 -*.Server.target_latency_percentile = 99 -*.Server.target_duration = 0 -*.Server.min_duration = 60000 -*.Server.min_query_count = 270336 -resnet50.Server.target_latency = 15 -ssd-resnet34.Server.target_latency = 100 -gnmt.Server.min_query_count = 90112 -gnmt.Server.target_latency = 250 -gnmt.Server.target_latency_percentile = 97 -bert.Server.target_latency = 130 -dlrm.Server.target_latency = 30 -rnnt.Server.target_latency = 1000 - -*.Offline.target_latency_percentile = 90 -*.Offline.min_duration = 60000 -# In Offline scenario, we always have one query. But LoadGen maps this to -# min_sample_count internally in Offline scenario, so set this to 24576 since -# the rule requires that Offline scenario run for at least 24576 samples. -*.Offline.min_query_count = 24576 - -# These fields should be defined and overridden by user.conf. 
-*.SingleStream.target_latency = 10 -*.Server.target_qps = 1.0 -*.Offline.target_qps = 1.0 -*.MultiStream.samples_per_query = 4 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_dataset.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_dataset.sh deleted file mode 100644 index 2c517ee70ba..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_dataset.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - prepare_dataset - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --download_dir=*) - download_dir=$(echo $var |cut -f2 -d=) - ;; - --convert_dir=*) - convert_dir=$(echo $var |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - - mkdir -p $download_dir $convert_dir -} - -# prepare_dataset -function prepare_dataset { - # if you already have origin dataset, set stage=2, make sure to extract it \ - # and change the origin dataset path to your path - stage=1 - - # Download dataset - if [[ $stage -le 1 ]]; then - python pytorch/utils/download_librispeech.py \ - pytorch/utils/librispeech-inference.csv \ - $download_dir \ - -e $download_dir - fi - - # Convert dataset - if [[ $stage -le 2 ]]; then - python pytorch/utils/convert_librispeech.py \ - --input_dir $download_dir/LibriSpeech/dev-clean \ - --dest_dir $convert_dir/dev-clean-wav \ - --output_json $convert_dir/dev-clean-wav.json - fi -} - -main "$@" \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_loadgen.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_loadgen.sh deleted file mode 100644 index e04a48d2c9c..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/prepare_loadgen.sh +++ /dev/null @@ -1,10 +0,0 @@ -pushd . 
-echo "Install loadgen" -git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference -cd mlperf_inference -git checkout r1.1 -git log -1 -git submodule update --init --recursive -cd loadgen -CFLAGS="-std=c++14" python setup.py install -popd diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/Dockerfile b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/Dockerfile deleted file mode 100644 index 1cb52bf6261..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3 -FROM ${FROM_IMAGE_NAME} - - -RUN apt-get update && apt-get install -y libsndfile1 && apt-get install -y sox && rm -rf /var/lib/apt/lists/* - -RUN COMMIT_SHA=c6d12f9e1562833c2b4e7ad84cb22aa4ba31d18c && \ - git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer && \ - cd deps/warp-transducer && \ - git checkout $COMMIT_SHA && \ - mkdir build && \ - cd build && \ - cmake .. 
&& \ - make VERBOSE=1 && \ - export CUDA_HOME="/usr/local/cuda" && \ - export WARP_RNNT_PATH=`pwd` && \ - export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME && \ - export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" && \ - export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH && \ - export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH && \ - export CFLAGS="-I$CUDA_HOME/include $CFLAGS" && \ - cd ../pytorch_binding && \ - python3 setup.py install --user && \ - rm -rf ../tests test ../tensorflow_binding && \ - cd ../../.. - -WORKDIR /workspace/jasper - -COPY requirements.txt . -RUN pip install --disable-pip-version-check -U -r requirements.txt - -COPY . . diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/LICENSE deleted file mode 100644 index 75ee157cd96..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/LICENSE +++ /dev/null @@ -1,204 +0,0 @@ - Except where otherwise noted, the following license applies to all files in this repo. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2019 NVIDIA Corporation - Copyright 2019 Myrtle Software Limited, www.myrtle.ai - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/NOTICE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/NOTICE deleted file mode 100644 index 7916839bcc4..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Jasper in PyTorch - -This repository includes source code (in "parts/") from: -* https://github.com/keithito/tacotron and https://github.com/ryanleary/patter licensed under MIT license. - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/configs/rnnt.toml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/configs/rnnt.toml deleted file mode 100644 index a4cd1dfb470..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/configs/rnnt.toml +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model = "RNNT" - -[input] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 # TODO -max_duration = 16.7 -speed_perturbation = true - - -cutout_rect_regions = 0 -cutout_rect_time = 60 -cutout_rect_freq = 25 - - -cutout_x_regions = 2 -cutout_y_regions = 2 -cutout_x_width = 6 -cutout_y_width = 6 - - -[input_eval] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 - - -[rnnt] -rnn_type = "lstm" -encoder_n_hidden = 1024 -encoder_pre_rnn_layers = 2 -encoder_stack_time_factor = 2 -encoder_post_rnn_layers = 3 -pred_n_hidden = 320 -pred_rnn_layers = 2 -forget_gate_bias = 1.0 -joint_n_hidden = 512 -dropout=0.32 - - -[labels] -labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/dataset.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/dataset.py deleted file mode 100644 index 7b9036f1c55..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/dataset.py +++ /dev/null @@ -1,159 +0,0 @@ -# 
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This file contains classes and functions related to data loading -""" -from collections import namedtuple -import torch -import numpy as np -from torch.utils.data import Dataset -from parts.manifest import Manifest -from parts.features import WaveformFeaturizer - - -def seq_collate_fn(batch): - """batches samples and returns as tensors - Args: - batch : list of samples - Returns - batches of tensors - """ - audio_lengths = torch.LongTensor([sample.waveform.size(0) - for sample in batch]) - transcript_lengths = torch.LongTensor([sample.transcript.size(0) - for sample in batch]) - permute_indices = torch.argsort(audio_lengths, descending=True) - - audio_lengths = audio_lengths[permute_indices] - transcript_lengths = transcript_lengths[permute_indices] - padded_audio_signals = torch.nn.utils.rnn.pad_sequence( - [batch[i].waveform for i in permute_indices], - batch_first=True - ) - transcript_list = [batch[i].transcript - for i in permute_indices] - packed_transcripts = torch.nn.utils.rnn.pack_sequence(transcript_list, - enforce_sorted=False) - - # TODO: Don't I need to stop grad at some point now? 
- return (padded_audio_signals, audio_lengths, transcript_list, - packed_transcripts, transcript_lengths) - - -class AudioToTextDataLayer: - """Data layer with data loader - """ - - def __init__(self, **kwargs): - featurizer_config = kwargs['featurizer_config'] - pad_to_max = kwargs.get('pad_to_max', False) - perturb_config = kwargs.get('perturb_config', None) - manifest_filepath = kwargs['manifest_filepath'] - dataset_dir = kwargs['dataset_dir'] - labels = kwargs['labels'] - batch_size = kwargs['batch_size'] - drop_last = kwargs.get('drop_last', False) - shuffle = kwargs.get('shuffle', True) - min_duration = featurizer_config.get('min_duration', 0.1) - max_duration = featurizer_config.get('max_duration', None) - normalize_transcripts = kwargs.get('normalize_transcripts', True) - trim_silence = kwargs.get('trim_silence', False) - sampler_type = kwargs.get('sampler', 'default') - speed_perturbation = featurizer_config.get('speed_perturbation', False) - sort_by_duration = sampler_type == 'bucket' - self._featurizer = WaveformFeaturizer.from_config( - featurizer_config, perturbation_configs=perturb_config) - self._dataset = AudioDataset( - dataset_dir=dataset_dir, - manifest_filepath=manifest_filepath, - labels=labels, blank_index=len(labels), - sort_by_duration=sort_by_duration, - pad_to_max=pad_to_max, - featurizer=self._featurizer, max_duration=max_duration, - min_duration=min_duration, normalize=normalize_transcripts, - trim=trim_silence, speed_perturbation=speed_perturbation) - - print('sort_by_duration', sort_by_duration) - - self._dataloader = torch.utils.data.DataLoader( - dataset=self._dataset, - batch_size=batch_size, - collate_fn=lambda b: seq_collate_fn(b), - drop_last=drop_last, - shuffle=shuffle, - num_workers=0, - pin_memory=True, - sampler=None - ) - - def __len__(self): - return len(self._dataset) - - @property - def data_iterator(self): - return self._dataloader - - -class AudioDataset(Dataset): - def __init__(self, dataset_dir, manifest_filepath, 
labels, featurizer, max_duration=None, pad_to_max=False, - min_duration=None, blank_index=0, max_utts=0, normalize=True, sort_by_duration=False, - trim=False, speed_perturbation=False): - """Dataset that loads tensors via a json file containing paths to audio files, transcripts, and durations - (in seconds). Each entry is a different audio sample. - Args: - dataset_dir: absolute path to dataset folder - manifest_filepath: relative path from dataset folder to manifest json as described above. - labels: String containing all the possible characters to map to - featurizer: Initialized featurizer class that converts paths of audio to feature tensors - max_duration: If audio exceeds this length, do not include in dataset - min_duration: If audio is less than this length, do not include in dataset - pad_to_max: if specified input sequences into dnn model will be padded to max_duration - blank_index: blank index for ctc loss / decoder - max_utts: Limit number of utterances - normalize: whether to normalize transcript text - sort_by_duration: whether or not to sort sequences by increasing duration - trim: if specified trims leading and trailing silence from an audio signal. - speed_perturbation: specify if using data contains speed perburbation - """ - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, blank_index, pad_to_max=pad_to_max, - max_duration=max_duration, - sort_by_duration=sort_by_duration, - min_duration=min_duration, max_utts=max_utts, - normalize=normalize, speed_perturbation=speed_perturbation) - self.featurizer = featurizer - self.blank_index = blank_index - self.trim = trim - print( - "Dataset loaded with {0:.2f} hours. 
Filtered {1:.2f} hours.".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600)) - - def __getitem__(self, index): - sample = self.manifest[index] - rn_indx = np.random.randint(len(sample['audio_filepath'])) - duration = sample['audio_duration'][rn_indx] if 'audio_duration' in sample else 0 - offset = sample['offset'] if 'offset' in sample else 0 - features = self.featurizer.process(sample['audio_filepath'][rn_indx], - offset=offset, duration=duration, - trim=self.trim) - - AudioSample = namedtuple('AudioSample', ['waveform', - 'transcript']) - return AudioSample(features, - torch.LongTensor(sample["transcript"])) - - def __len__(self): - return len(self.manifest) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/decoders.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/decoders.py deleted file mode 100644 index 2ce25ac8ac5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/decoders.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Optional, Tuple - -import torch - -import torch.nn.functional as F -from model_separable_rnnt import label_collate - - -class ScriptGreedyDecoder(torch.nn.Module): - """A greedy transducer decoder. 
- - Args: - blank_symbol: See `Decoder`. - model: Model to use for prediction. - max_symbols_per_step: The maximum number of symbols that can be added - to a sequence in a single time step; if set to None then there is - no limit. - cutoff_prob: Skip to next step in search if current highest character - probability is less than this. - """ - - def __init__(self, blank_index, model, max_symbols_per_step=30): - super().__init__() - # assert isinstance(model, torch.jit.ScriptModule) - # assert not model.training - self.eval() - self._model = model - self._blank_id = blank_index - self._SOS = -1 - assert max_symbols_per_step > 0 - self._max_symbols_per_step = max_symbols_per_step - - @torch.jit.export - def forward(self, x: torch.Tensor, out_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, List[List[int]]]: - """Returns a list of sentences given an input batch. - - Args: - x: A tensor of size (batch, channels, features, seq_len) - TODO was (seq_len, batch, in_features). - out_lens: list of int representing the length of each sequence - output sequence. - - Returns: - list containing batch number of sentences (strings). 
- """ - # Apply optional preprocessing - - logits, logits_lens = self._model.encoder(x, out_lens) - - output: List[List[int]] = [] - for batch_idx in range(logits.size(0)): - inseq = logits[batch_idx, :, :].unsqueeze(1) - # inseq: TxBxF - logitlen = logits_lens[batch_idx] - sentence = self._greedy_decode(inseq, logitlen) - output.append(sentence) - - return logits, logits_lens, output - - def _greedy_decode(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]: - hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - label: List[int] = [] - for time_idx in range(int(out_len.item())): - f = x[time_idx, :, :].unsqueeze(0) - - not_blank = True - symbols_added = 0 - - while not_blank and symbols_added < self._max_symbols_per_step: - g, hidden_prime = self._pred_step( - self._get_last_symb(label), - hidden - ) - logp = self._joint_step(f, g, log_normalize=False)[0, :] - - # get index k, of max prob - v, k = logp.max(0) - k = k.item() - - if k == self._blank_id: - not_blank = False - else: - label.append(k) - hidden = hidden_prime - symbols_added += 1 - - return label - - def _pred_step(self, label: int, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - if label == self._SOS: - return self._model.prediction(None, hidden) - if label > self._blank_id: - label -= 1 - label = torch.tensor([[label]], dtype=torch.int64) - return self._model.prediction(label, hidden) - - def _joint_step(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor: - logits = self._model.joint(enc, pred)[:, 0, 0, :] - if not log_normalize: - return logits - - probs = F.log_softmax(logits, dim=len(logits.shape) - 1) - - return probs - - def _get_last_symb(self, labels: List[int]) -> int: - return self._SOS if len(labels) == 0 else labels[-1] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/helpers.py 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/helpers.py deleted file mode 100644 index cfe3b66f3c8..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/helpers.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from enum import Enum -from metrics import word_error_rate - - -class Optimization(Enum): - """Various levels of Optimization. - WARNING: This might have effect on model accuracy.""" - nothing = 0 - mxprO0 = 1 - mxprO1 = 2 - mxprO2 = 3 - mxprO3 = 4 - - -AmpOptimizations = {Optimization.mxprO0: "O0", - Optimization.mxprO1: "O1", - Optimization.mxprO2: "O2", - Optimization.mxprO3: "O3"} - - -def add_blank_label(labels): - if not isinstance(labels, list): - raise ValueError("labels must be a list of symbols") - labels.append("") - return labels - - -def __rnnt_decoder_predictions_tensor(tensor, labels): - """ - Takes output of greedy rnnt decoder and converts to strings. 
- Args: - tensor: model output tensor - label: A list of labels - Returns: - prediction - """ - hypotheses = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - # iterate over batch - for ind in range(len(tensor)): - hypothesis = ''.join([labels_map[c] for c in tensor[ind]]) - hypotheses.append(hypothesis) - return hypotheses - - -def __gather_predictions(predictions_list: list, labels: list) -> list: - results = [] - for prediction in predictions_list: - results += __rnnt_decoder_predictions_tensor(prediction, labels=labels) - return results - - -def __gather_transcripts(transcript_list: list, transcript_len_list: list, - labels: list) -> list: - results = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - for i, t in enumerate(transcript_list): - target = t.numpy().tolist() - reference = ''.join([labels_map[c] for c in target]) - results.append(reference) - return results - - -def process_evaluation_batch(tensors: dict, global_vars: dict, labels: list): - """ - Processes results of an iteration and saves it in global_vars - Args: - tensors: dictionary with results of an evaluation iteration, e.g. 
loss, predictions, transcript, and output - global_vars: dictionary where processes results of iteration are saved - labels: A list of labels - """ - for kv, v in tensors.items(): - if kv.startswith('predictions'): - global_vars['predictions'] += __gather_predictions( - v, labels=labels) - elif kv.startswith('transcript_length'): - transcript_len_list = v - elif kv.startswith('transcript'): - transcript_list = v - - global_vars['transcripts'] += __gather_transcripts(transcript_list, - transcript_len_list, - labels=labels) - - -def process_evaluation_epoch(global_vars: dict, tag=None): - """ - Processes results from each worker at the end of evaluation and combine to final result - Args: - global_vars: dictionary containing information of entire evaluation - Return: - wer: final word error rate - loss: final loss - """ - hypotheses = global_vars['predictions'] - references = global_vars['transcripts'] - - wer, scores, num_words = word_error_rate( - hypotheses=hypotheses, references=references) - return wer - - -def print_dict(d): - maxLen = max([len(ii) for ii in d.keys()]) - fmtString = '\t%' + str(maxLen) + 's : %s' - print('Arguments:') - for keyPair in sorted(d.items()): - print(fmtString % keyPair) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/metrics.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/metrics.py deleted file mode 100644 index 5426e37237a..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/metrics.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List - - -def __levenshtein(a: List, b: List) -> int: - """Calculates the Levenshtein distance between a and b. - """ - n, m = len(a), len(b) - if n > m: - # Make sure n <= m, to use O(min(n,m)) space - a, b = b, a - n, m = m, n - - current = list(range(n + 1)) - for i in range(1, m + 1): - previous, current = current, [i] + [0] * n - for j in range(1, n + 1): - add, delete = previous[j] + 1, current[j - 1] + 1 - change = previous[j - 1] - if a[j - 1] != b[i - 1]: - change = change + 1 - current[j] = min(add, delete, change) - - return current[n] - - -def word_error_rate(hypotheses: List[str], references: List[str]) -> float: - """ - Computes Average Word Error rate between two texts represented as - corresponding lists of string. Hypotheses and references must have same length. - - Args: - hypotheses: list of hypotheses - references: list of references - - Returns: - (float) average word error rate - """ - scores = 0 - words = 0 - if len(hypotheses) != len(references): - raise ValueError("In word error rate calculation, hypotheses and reference" - " lists must have the same number of elements. 
But I got:" - "{0} and {1} correspondingly".format(len(hypotheses), len(references))) - for h, r in zip(hypotheses, references): - h_list = h.split() - r_list = r.split() - words += len(r_list) - scores += __levenshtein(h_list, r_list) - if words != 0: - wer = (1.0 * scores) / words - else: - wer = float('inf') - return wer, scores, words diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/model_separable_rnnt.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/model_separable_rnnt.py deleted file mode 100644 index 68a0ed6b5e5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/model_separable_rnnt.py +++ /dev/null @@ -1,214 +0,0 @@ -from typing import Optional, Tuple - -import numpy as np -import torch - -from rnn import rnn -from rnn import StackTime - - -class RNNT(torch.nn.Module): - def __init__(self, rnnt=None, num_classes=1, **kwargs): - super().__init__() - if kwargs.get("no_featurizer", False): - in_features = kwargs.get("in_features") - else: - feat_config = kwargs.get("feature_config") - # This may be useful in the future, for MLPerf - # configuration. 
- in_features = feat_config['features'] * \ - feat_config.get("frame_splicing", 1) - - self.encoder = Encoder(in_features, - rnnt["encoder_n_hidden"], - rnnt["encoder_pre_rnn_layers"], - rnnt["encoder_post_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["encoder_stack_time_factor"], - rnnt["dropout"], - ) - - self.prediction = Prediction( - num_classes, - rnnt["pred_n_hidden"], - rnnt["pred_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["dropout"], - ) - - self.joint = Joint( - num_classes, - rnnt["pred_n_hidden"], - rnnt["encoder_n_hidden"], - rnnt["joint_n_hidden"], - rnnt["dropout"], - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - return self.encoder(x_padded, x_lens) - - -class Encoder(torch.nn.Module): - def __init__(self, in_features, encoder_n_hidden, - encoder_pre_rnn_layers, encoder_post_rnn_layers, - forget_gate_bias, norm, rnn_type, encoder_stack_time_factor, - dropout): - super().__init__() - self.pre_rnn = rnn( - rnn=rnn_type, - input_size=in_features, - hidden_size=encoder_n_hidden, - num_layers=encoder_pre_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - self.stack_time = StackTime(factor=encoder_stack_time_factor) - self.post_rnn = rnn( - rnn=rnn_type, - input_size=encoder_stack_time_factor * encoder_n_hidden, - hidden_size=encoder_n_hidden, - num_layers=encoder_post_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - norm_first_rnn=True, - dropout=dropout, - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - x_padded, _ = self.pre_rnn(x_padded, None) - x_padded, x_lens = self.stack_time(x_padded, x_lens) - # (T, B, H) - x_padded, _ = self.post_rnn(x_padded, None) - # (B, T, H) - x_padded = x_padded.transpose(0, 1) - return x_padded, x_lens - 
-class Prediction(torch.nn.Module): - def __init__(self, vocab_size, n_hidden, pred_rnn_layers, - forget_gate_bias, norm, rnn_type, dropout): - super().__init__() - self.embed = torch.nn.Embedding(vocab_size - 1, n_hidden) - self.n_hidden = n_hidden - self.dec_rnn = rnn( - rnn=rnn_type, - input_size=n_hidden, - hidden_size=n_hidden, - num_layers=pred_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - - def forward(self, y: Optional[torch.Tensor], - state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - """ - B - batch size - U - label length - H - Hidden dimension size - L - Number of decoder layers = 2 - - Args: - y: (B, U) - - Returns: - Tuple (g, hid) where: - g: (B, U + 1, H) - hid: (h, c) where h is the final sequence hidden state and c is - the final cell state: - h (tensor), shape (L, B, H) - c (tensor), shape (L, B, H) - """ - if y is None: - # This is gross. I should really just pass in an SOS token - # instead. Is there no SOS token? - assert state is None - # Hacky, no way to determine this right now! 
- B = 1 - y = torch.zeros((B, 1, self.n_hidden), dtype=torch.float32) - else: - y = self.embed(y) - - # if state is None: - # batch = y.size(0) - # state = [ - # (torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device), - # torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device)) - # for _ in range(self.pred_rnn_layers) - # ] - - y = y.transpose(0, 1) # .contiguous() # (U + 1, B, H) - g, hid = self.dec_rnn(y, state) - g = g.transpose(0, 1) # .contiguous() # (B, U + 1, H) - # del y, state - return g, hid - -class Joint(torch.nn.Module): - def __init__(self, vocab_size, pred_n_hidden, enc_n_hidden, - joint_n_hidden, dropout): - super().__init__() - layers = [ - torch.nn.Linear(pred_n_hidden + enc_n_hidden, joint_n_hidden), - torch.nn.ReLU(), - ] + ([torch.nn.Dropout(p=dropout), ] if dropout else []) + [ - torch.nn.Linear(joint_n_hidden, vocab_size) - ] - self.net = torch.nn.Sequential( - *layers - ) - - def forward(self, f: torch.Tensor, g: torch.Tensor): - """ - f should be shape (B, T, H) - g should be shape (B, U + 1, H) - - returns: - logits of shape (B, T, U, K + 1) - """ - # Combine the input states and the output states - B, T, H = f.shape - B, U_, H2 = g.shape - - f = f.unsqueeze(dim=2) # (B, T, 1, H) - f = f.expand((B, T, U_, H)) - - g = g.unsqueeze(dim=1) # (B, 1, U + 1, H) - g = g.expand((B, T, U_, H2)) - - inp = torch.cat([f, g], dim=3) # (B, T, U, 2H) - res = self.net(inp) - # del f, g, inp - return res - -def label_collate(labels): - """Collates the label inputs for the rnn-t prediction network. - - If `labels` is already in torch.Tensor form this is a no-op. - - Args: - labels: A torch.Tensor List of label indexes or a torch.Tensor. - - Returns: - A padded torch.Tensor of shape (batch, max_seq_len). 
- """ - - if isinstance(labels, torch.Tensor): - return labels.type(torch.int64) - if not isinstance(labels, (list, tuple)): - raise ValueError( - f"`labels` should be a list or tensor not {type(labels)}" - ) - - batch_size = len(labels) - max_len = max(len(l) for l in labels) - - cat_labels = np.full((batch_size, max_len), fill_value=0.0, dtype=np.int32) - for e, l in enumerate(labels): - cat_labels[e, :len(l)] = l - labels = torch.LongTensor(cat_labels) - - return labels diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/features.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/features.py deleted file mode 100644 index 5a1309758eb..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/features.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Tuple - -import torch -import torch.nn as nn -import math -import librosa -from .segment import AudioSegment - - -class WaveformFeaturizer(object): - def __init__(self, input_cfg): - self.cfg = input_cfg - - def process(self, file_path, offset=0, duration=0, trim=False): - audio = AudioSegment.from_file(file_path, - target_sr=self.cfg['sample_rate'], - int_values=self.cfg.get( - 'int_values', False), - offset=offset, duration=duration, trim=trim) - return self.process_segment(audio) - - def process_segment(self, audio_segment): - return torch.tensor(audio_segment.samples, dtype=torch.float) - - @classmethod - def from_config(cls, input_config, perturbation_configs=None): - return cls(input_config) - - -constant = 1e-5 - - -def normalize_batch(x, seq_len, normalize_type): - if normalize_type == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - return (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif normalize_type == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - return (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - return x - - -def splice_frames(x, frame_splicing): - """ Stacks frames together across feature dim - - input is batch_size, feature_dim, num_frames - output is batch_size, feature_dim*frame_splicing, num_frames - - """ - seq = [x] - for n in range(1, frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = 
x[:, :, n:] - seq.append(tmp) - return torch.cat(seq, dim=1)[:, :, ::frame_splicing] - - -class FilterbankFeatures(nn.Module): - def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01, - window="hamming", normalize="per_feature", n_fft=None, - preemph=0.97, - nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant, - pad_to=8, - max_duration=16.7, - frame_splicing=1): - super(FilterbankFeatures, self).__init__() -# print("PADDING: {}".format(pad_to)) - - torch_windows = { - 'hann': torch.hann_window, - 'hamming': torch.hamming_window, - 'blackman': torch.blackman_window, - 'bartlett': torch.bartlett_window, - 'none': None, - } - - self.win_length = int(sample_rate * window_size) # frame size - self.hop_length = int(sample_rate * window_stride) - self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length)) - - self.normalize = normalize - self.log = log - self.dither = dither - self.frame_splicing = frame_splicing - self.nfilt = nfilt - self.preemph = preemph - self.pad_to = pad_to - # For now, always enable this. 
- # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration - self.use_deterministic_dithering = True - highfreq = highfreq or sample_rate / 2 - window_fn = torch_windows.get(window, None) - window_tensor = window_fn(self.win_length, - periodic=False) if window_fn else None - filterbanks = torch.tensor( - librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, - fmax=highfreq), dtype=torch.float).unsqueeze(0) - # self.fb = filterbanks - # self.window = window_tensor - self.register_buffer("fb", filterbanks) - self.register_buffer("window", window_tensor) - # Calculate maximum sequence length (# frames) - max_length = 1 + math.ceil( - (max_duration * sample_rate - self.win_length) / self.hop_length - ) - max_pad = 16 - (max_length % 16) - self.max_length = max_length + max_pad - - def get_seq_len(self, seq_len): - seq_len = (seq_len + self.hop_length - 1) // self.hop_length - seq_len = (seq_len + self.frame_splicing - 1) // self.frame_splicing - return seq_len - - @torch.no_grad() - def forward(self, inp: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: - x, seq_len = inp - - dtype = x.dtype - - seq_len = self.get_seq_len(seq_len) - - # dither - if self.dither > 0 and not self.use_deterministic_dithering: - x += self.dither * torch.randn_like(x) - - # do preemphasis - # Ideally, we would mask immediately after this... 
Ugh :( - if self.preemph is not None: - x = torch.cat((x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]), - dim=1) - - # do stft - x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length, - win_length=self.win_length, - center=True, window=self.window.to(dtype=torch.float), - return_complex=False - ) - # get power spectrum - x = x.pow(2).sum(-1) - - if self.dither > 0 and self.use_deterministic_dithering: - x = x + self.dither ** 2 - # dot with filterbank energies - x = torch.matmul(self.fb.to(x.dtype), x) - - # log features if required - if self.log: - x = torch.log(x + 1e-20) - - # frame splicing if required - if self.frame_splicing > 1: - seq = [x] - for n in range(1, self.frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = x[:, :, n:] - seq.append(tmp) - x = torch.cat(seq, dim=1)[:, :, ::self.frame_splicing] - - # normalize if required - constant = 1e-5 - if self.normalize == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif self.normalize == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - x = x - - # Hmmm... They don't do any masking anymore. Seems concerning! 
- - # mask to zero any values beyond seq_len in batch, pad to multiple of `pad_to` (for efficiency) - # max_len = x.size(-1) - x = x[:, :, :seq_len.max()] # rnnt loss requires lengths to match - # mask = torch.arange(max_len).to(seq_len.dtype).to(x.device).expand(x.size(0), - # max_len) >= seq_len.unsqueeze(1) - - # x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0) - pad_to = self.pad_to - if pad_to != 0: - raise NotImplementedError() - # if pad_to == "max": - # x = nn.functional.pad(x, (0, self.max_length - x.size(-1))) - # elif pad_to > 0: - # pad_amt = x.size(-1) % pad_to - # if pad_amt != 0: - # x = nn.functional.pad(x, (0, pad_to - pad_amt)) - - return x.to(dtype) - - @classmethod - def from_config(cls, cfg, log=False): - return cls(sample_rate=cfg['sample_rate'], window_size=cfg['window_size'], - window_stride=cfg['window_stride'], n_fft=cfg['n_fft'], - nfilt=cfg['features'], window=cfg['window'], - normalize=cfg['normalize'], - max_duration=cfg.get('max_duration', 16.7), - dither=cfg['dither'], pad_to=cfg.get("pad_to", 0), - frame_splicing=cfg.get("frame_splicing", 1), log=log) - - -class FeatureFactory(object): - featurizers = { - "logfbank": FilterbankFeatures, - "fbank": FilterbankFeatures, - } - - def __init__(self): - pass - - @classmethod - def from_config(cls, cfg): - feat_type = cfg.get('feat_type', "logspect") - featurizer = cls.featurizers[feat_type] - # return featurizer.from_config(cfg, log="log" in cfg['feat_type']) - return featurizer.from_config(cfg, log="log" in feat_type) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/manifest.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/manifest.py deleted file mode 100644 index fb04c5da882..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/manifest.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import string -import os - -from .text import _clean_text - - -def normalize_string(s, labels, table, **unused_kwargs): - """ - Normalizes string. For example: - 'call me at 8:00 pm!' -> 'call me at eight zero pm' - - Args: - s: string to normalize - labels: labels used during model training. - - Returns: - Normalized string - """ - - def good_token(token, labels): - s = set(labels) - for t in token: - if t not in s: - return False - return True - - try: - text = _clean_text(s, ["english_cleaners"], table).strip() - return ''.join([t for t in text if good_token(t, labels=labels)]) - except: - print("WARNING: Normalizing {} failed".format(s)) - return None - - -class Manifest(object): - def __init__(self, data_dir, manifest_paths, labels, blank_index, max_duration=None, pad_to_max=False, - min_duration=None, sort_by_duration=False, max_utts=0, - normalize=True, speed_perturbation=False, filter_speed=1.0): - self.labels_map = dict([(labels[i], i) for i in range(len(labels))]) - self.blank_index = blank_index - self.max_duration = max_duration - ids = [] - duration = 0.0 - filtered_duration = 0.0 - - # If removing punctuation, make a list of punctuation to remove - table = None - if normalize: - # Punctuation to remove - punctuation = string.punctuation - punctuation = punctuation.replace("+", "") - punctuation = punctuation.replace("&", "") - # We might also want to consider: - # @ -> at - # -> number, 
pound, hashtag - # ~ -> tilde - # _ -> underscore - # % -> percent - # If a punctuation symbol is inside our vocab, we do not remove from text - for l in labels: - punctuation = punctuation.replace(l, "") - # Turn all punctuation to whitespace - table = str.maketrans(punctuation, " " * len(punctuation)) - for manifest_path in manifest_paths: - with open(manifest_path, "r", encoding="utf-8") as fh: - a = json.load(fh) - for data in a: - files_and_speeds = data['files'] - - if pad_to_max: - if not speed_perturbation: - min_speed = filter_speed - else: - min_speed = min(x['speed'] - for x in files_and_speeds) - max_duration = self.max_duration * min_speed - - data['duration'] = data['original_duration'] - if min_duration is not None and data['duration'] < min_duration: - filtered_duration += data['duration'] - continue - if max_duration is not None and data['duration'] > max_duration: - filtered_duration += data['duration'] - continue - - # Prune and normalize according to transcript - transcript_text = data[ - 'transcript'] if "transcript" in data else self.load_transcript( - data['text_filepath']) - if normalize: - transcript_text = normalize_string(transcript_text, labels=labels, - table=table) - if not isinstance(transcript_text, str): - print( - "WARNING: Got transcript: {}. It is not a string. 
Dropping data point".format( - transcript_text)) - filtered_duration += data['duration'] - continue - data["transcript"] = self.parse_transcript( - transcript_text) # convert to vocab indices - - if speed_perturbation: - audio_paths = [x['fname'] for x in files_and_speeds] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds] - else: - audio_paths = [ - x['fname'] for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_filepath'] = [os.path.join( - data_dir, x) for x in audio_paths] - data.pop('files') - data.pop('original_duration') - - ids.append(data) - duration += data['duration'] - - if max_utts > 0 and len(ids) >= max_utts: - print( - 'Stopping parsing %s as max_utts=%d' % (manifest_path, max_utts)) - break - - if sort_by_duration: - ids = sorted(ids, key=lambda x: x['duration']) - self._data = ids - self._size = len(ids) - self._duration = duration - self._filtered_duration = filtered_duration - - def load_transcript(self, transcript_path): - with open(transcript_path, 'r', encoding="utf-8") as transcript_file: - transcript = transcript_file.read().replace('\n', '') - return transcript - - def parse_transcript(self, transcript): - chars = [self.labels_map.get(x, self.blank_index) - for x in list(transcript)] - transcript = list(filter(lambda x: x != self.blank_index, chars)) - return transcript - - def __getitem__(self, item): - return self._data[item] - - def __len__(self): - return self._size - - def __iter__(self): - return iter(self._data) - - @property - def duration(self): - return self._duration - - @property - def filtered_duration(self): - return self._filtered_duration - - @property - def data(self): - return list(self._data) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/segment.py 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/segment.py deleted file mode 100644 index 08aa5c6a492..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/segment.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import librosa -import soundfile as sf - - -class AudioSegment(object): - """Monaural audio segment abstraction. - :param samples: Audio samples [num_samples x num_channels]. - :type samples: ndarray.float32 - :param sample_rate: Audio sample rate. - :type sample_rate: int - :raises TypeError: If the sample data type is not float or int. - """ - - def __init__(self, samples, sample_rate, target_sr=None, trim=False, - trim_db=60): - """Create audio segment from samples. - Samples are convert float32 internally, with int scaled to [-1, 1]. 
- """ - samples = self._convert_samples_to_float32(samples) - if target_sr is not None and target_sr != sample_rate: - samples = librosa.core.resample(samples, sample_rate, target_sr) - sample_rate = target_sr - if trim: - samples, _ = librosa.effects.trim(samples, trim_db) - self._samples = samples - self._sample_rate = sample_rate - if self._samples.ndim >= 2: - self._samples = np.mean(self._samples, 1) - - def __eq__(self, other): - """Return whether two objects are equal.""" - if type(other) is not type(self): - return False - if self._sample_rate != other._sample_rate: - return False - if self._samples.shape != other._samples.shape: - return False - if np.any(self.samples != other._samples): - return False - return True - - def __ne__(self, other): - """Return whether two objects are unequal.""" - return not self.__eq__(other) - - def __str__(self): - """Return human-readable representation of segment.""" - return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " - "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate, - self.duration, self.rms_db)) - - @staticmethod - def _convert_samples_to_float32(samples): - """Convert sample type to float32. - Audio sample type is usually integer or float-point. - Integers will be scaled to [-1, 1] in float32. - """ - float32_samples = samples.astype('float32') - if samples.dtype in np.sctypes['int']: - bits = np.iinfo(samples.dtype).bits - float32_samples *= (1. / 2 ** (bits - 1)) - elif samples.dtype in np.sctypes['float']: - pass - else: - raise TypeError("Unsupported sample type: %s." % samples.dtype) - return float32_samples - - @classmethod - def from_file(cls, filename, target_sr=None, int_values=False, offset=0, - duration=0, trim=False): - """ - Load a file supported by librosa and return as an AudioSegment. 
- :param filename: path of file to load - :param target_sr: the desired sample rate - :param int_values: if true, load samples as 32-bit integers - :param offset: offset in seconds when loading audio - :param duration: duration in seconds when loading audio - :return: numpy array of samples - """ - with sf.SoundFile(filename, 'r') as f: - dtype = 'int32' if int_values else 'float32' - sample_rate = f.samplerate - if offset > 0: - f.seek(int(offset * sample_rate)) - if duration > 0: - samples = f.read(int(duration * sample_rate), dtype=dtype) - else: - samples = f.read(dtype=dtype) - samples = samples.transpose() - return cls(samples, sample_rate, target_sr=target_sr, trim=trim) - - @property - def samples(self): - return self._samples.copy() - - @property - def sample_rate(self): - return self._sample_rate - - @property - def num_samples(self): - return self._samples.shape[0] - - @property - def duration(self): - return self._samples.shape[0] / float(self._sample_rate) - - @property - def rms_db(self): - mean_square = np.mean(self._samples ** 2) - return 10 * np.log10(mean_square) - - def gain_db(self, gain): - self._samples *= 10. ** (gain / 20.) - - def pad(self, pad_size, symmetric=False): - """Add zero padding to the sample. The pad size is given in number of samples. - If symmetric=True, `pad_size` will be added to both sides. If false, `pad_size` - zeros will be added only to the end. - """ - self._samples = np.pad(self._samples, - (pad_size if symmetric else 0, pad_size), - mode='constant') - - def subsegment(self, start_time=None, end_time=None): - """Cut the AudioSegment between given boundaries. - Note that this is an in-place transformation. - :param start_time: Beginning of subsegment in seconds. - :type start_time: float - :param end_time: End of subsegment in seconds. - :type end_time: float - :raise ValueError: If start_time or end_time is incorrectly set, e.g. out - of bounds in time. 
- """ - start_time = 0.0 if start_time is None else start_time - end_time = self.duration if end_time is None else end_time - if start_time < 0.0: - start_time = self.duration + start_time - if end_time < 0.0: - end_time = self.duration + end_time - if start_time < 0.0: - raise ValueError("The slice start position (%f s) is out of " - "bounds." % start_time) - if end_time < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds." % - end_time) - if start_time > end_time: - raise ValueError("The slice start position (%f s) is later than " - "the end position (%f s)." % (start_time, end_time)) - if end_time > self.duration: - raise ValueError("The slice end position (%f s) is out of bounds " - "(> %f s)" % (end_time, self.duration)) - start_sample = int(round(start_time * self._sample_rate)) - end_sample = int(round(end_time * self._sample_rate)) - self._samples = self._samples[start_sample:end_sample] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/LICENSE deleted file mode 100644 index 4ad4ed1d5e3..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2017 Keith Ito - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/__init__.py deleted file mode 100644 index 61936879a95..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2017 Keith Ito -""" from https://github.com/keithito/tacotron """ -from . import cleaners - - -def _clean_text(text, cleaner_names, *args): - for name in cleaner_names: - cleaner = getattr(cleaners, name) - if not cleaner: - raise Exception('Unknown cleaner: %s' % name) - text = cleaner(text, *args) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/cleaners.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/cleaners.py deleted file mode 100644 index e1e52af5f37..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/cleaners.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" from https://github.com/keithito/tacotron -Modified to add puncturation removal -""" - -''' -Cleaners are transformations that run over the input text at both training and eval time. - -Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" -hyperparameter. Some cleaners are English-specific. You'll typically want to use: - 1. "english_cleaners" for English text - 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using - the Unidecode library (https://pypi.python.org/pypi/Unidecode) - 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update - the symbols in symbols.py to match your data). - -''' - - -# Regular expression matching whitespace: -import re -from text_unidecode import unidecode -from .numbers import normalize_numbers -_whitespace_re = re.compile(r'\s+') - -# List of (regular expression, replacement) pairs for abbreviations: -_abbreviations = [(re.compile('\\b%s\\.' 
% x[0], re.IGNORECASE), x[1]) for x in [ - ('mrs', 'misess'), - ('mr', 'mister'), - ('dr', 'doctor'), - ('st', 'saint'), - ('co', 'company'), - ('jr', 'junior'), - ('maj', 'major'), - ('gen', 'general'), - ('drs', 'doctors'), - ('rev', 'reverend'), - ('lt', 'lieutenant'), - ('hon', 'honorable'), - ('sgt', 'sergeant'), - ('capt', 'captain'), - ('esq', 'esquire'), - ('ltd', 'limited'), - ('col', 'colonel'), - ('ft', 'fort'), -]] - - -def expand_abbreviations(text): - for regex, replacement in _abbreviations: - text = re.sub(regex, replacement, text) - return text - - -def expand_numbers(text): - return normalize_numbers(text) - - -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, ' ', text) - - -def convert_to_ascii(text): - return unidecode(text) - - -def remove_punctuation(text, table): - text = text.translate(table) - text = re.sub(r'&', " and ", text) - text = re.sub(r'\+', " plus ", text) - return text - - -def basic_cleaners(text): - '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def transliteration_cleaners(text): - '''Pipeline for non-English text that transliterates to ASCII.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def english_cleaners(text, table=None): - '''Pipeline for English text, including number and abbreviation expansion.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = expand_numbers(text) - text = expand_abbreviations(text) - if table is not None: - text = remove_punctuation(text, table) - text = collapse_whitespace(text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/numbers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/numbers.py deleted file mode 100644 
index 3d2f77121c8..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/parts/text/numbers.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" from https://github.com/keithito/tacotron -Modifed to add support for time and slight tweaks to _expand_number -""" - -import inflect -import re - - -_inflect = inflect.engine() -_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') -_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') -_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') -_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') -_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') -_number_re = re.compile(r'[0-9]+') -_time_re = re.compile(r'([0-9]{1,2}):([0-9]{2})') - - -def _remove_commas(m): - return m.group(1).replace(',', '') - - -def _expand_decimal_point(m): - return m.group(1).replace('.', ' point ') - - -def _expand_dollars(m): - match = m.group(1) - parts = match.split('.') - if len(parts) > 2: - return match + ' dollars' # Unexpected format - dollars = int(parts[0]) if parts[0] else 0 - cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 - if dollars and cents: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) - elif dollars: - dollar_unit = 'dollar' if dollars == 1 
else 'dollars' - return '%s %s' % (dollars, dollar_unit) - elif cents: - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s' % (cents, cent_unit) - else: - return 'zero dollars' - - -def _expand_ordinal(m): - return _inflect.number_to_words(m.group(0)) - - -def _expand_number(m): - if int(m.group(0)[0]) == 0: - return _inflect.number_to_words(m.group(0), andword='', group=1) - num = int(m.group(0)) - if num > 1000 and num < 3000: - if num == 2000: - return 'two thousand' - elif num > 2000 and num < 2010: - return 'two thousand ' + _inflect.number_to_words(num % 100) - elif num % 100 == 0: - return _inflect.number_to_words(num // 100) + ' hundred' - else: - return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ') - # Add check for number phones and other large numbers - elif num > 1000000000 and num % 10000 != 0: - return _inflect.number_to_words(num, andword='', group=1) - else: - return _inflect.number_to_words(num, andword='') - - -def _expand_time(m): - mins = int(m.group(2)) - if mins == 0: - return _inflect.number_to_words(m.group(1)) - return " ".join([_inflect.number_to_words(m.group(1)), _inflect.number_to_words(m.group(2))]) - - -def normalize_numbers(text): - text = re.sub(_comma_number_re, _remove_commas, text) - text = re.sub(_pounds_re, r'\1 pounds', text) - text = re.sub(_dollars_re, _expand_dollars, text) - text = re.sub(_decimal_number_re, _expand_decimal_point, text) - text = re.sub(_ordinal_re, _expand_ordinal, text) - text = re.sub(_number_re, _expand_number, text) - text = re.sub(_time_re, _expand_time, text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/preprocessing.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/preprocessing.py deleted file mode 100644 index 581885466b0..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/preprocessing.py +++ /dev/null @@ 
-1,39 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Tuple - -import torch -import torch.nn as nn - -from helpers import Optimization -from parts.features import FeatureFactory - - -class AudioPreprocessing(nn.Module): - """GPU accelerated audio preprocessing - """ - - def __init__(self, **kwargs): - nn.Module.__init__(self) # For PyTorch API - self.optim_level = kwargs.get( - 'optimization_level', Optimization.nothing) - self.featurizer = FeatureFactory.from_config(kwargs) - - def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: - input_signal, length = x - length.requires_grad_(False) - processed_signal = self.featurizer(x) - processed_length = self.featurizer.get_seq_len(length) - return processed_signal, processed_length diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/rnn.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/rnn.py deleted file mode 100644 index 9bbea9c0a67..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/rnn.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - -from typing import Optional, Tuple - - -def rnn(rnn, input_size, hidden_size, num_layers, norm=None, - forget_gate_bias=1.0, dropout=0.0, **kwargs): - """TODO""" - if rnn != "lstm": - raise ValueError(f"Unknown rnn={rnn}") - if norm not in [None]: - raise ValueError(f"unknown norm={norm}") - - if rnn == "lstm": - return LstmDrop( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - forget_gate_bias=forget_gate_bias, - **kwargs - ) - - -class LstmDrop(torch.nn.Module): - - def __init__(self, input_size, hidden_size, num_layers, dropout, forget_gate_bias, - **kwargs): - """Returns an LSTM with forget gate bias init to `forget_gate_bias`. - - Args: - input_size: See `torch.nn.LSTM`. - hidden_size: See `torch.nn.LSTM`. - num_layers: See `torch.nn.LSTM`. - dropout: See `torch.nn.LSTM`. - forget_gate_bias: For each layer and each direction, the total value of - to initialise the forget gate bias to. - - Returns: - A `torch.nn.LSTM`. 
- """ - super(LstmDrop, self).__init__() - - self.lstm = torch.nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - ) - if forget_gate_bias is not None: - for name, v in self.lstm.named_parameters(): - if "bias_ih" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(forget_gate_bias) - if "bias_hh" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(0) - - if dropout: - self.inplace_dropout = torch.nn.Dropout(dropout, inplace=True) - else: - self.inplace_droput = None - - def forward(self, x: torch.Tensor, - h: Optional[Tuple[torch.Tensor, torch.Tensor]] = None): - x, h = self.lstm(x, h) - - if self.inplace_dropout is not None: - self.inplace_dropout(x.data) - - return x, h - - -class StackTime(torch.nn.Module): - - __constants__ = ["factor"] - - def __init__(self, factor): - super().__init__() - self.factor = int(factor) - - def forward(self, x, x_lens): - # T, B, U - seq = [x] - for i in range(1, self.factor): - # This doesn't seem to make much sense... - tmp = torch.zeros_like(x) - tmp[:-i, :, :] = x[i:, :, :] - seq.append(tmp) - x_lens = torch.ceil(x_lens.float() / self.factor).int() - # Gross, this is horrible. What a waste of memory... - return torch.cat(seq, dim=2)[::self.factor, :, :], x_lens diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/build.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/build.sh deleted file mode 100644 index cfdc97c010e..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/build.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -docker build . 
--rm -t jasper \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/evaluation.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/evaluation.sh deleted file mode 100644 index fcd472fd9aa..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/evaluation.sh +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -NUM_GPUS=${8:-1} -PRECISION=${9:-"fp32"} -NUM_STEPS=${10:-"-1"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "jasper_evaluation_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - - -CMD=" inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC " -CMD+=" $STEPS " - - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference.sh deleted file mode 100644 index 2d4474ce2b7..00000000000 --- 
a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference.sh +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -MODELOUTPUT_FILE=${12:-"none"} -PREDICTION_FILE=${13:-"$RESULT_DIR/${DATASET}.predictions"} - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE) - printf -v TAG "jasper_inference_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -PRED="" -if [ "$PREDICTION_FILE" = "none" ] ; then - PRED="" -else - PRED=" --save_prediction $PREDICTION_FILE" -fi - -OUTPUT="" -if [ "$MODELOUTPUT_FILE" = "none" ] ; then - OUTPUT=" " -else - OUTPUT=" --logits_save_to $MODELOUTPUT_FILE" -fi - - -if [ "$CUDNN_BENCHMARK" = "true" ]; then - CUDNN_BENCHMARK=" 
--cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -CMD=" python inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PRED " -CMD+=" $OUTPUT " -CMD+=" $PREC " -CMD+=" $STEPS " - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x -echo "MODELOUTPUT_FILE: ${MODELOUTPUT_FILE}" -echo "PREDICTION_FILE: ${PREDICTION_FILE}" diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference_benchmark.sh deleted file mode 100644 index 7aeea84c159..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/inference_benchmark.sh +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"true"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -MAX_DURATION=${10:-"36"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -CMD=" python inference_benchmark.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --seed=$SEED" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --ckpt=$CHECKPOINT" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to=-1" -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC" -CMD+=" $STEPS" - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE ) - printf -v TAG "jasper_inference_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" - grep 'latency' "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/preprocess_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/preprocess_librispeech.sh deleted file mode 100644 index 7cfe5cc6a57..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/preprocess_librispeech.sh +++ /dev/null @@ 
-1,51 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#!/usr/bin/env bash - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-100 \ - --dest_dir /datasets/LibriSpeech/train-clean-100-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-100-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-360 \ - --dest_dir /datasets/LibriSpeech/train-clean-360-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-360-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-other-500 \ - --dest_dir /datasets/LibriSpeech/train-other-500-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-other-500-wav.json \ - --speed 0.9 1.1 - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-clean \ - --dest_dir /datasets/LibriSpeech/dev-clean-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-other \ - --dest_dir /datasets/LibriSpeech/dev-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-other-wav.json - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-clean \ - --dest_dir /datasets/LibriSpeech/test-clean-wav \ - --output_json 
/datasets/LibriSpeech/librispeech-test-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-other \ - --dest_dir /datasets/LibriSpeech/test-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-test-other-wav.json diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train.sh deleted file mode 100644 index d59ce8ebeb2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train.sh +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/rnnt.toml"} -RESULT_DIR=${3:-"/results"} -CHECKPOINT=${4:-"none"} -CREATE_LOGFILE=${5:-"true"} -CUDNN_BENCHMARK=${6:-"true"} -NUM_GPUS=${7:-8} -PRECISION=${8:-"fp16"} -EPOCHS=${9:-100} -SEED=${10:-6} -BATCH_SIZE=${11:-8} -EVAL_BATCH_SIZE=${11:-2} -LEARNING_RATE=${12:-"0.001"} -LEARNING_RATE_WARMUP=${12:-"8000"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -LAUNCH_OPT=${LAUNCH_OPT:-"none"} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] && [ "$PRECISION" = "fp16" ]; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - - -if [ "$CHECKPOINT" = "none" ] ; then - CHECKPOINT="" -else - CHECKPOINT=" --ckpt=${CHECKPOINT}" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --eval_batch_size=$EVAL_BATCH_SIZE" -CMD+=" --num_epochs=$EPOCHS" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --lr_warmup=$LEARNING_RATE_WARMUP" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=adam" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100" -CMD+=" --eval_freq=1" -CMD+=" --train_freq=250" -CMD+=" --lr_decay" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS " -CMD+=" $CHECKPOINT" -CMD+=" $PREC" -CMD+=" $CUDNN" - - -if [ "${LAUNCH_OPT}" != "none" ]; then - CMD="python -m $LAUNCH_OPT $CMD" -elif [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m multiproc --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - 
export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "rnnt_train_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE=$RESULT_DIR/$TAG.$DATESTAMP.log - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee $LOGFILE -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train_benchmark.sh deleted file mode 100644 index 7b5a33705ca..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/train_benchmark.sh +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${3:-"/results"} -CREATE_LOGFILE=${4:-"true"} -CUDNN_BENCHMARK=${5:-"true"} -NUM_GPUS=${6:-8} -PRECISION=${7:-"fp16"} -NUM_STEPS=${8:-"-1"} -MAX_DURATION=${9:-16.7} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -LEARNING_RATE=${12:-"0.015"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -PRINT_FREQUENCY=${14:-1} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC=" --fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -ne "-1" ] ; then - STEPS=" --num_steps=$NUM_STEPS" -elif [ "$NUM_STEPS" = "-1" ] ; then - STEPS="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --num_epochs=400" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=novograd" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100000" -CMD+=" --eval_freq=100000" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to_max" -CMD+=" --train_freq=$PRINT_FREQUENCY" -CMD+=" --lr_decay" -CMD+=" $CUDNN" -CMD+=" $PREC" -CMD+=" $STEPS" - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG 
"jasper_train_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" - -fi - -if [ -z "$LOGFILE" ] ; then - - set -x - $CMD - set +x -else - - set -x - ( - $CMD - ) |& tee "$LOGFILE" - - set +x - - mean_latency=`cat "$LOGFILE" | grep 'Step time' | awk '{print $3}' | tail -n +2 | egrep -o '[0-9.]+'| awk 'BEGIN {total=0} {total+=$1} END {printf("%.2f\n",total/NR)}'` - mean_throughput=`python -c "print($BATCH_SIZE*$NUM_GPUS/${mean_latency})"` - training_wer_per_pgu=`cat "$LOGFILE" | grep 'training_batch_WER'| awk '{print $2}' | tail -n 1 | egrep -o '[0-9.]+'` - training_loss_per_pgu=`cat "$LOGFILE" | grep 'Loss@Step'| awk '{print $4}' | tail -n 1 | egrep -o '[0-9.]+'` - final_eval_wer=`cat "$LOGFILE" | grep 'Evaluation WER'| tail -n 1 | egrep -o '[0-9.]+'` - final_eval_loss=`cat "$LOGFILE" | grep 'Evaluation Loss'| tail -n 1 | egrep -o '[0-9.]+'` - - echo "max duration: $MAX_DURATION s" | tee -a "$LOGFILE" - echo "mean_latency: $mean_latency s" | tee -a "$LOGFILE" - echo "mean_throughput: $mean_throughput sequences/s" | tee -a "$LOGFILE" - echo "training_wer_per_pgu: $training_wer_per_pgu" | tee -a "$LOGFILE" - echo "training_loss_per_pgu: $training_loss_per_pgu" | tee -a "$LOGFILE" - echo "final_eval_loss: $final_eval_loss" | tee -a "$LOGFILE" - echo "final_eval_wer: $final_eval_wer" | tee -a "$LOGFILE" -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/convert_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/convert_librispeech.py deleted file mode 100644 index e90076cb09d..00000000000 --- 
a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/convert_librispeech.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import os -import glob -import multiprocessing -import json - -import pandas as pd - -from preprocessing_utils import parallel_preprocess - -parser = argparse.ArgumentParser(description='Preprocess LibriSpeech.') -parser.add_argument('--input_dir', type=str, required=True, - help='LibriSpeech collection input dir') -parser.add_argument('--dest_dir', type=str, required=True, - help='Output dir') -parser.add_argument('--output_json', type=str, default='./', - help='name of the output json file.') -parser.add_argument('-s', '--speed', type=float, nargs='*', - help='Speed perturbation ratio') -parser.add_argument('--target_sr', type=int, default=None, - help='Target sample rate. 
' - 'defaults to the input sample rate') -parser.add_argument('--overwrite', action='store_true', - help='Overwrite file if exists') -parser.add_argument('--parallel', type=int, default=multiprocessing.cpu_count(), - help='Number of threads to use when processing audio files') -args = parser.parse_args() - -args.input_dir = args.input_dir.rstrip('/') -args.dest_dir = args.dest_dir.rstrip('/') - - -def build_input_arr(input_dir): - txt_files = glob.glob(os.path.join(input_dir, '**', '*.trans.txt'), - recursive=True) - input_data = [] - for txt_file in txt_files: - rel_path = os.path.relpath(txt_file, input_dir) - with open(txt_file) as fp: - for line in fp: - fname, _, transcript = line.partition(' ') - input_data.append(dict(input_relpath=os.path.dirname(rel_path), - input_fname=fname + '.flac', - transcript=transcript)) - return input_data - - -print("[%s] Scaning input dir..." % args.output_json) -dataset = build_input_arr(input_dir=args.input_dir) - -print("[%s] Converting audio files..." % args.output_json) -dataset = parallel_preprocess(dataset=dataset, - input_dir=args.input_dir, - dest_dir=args.dest_dir, - target_sr=args.target_sr, - speed=args.speed, - overwrite=args.overwrite, - parallel=args.parallel) - -print("[%s] Generating json..." % args.output_json) -df = pd.DataFrame(dataset, dtype=object) - -# Save json with python. 
df.to_json() produces back slashed in file paths -dataset = df.to_dict(orient='records') -with open(args.output_json, 'w') as fp: - json.dump(dataset, fp, indent=2) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_librispeech.py deleted file mode 100644 index f7e5eda1309..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_librispeech.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import argparse -import pandas as pd - -from download_utils import download_file, md5_checksum, extract - -parser = argparse.ArgumentParser( - description='Download, verify and extract dataset files') -parser.add_argument('csv', type=str, - help='CSV file with urls and checksums to download.') -parser.add_argument('dest', type=str, - help='Download destnation folder.') -parser.add_argument('-e', type=str, default=None, - help='Extraction destnation folder. 
Defaults to download folder if not provided') -parser.add_argument('--skip_download', action='store_true', - help='Skip downloading the files') -parser.add_argument('--skip_checksum', action='store_true', - help='Skip checksum') -parser.add_argument('--skip_extract', action='store_true', - help='Skip extracting files') -args = parser.parse_args() -args.e = args.e or args.dest - - -df = pd.read_csv(args.csv, delimiter=',') - - -if not args.skip_download: - for url in df.url: - fname = url.split('/')[-1] - print("Downloading %s:" % fname) - download_file(url=url, dest_folder=args.dest, fname=fname) -else: - print("Skipping file download") - - -if not args.skip_checksum: - for index, row in df.iterrows(): - url = row['url'] - md5 = row['md5'] - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Verifing %s: " % fname, end='') - ret = md5_checksum(fpath=fpath, target_hash=md5) - if not ret: - raise ValueError(f"Checksum for {fname} failed!") - else: - print(f"Checksum correct for {fname}") -else: - print("Skipping checksum") - - -if not args.skip_extract: - for url in df.url: - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Decompressing %s:" % fpath) - extract(fpath=fpath, dest_folder=args.e) -else: - print("Skipping file extraction") diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_utils.py deleted file mode 100644 index bda4193fbb0..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/download_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import hashlib -import requests -import os -import tarfile -import tqdm - - -def download_file(url, dest_folder, fname, overwrite=False): - fpath = os.path.join(dest_folder, fname) - if os.path.isfile(fpath): - if overwrite: - print("Overwriting existing file") - else: - print("File exists, skipping download.") - return - - tmp_fpath = fpath + '.tmp' - - r = requests.get(url, stream=True) - file_size = int(r.headers['Content-Length']) - chunk_size = 1024 * 1024 # 1MB - total_chunks = int(file_size / chunk_size) - - with open(tmp_fpath, 'wb') as fp: - content_iterator = r.iter_content(chunk_size=chunk_size) - chunks = tqdm.tqdm(content_iterator, total=total_chunks, - unit='MB', desc=fpath, leave=True) - for chunk in chunks: - fp.write(chunk) - - os.rename(tmp_fpath, fpath) - - -def md5_checksum(fpath, target_hash): - file_hash = hashlib.md5() - with open(fpath, "rb") as fp: - for chunk in iter(lambda: fp.read(1024 * 1024), b""): - file_hash.update(chunk) - return file_hash.hexdigest() == target_hash - - -def extract(fpath, dest_folder): - if fpath.endswith('.tar.gz'): - mode = 'r:gz' - elif fpath.endswith('.tar'): - mode = 'r:' - else: - raise IOError('fpath has unknown extention: %s' % fpath) - - with tarfile.open(fpath, mode) as tar: - members = tar.getmembers() - for member in tqdm.tqdm(iterable=members, total=len(members), leave=True): - tar.extract(path=dest_folder, member=member) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/inference_librispeech.csv 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/inference_librispeech.csv deleted file mode 100644 index 40dac4e0e61..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/inference_librispeech.csv +++ /dev/null @@ -1,5 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 -http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931 -http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9 -http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech-inference.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech-inference.csv deleted file mode 100644 index b5e43b222e6..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech-inference.csv +++ /dev/null @@ -1,2 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech.csv deleted file mode 100644 index d48a9f8db72..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/librispeech.csv +++ /dev/null @@ -1,8 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 -http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931 -http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9 
-http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135 -http://www.openslr.org/resources/12/train-clean-100.tar.gz,2a93770f6d5c6c964bc36631d331a522 -http://www.openslr.org/resources/12/train-clean-360.tar.gz,c0e676e450a7ff2f54aeade5171606fa -http://www.openslr.org/resources/12/train-other-500.tar.gz,d1a0fd59409feb2c614ce4d30c387708 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/preprocessing_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/preprocessing_utils.py deleted file mode 100644 index 260e860b80d..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/utils/preprocessing_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import multiprocessing -import functools - -import sox - - -from tqdm import tqdm - - -def preprocess(data, input_dir, dest_dir, target_sr=None, speed=None, - overwrite=True): - speed = speed or [] - speed.append(1) - speed = list(set(speed)) # Make uniqe - - input_fname = os.path.join(input_dir, - data['input_relpath'], - data['input_fname']) - input_sr = sox.file_info.sample_rate(input_fname) - target_sr = target_sr or input_sr - - os.makedirs(os.path.join(dest_dir, data['input_relpath']), exist_ok=True) - - output_dict = {} - output_dict['transcript'] = data['transcript'].lower().strip() - output_dict['files'] = [] - - fname = os.path.splitext(data['input_fname'])[0] - for s in speed: - output_fname = fname + \ - '{}.wav'.format('' if s == 1 else '-{}'.format(s)) - output_fpath = os.path.join(dest_dir, - data['input_relpath'], - output_fname) - - if not os.path.exists(output_fpath) or overwrite: - cbn = sox.Transformer().speed(factor=s).convert(target_sr) - cbn.build(input_fname, output_fpath) - - file_info = sox.file_info.info(output_fpath) - file_info['fname'] = os.path.join(os.path.basename(dest_dir), - data['input_relpath'], - output_fname) - file_info['speed'] = s - output_dict['files'].append(file_info) - - if s == 1: - file_info = sox.file_info.info(output_fpath) - output_dict['original_duration'] = file_info['duration'] - output_dict['original_num_samples'] = file_info['num_samples'] - - return output_dict - - -def parallel_preprocess(dataset, input_dir, dest_dir, target_sr, speed, overwrite, parallel): - with multiprocessing.Pool(parallel) as p: - func = functools.partial(preprocess, - input_dir=input_dir, dest_dir=dest_dir, - target_sr=target_sr, speed=speed, overwrite=overwrite) - dataset = list(tqdm(p.imap(func, dataset), total=len(dataset))) - return dataset diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch_SUT.py 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch_SUT.py deleted file mode 100644 index c0324571d43..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch_SUT.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2020, Cerebras Systems, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -import array -import torch -import numpy as np -import toml -import mlperf_loadgen as lg -from tqdm import tqdm - -from QSL import AudioQSL, AudioQSLInMemory -from decoders import ScriptGreedyDecoder -from helpers import add_blank_label -from preprocessing import AudioPreprocessing -from model_separable_rnnt import RNNT - - -def load_and_migrate_checkpoint(ckpt_path): - checkpoint = torch.load(ckpt_path, map_location="cpu") - migrated_state_dict = {} - for key, value in checkpoint['state_dict'].items(): - key = key.replace("joint_net", "joint.net") - migrated_state_dict[key] = value - del migrated_state_dict["audio_preprocessor.featurizer.fb"] - del migrated_state_dict["audio_preprocessor.featurizer.window"] - return migrated_state_dict - - -class PytorchSUT: - def __init__(self, config_toml, checkpoint_path, dataset_dir, - manifest_filepath, perf_count): - config = toml.load(config_toml) - - dataset_vocab = config['labels']['labels'] - rnnt_vocab = add_blank_label(dataset_vocab) - featurizer_config = 
config['input_eval'] - - self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, - self.process_latencies) - self.qsl = AudioQSLInMemory(dataset_dir, - manifest_filepath, - dataset_vocab, - featurizer_config["sample_rate"], - perf_count) - self.audio_preprocessor = AudioPreprocessing(**featurizer_config) - self.audio_preprocessor.eval() - self.audio_preprocessor = torch.jit.script(self.audio_preprocessor) - self.audio_preprocessor = torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(self.audio_preprocessor._c)) - - model = RNNT( - feature_config=featurizer_config, - rnnt=config['rnnt'], - num_classes=len(rnnt_vocab) - ) - model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path), - strict=True) - model.eval() - - # model.encoder = torch.jit.script(model.encoder) - # model.encoder = torch.jit._recursive.wrap_cpp_module( - # torch._C._freeze_module(model.encoder._c)) - # model.prediction = torch.jit.script(model.prediction) - # model.prediction = torch.jit._recursive.wrap_cpp_module( - # torch._C._freeze_module(model.prediction._c)) - # model.joint = torch.jit.script(model.joint) - # model.joint = torch.jit._recursive.wrap_cpp_module( - # torch._C._freeze_module(model.joint._c)) - # model = torch.jit.script(model) - - self.greedy_decoder = ScriptGreedyDecoder(len(rnnt_vocab) - 1, model) - - def issue_queries(self, query_samples): - for query_sample in query_samples: - waveform = self.qsl[query_sample.index] - assert waveform.ndim == 1 - waveform_length = np.array(waveform.shape[0], dtype=np.int64) - waveform = np.expand_dims(waveform, 0) - waveform_length = np.expand_dims(waveform_length, 0) - with torch.no_grad(): - waveform = torch.from_numpy(waveform) - waveform_length = torch.from_numpy(waveform_length) - feature, feature_length = self.audio_preprocessor.forward((waveform, waveform_length)) - assert feature.ndim == 3 - assert feature_length.ndim == 1 - feature = feature.permute(2, 0, 1) - - _, _, transcript = 
self.greedy_decoder.forward(feature, feature_length) - - assert len(transcript) == 1 - response_array = array.array('q', transcript[0]) - bi = response_array.buffer_info() - response = lg.QuerySampleResponse(query_sample.id, bi[0], - bi[1] * response_array.itemsize) - lg.QuerySamplesComplete([response]) - - def flush_queries(self): - pass - - def process_latencies(self, latencies_ns): - print("Average latency (ms) per query:") - print(np.mean(latencies_ns)/1000000.0) - print("Median latency (ms): ") - print(np.percentile(latencies_ns, 50)/1000000.0) - print("90 percentile latency (ms): ") - print(np.percentile(latencies_ns, 90)/1000000.0) - - def __del__(self): - lg.DestroySUT(self.sut) - print("Finished destroying SUT.") diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/requirements.txt b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/requirements.txt deleted file mode 100644 index b46a7ac3d18..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -neural-compressor -sox -absl-py -toml -text-unidecode -inflect -librosa==0.8.1 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.py deleted file mode 100644 index 7088a8d0fab..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2020 The MLPerf Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import argparse -import mlperf_loadgen as lg -import subprocess - -import os -from pathlib import Path - -MLPERF_CONF = Path(os.path.dirname(os.path.realpath(__file__))) / "./mlperf.conf" -MLPERF_CONF = MLPERF_CONF.resolve() - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--backend", choices=["pytorch"], default="pytorch", help="Backend") - parser.add_argument("--scenario", choices=["SingleStream", "Offline", "Server"], default="Offline", help="Scenario") - parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") - parser.add_argument("--mlperf_conf", default=str(MLPERF_CONF), help="mlperf rules config") - parser.add_argument("--user_conf", default="user.conf", help="user config for user LoadGen settings such as target QPS") - parser.add_argument("--pytorch_config_toml", default="pytorch/configs/rnnt.toml") - parser.add_argument("--pytorch_checkpoint", default="pytorch/work_dir/rnnt.pt") - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--perf_count", type=int, default=None) - parser.add_argument("--log_dir", required=True) - args = parser.parse_args() - return args - - -scenario_map = { - "SingleStream": lg.TestScenario.SingleStream, - "Offline": lg.TestScenario.Offline, - "Server": lg.TestScenario.Server, -} - - -def main(): - args = get_args() - - if args.backend == "pytorch": - from pytorch_SUT import PytorchSUT - sut = 
PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, - args.dataset_dir, args.manifest, args.perf_count) - else: - raise ValueError("Unknown backend: {:}".format(args.backend)) - - settings = lg.TestSettings() - settings.scenario = scenario_map[args.scenario] - settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario) - settings.FromConfig(args.user_conf, "rnnt", args.scenario) - - if args.accuracy: - settings.mode = lg.TestMode.AccuracyOnly - else: - settings.mode = lg.TestMode.PerformanceOnly - - log_path = args.log_dir - os.makedirs(log_path, exist_ok=True) - log_output_settings = lg.LogOutputSettings() - log_output_settings.outdir = log_path - log_output_settings.copy_summary_to_stdout = True - log_settings = lg.LogSettings() - log_settings.log_output = log_output_settings - - print("Running Loadgen test...") - lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) - - if args.accuracy: - cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}" - print(f"Running accuracy script: {cmd}") - subprocess.check_call(cmd, shell=True) - - print("Done!") - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.sh deleted file mode 100644 index 7538df99bdb..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run.sh +++ /dev/null @@ -1,90 +0,0 @@ -#/bin/bash - -set -euo pipefail - -work_dir=/export/b07/ws15dgalvez/mlperf-rnnt-librispeech -local_data_dir=$work_dir/local_data -librispeech_download_dir=$local_data_dir/LibriSpeech -stage=3 - -mkdir -p $work_dir $local_data_dir $librispeech_download_dir - -install_dir=third_party/install -mkdir -p $install_dir -install_dir=$(readlink -f $install_dir) - -set +u -source "$($CONDA_EXE info --base)/etc/profile.d/conda.sh" -set -u - -# stage -1: install 
dependencies -if [[ $stage -le -1 ]]; then - conda env create --force -v --file environment.yml - - set +u - source "$(conda info --base)/etc/profile.d/conda.sh" - conda activate mlperf-rnnt - set -u - - # We need to convert .flac files to .wav files via sox. Not all sox installs have flac support, so we install from source. - wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz - (cd third_party; tar xf flac-1.3.2.tar.xz; cd flac-1.3.2; ./configure --prefix=$install_dir && make && make install) - - wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz - (cd third_party; tar zxf sox-14.4.2.tar.gz; cd sox-14.4.2; LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install) - - (cd $(git rev-parse --show-toplevel)/loadgen; python setup.py install) -fi - -export PATH="$install_dir/bin/:$PATH" - -set +u -conda activate mlperf-rnnt -set -u - -# stage 0: download model. Check checksum to skip? -if [[ $stage -le 0 ]]; then - wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt -fi - -# stage 1: download data. This will hae a non-zero exit code if the -# checksum is incorrect. -if [[ $stage -le 1 ]]; then - python pytorch/utils/download_librispeech.py \ - pytorch/utils/librispeech-inference.csv \ - $librispeech_download_dir \ - -e $local_data_dir -fi - -if [[ $stage -le 2 ]]; then - python pytorch/utils/convert_librispeech.py \ - --input_dir $librispeech_download_dir/dev-clean \ - --dest_dir $local_data_dir/dev-clean-wav \ - --output_json $local_data_dir/dev-clean-wav.json -fi - -if [[ $stage -le 3 ]]; then - for backend in pytorch; do - for accuracy in "--accuracy" ""; do - for scenario in SingleStream Offline Server; do - log_dir=${work_dir}/${scenario}_${backend} - if [ ! 
-z ${accuracy} ]; then - log_dir+=_accuracy - fi - log_dir+=rerun - - python run.py --backend pytorch \ - --dataset_dir $local_data_dir \ - --manifest $local_data_dir/dev-clean-wav.json \ - --pytorch_config_toml pytorch/configs/rnnt.toml \ - --pytorch_checkpoint $work_dir/rnnt.pt \ - --scenario ${scenario} \ - --backend ${backend} \ - --log_dir ${log_dir} \ - ${accuracy} & - - done - done - done - wait -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tune.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tune.py deleted file mode 100644 index 3041c97b75b..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tune.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright 2020 The MLPerf Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import argparse -import mlperf_loadgen as lg -import subprocess - -import time -import os -from pathlib import Path -import re - -MLPERF_CONF = Path(os.path.dirname(os.path.realpath(__file__))) / "./mlperf.conf" -MLPERF_CONF = MLPERF_CONF.resolve() - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--tune', dest='tune', action='store_true', - help='tune best int8 model on calibration dataset') - parser.add_argument("--backend", choices=["pytorch"], default="pytorch", help="Backend") - parser.add_argument("--scenario", choices=["SingleStream", "Offline", "Server"], - default="Offline", help="Scenario") - parser.add_argument("--mlperf_conf", default=str(MLPERF_CONF), help="mlperf rules config") - parser.add_argument("--user_conf", default="user.conf", - help="user config for user LoadGen settings such as target QPS") - parser.add_argument("--pytorch_config_toml", default="pytorch/configs/rnnt.toml") - parser.add_argument("--pytorch_checkpoint", default="pytorch/work_dir/rnnt.pt") - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--perf_count", type=int, default=None) - parser.add_argument("--log_dir", default='./saved_log') - parser.add_argument('--benchmark', dest='benchmark', action='store_true', - help='run benchmark') - parser.add_argument("--accuracy_only", dest='accuracy_only', action='store_true', - help='For accuracy measurement only.') - parser.add_argument('--int8', dest='int8', action='store_true', help='run benchmark') - parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH', - help='path to checkpoint tuned by Neural Compressor (default: ./)') - args = parser.parse_args() - return args - - -scenario_map = { - "SingleStream": lg.TestScenario.SingleStream, - "Offline": lg.TestScenario.Offline, - "Server": lg.TestScenario.Server, 
-} - - -def main(): - args = get_args() - - if args.backend == "pytorch": - from pytorch_SUT import PytorchSUT - sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, - args.dataset_dir, args.manifest, args.perf_count) - model = sut.greedy_decoder._model - else: - raise ValueError("Unknown backend: {:}".format(args.backend)) - - settings = lg.TestSettings() - settings.scenario = scenario_map[args.scenario] - settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario) - settings.FromConfig(args.user_conf, "rnnt", args.scenario) - - if args.benchmark: - settings.mode = lg.TestMode.PerformanceOnly - else: - settings.mode = lg.TestMode.AccuracyOnly - - log_path = args.log_dir - os.makedirs(log_path, exist_ok=True) - log_output_settings = lg.LogOutputSettings() - log_output_settings.outdir = log_path - log_output_settings.copy_summary_to_stdout = True - log_settings = lg.LogSettings() - log_settings.log_output = log_output_settings - - pattern = ['accuracy=\d+.\d+', 'samples_per_query : \d+', 'Mean latency.*'] - - def eval_func(model): - print("Running Loadgen test...") - sut.greedy_decoder._model = model - lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) - cmd = f"python3 accuracy_eval.py --log_dir {log_path} \ - --dataset_dir {args.dataset_dir} --manifest {args.manifest}" - out = subprocess.check_output(cmd, shell=True) - out = out.decode() - regex_accu = re.compile(pattern[0]) - accu = float(regex_accu.findall(out)[0].split('=')[1]) - print('Accuracy: %.3f ' % (accu)) - return accu - - def benchmark(model): - print("Running Loadgen test...") - sut.greedy_decoder._model = model - lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) - file_path = os.path.join(log_path, 'mlperf_log_summary.txt') - f = open(file_path, 'r', encoding='UTF-8') - file_content = f.read() - f.close() - regex_batch = re.compile(pattern[1]) - regex_late = re.compile(pattern[2]) - samples_per_query = 
int(regex_batch.findall(file_content)[0].split(': ')[1]) - latency_per_sample = int(regex_late.findall(file_content)[0].split(': ')[1]) - print('Batch size = %d' % samples_per_query) - print('Latency: %.3f ms' % (latency_per_sample / 10**6)) - print('Throughput: %.3f samples/sec' % (10**9/latency_per_sample)) - - if args.tune: - # Dynamic Quantization with Neural Compressor - from neural_compressor.experimental import Quantization, common - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(model) - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save(args.tuned_checkpoint) - - elif args.int8: - from neural_compressor.utils.pytorch import load - int8_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model) - if args.accuracy_only: - eval_func(int8_model) - elif args.benchmark: - benchmark(int8_model) - else: - if args.accuracy_only: - eval_func(model) - elif args.benchmark: - benchmark(model) - - - print("Done!", flush=True) - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tuning.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tuning.sh deleted file mode 100644 index 63f0b6d9231..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/run_tuning.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_tuning - -} - -# init params -function init_params { - tuned_checkpoint=saved_results - for var in "$@" - do - case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - tuned_checkpoint=$(echo $var |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# run_tuning -function run_tuning { - 
extra_cmd="" - if [ -n "$dataset_location" ];then - extra_cmd=$extra_cmd"--dataset_dir ${dataset_location} " - fi - if [ -n "$input_model" ];then - extra_cmd=$extra_cmd"--pytorch_checkpoint ${input_model} " - fi - if [ -n "$tuned_checkpoint" ];then - extra_cmd=$extra_cmd"--tuned_checkpoint ${tuned_checkpoint} " - fi - - python run_tune.py \ - --tune \ - --backend pytorch \ - --manifest $dataset_location/dev-clean-wav.json \ - --pytorch_config_toml pytorch/configs/rnnt.toml \ - --scenario Offline \ - ${extra_cmd} -} - -main "$@" \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/user.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/user.conf deleted file mode 100644 index 545569c1ac6..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/user.conf +++ /dev/null @@ -1,6 +0,0 @@ -# Please set these fields depending on the performance of your system to -# override default LoadGen settings. 
-*.SingleStream.target_latency = 10 -*.Server.target_qps = 1.0 -*.Offline.target_qps = 1.0 -*.MultiStream.samples_per_query = 4 \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/README.md b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md similarity index 95% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/README.md rename to examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md index 57e73641863..46c014871ef 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/README.md +++ b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md @@ -11,7 +11,7 @@ This document describes the step-by-step instructions for reproducing torchaudio pip install neural-compressor ``` ```shell -cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager +cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx pip install -r requirements.txt ``` diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/requirements.txt b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt similarity index 63% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/requirements.txt rename to examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt index fd2b6919be8..ae6baa964f9 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/requirements.txt +++ b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt @@ -1,4 +1,4 @@ --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.11.0+cpu 
-torchaudio==0.11.0+cpu +torch +torchaudio jiwer diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_asr.py b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py similarity index 76% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_asr.py rename to examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py index ae5bc558769..9c881918f97 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_asr.py +++ b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py @@ -40,7 +40,7 @@ help='For accuracy measurement only.') parser.add_argument('--tuned_checkpoint', default='./saved_results', type=str, metavar='PATH', help='path to checkpoint tuned by Neural Compressor (default: ./)') -parser.add_argument('--iter', default=0, type=int, +parser.add_argument('--iters', default=100, type=int, help='For accuracy measurement only.') parser.add_argument('--warmup_iter', default=5, type=int, help='For benchmark measurement only.') @@ -119,30 +119,32 @@ def eval_func(model): text.append(wave[2][0]) prediction = [pre.replace("|", " ")for pre in predict] WER = wer(text, prediction) + print("Accuracy: %.5f" % (1-WER)) return 1-WER - from neural_compressor.experimental import Quantization, common - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(model) - quantizer.eval_func = eval_func - q_model = quantizer.fit() + from neural_compressor import PostTrainingQuantConfig, quantization + conf = PostTrainingQuantConfig(approach="static") + q_model = quantization.fit(model, + conf=conf, + eval_func=eval_func, + calib_dataloader=val_dataloader
+ ) q_model.save(args.tuned_checkpoint) exit(0) #benchmark - if args.benchmark or args.accuracy_only: - if args.int8: - from neural_compressor.utils.pytorch import load - new_model = load( - os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model) - else: - new_model = model - def eval_func(model): + if args.int8: + from neural_compressor.utils.pytorch import load + model = load( + os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model) + + if args.benchmark: + def b_func(model): predict = [] text = [] results = {} batch_time = AverageMeter('Time', ':6.3f') with torch.inference_mode(): for i, wave in enumerate(val_dataloader): + if i >= args.warmup_iter: start = time.time() emission, _ = model(wave[0][0]) @@ -152,7 +154,7 @@ def eval_func(model): batch_time.update(time.time() - start) predict.append(transcript) text.append(wave[2][0]) - if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1): + if args.iters > 0 and i >= (args.warmup_iter + args.iters - 1): break prediction = [pre.replace("|", " ")for pre in predict] WER = wer(text, prediction) @@ -164,6 +166,15 @@ def eval_func(model): print('Throughput: %.3f images/sec' % (args.batch_size / results['average_batch_time'])) print('Batch size = %d' % args.batch_size) return results['accuracy'] + + from neural_compressor.config import BenchmarkConfig + from neural_compressor import benchmark + b_conf = BenchmarkConfig(warmup=5, + iteration=args.iters, + cores_per_instance=4, + num_of_instance=1) + benchmark.fit(model, b_conf, b_func=b_func) + if args.accuracy_only: + eval_func(model) - acc = eval_func(new_model) exit(0) diff --git
a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_benchmark.sh b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh similarity index 85% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_benchmark.sh rename to examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh index 07295e02f39..071ad381055 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_benchmark.sh +++ b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh @@ -57,7 +57,7 @@ function run_benchmark { if [[ ${mode} == "accuracy" ]]; then mode_cmd=" --accuracy_only" elif [[ ${mode} == "benchmark" ]]; then - mode_cmd=" --iter ${iters} --benchmark " + mode_cmd=" --iters ${iters} --benchmark " else echo "Error: No such mode: ${mode}" exit 1 @@ -68,10 +68,16 @@ function run_benchmark { extra_cmd=$extra_cmd" --int8" fi - if [ -n "$input_model"];then - input_model=$topology + if [[ "${topology}" == "hubert_fx" ]]; then + input_model=hubert + elif [[ "${topology}" == "wav2vec_fx" ]]; then + input_model=wav2vec + else + echo "Error: please set the correct topology." 
+ exit 1 fi + python run_asr.py \ --model $input_model \ --root $root \ diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_tuning.sh b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_tuning.sh similarity index 84% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_tuning.sh rename to examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_tuning.sh index 645c5ac5adb..5c009d253c4 100755 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/eager/run_tuning.sh +++ b/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_tuning.sh @@ -44,9 +44,16 @@ function init_params { # run_tuning function run_tuning { extra_cmd="" - if [ -n "$input_model"];then - input_model=$topology + if [[ "${topology}" == "hubert_fx" ]]; then + input_model=hubert + elif [[ "${topology}" == "wav2vec_fx" ]]; then + input_model=wav2vec + else + echo "Error: please set the correct topology." + exit 1 + fi + extra_cmd=$extra_cmd python run_asr.py \ --model $input_model \ diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md index 3a9bb41f9b3..415a155c4b2 100644 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md @@ -38,8 +38,8 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] Download the model from tensorflow-hub. 
image recognition -- [mobilenetv1](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v1_075_224/classification/5) -- [mobilenetv2](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v2_035_224/classification/5) +- [mobilenetv1(experimental)](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v1_075_224/classification/5) +- [mobilenetv2(experimental)](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v2_035_224/classification/5) - [efficientnet_v2_b0](https://hub.tensorflow.google.cn/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2) ## Write Yaml config file @@ -51,4 +51,4 @@ In examples directory, there are mobilenet_v1.yaml, mobilenet_v2.yaml and effici ``` ```shell bash run_benchmark.sh --config=./config.yaml --input_model=./SavedModel --mode=performance - ``` \ No newline at end of file + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md index 73510cee998..b12d9052f3e 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md @@ -12,9 +12,9 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` > Note: Supported Tensorflow [Version](../../../../../../../README.md).
@@ -26,7 +26,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -43,12 +44,25 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a inception_v3.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The inception_v3_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. 
If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command +#### Tune ```shell - bash run_tuning.sh --config=inception_v3.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=inception_v3.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` +#### Benchmark + ```shell + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=performance + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=accuracy + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3.yaml b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3.yaml deleted file mode 100644 index d7cc15e64dc..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: inception_v3 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - op_wise: { - 'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3_itex.yaml b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3_itex.yaml deleted file mode 100644 index 6156c1bcd5e..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/inception_v3_itex.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: inception_v3 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - op_wise: { - 'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py index a40d0c06aef..4112977edc6 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,7 @@ # limitations under the License. 
# import time -import shutil import numpy as np -from argparse import ArgumentParser from neural_compressor import data import tensorflow as tf tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) @@ -42,87 +40,101 @@ flags.DEFINE_bool( 'benchmark', False, 'whether to benchmark the model') -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - flags.DEFINE_string( 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.transforms.imagenet_transform import LabelShift from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) + [BilinearImagenetTransform(height=299, width=299)])) if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, 
batch_size=1) else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. - """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=299, width=299)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. 
+ + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": - tf.compat.v1.app.run() \ No newline at end of file + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py index ba2c276a2c1..fc6119d5c50 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py +++ 
b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import argparse from tensorflow.keras.applications.inception_v3 import InceptionV3 def get_inception_v3_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_benchmark.sh index ca49af56795..43b1636c839 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_benchmark.sh @@ -10,20 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) ;; esac done @@ -35,10 +41,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ 
--mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/README.md b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/README.md index e1318d4638d..682d4de9bcd 100644 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/README.md +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/README.md @@ -1,10 +1,9 @@ Step-by-Step ============ -This document is used to list steps of reproducing TensorFlow keras Intel® Neural Compressor QAT conversion. +This document is used to apply QAT to Tensorflow Keras models using Intel® Neural Compressor. 
This example can run on Intel CPUs and GPUs. - ## Prerequisite ### 1. Installation @@ -12,45 +11,108 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow and TensorFlow Model Optimization +### 2. Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this QAT example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. ```shell -pip install intel-tensorflow==2.4.0 -pip install tensorflow_model_optimization==0.5.0 +pip install -r requirements.txt ``` -> Note: To generate correct qat model with tensorflow_model_optimization 0.5.0, pls use TensorFlow 2.4 or above. +> Note: Supported Tensorflow [Version](../../../../../../../README.md). -### 3. Install Intel Extension for Tensorflow +### 3. Benchmarking the model on Intel GPU (Optional) -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. +To run benchmark of the model on Intel GPUs, Intel Extension for Tensorflow for Intel GPUs is required. ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). ### 4. Prepare Pretrained model -Run the `train.py` script to get pretrained fp32 model. +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` -### 5. Prepare QAT model - -Run the `qat.py` script to get QAT model which in fact is a fp32 model with quant/dequant pair inserted. - -## Write Yaml config file -In examples directory, there is a mnist.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The mnist_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. ## Run Command ```shell - python convert.py # to convert QAT model to quantized model. - - python benchmark.py # to run accuracy benchmark. + bash run_tuning.sh --input_model=./path/to/model --output_model=./result + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --batch_size=32 ``` +Details of enabling Intel® Neural Compressor to apply QAT. +========================= + +This is a tutorial of how to to apply QAT with Intel® Neural Compressor. +## User Code Analysis +1. User specifies fp32 *model* to apply quantization, the dataset is automatically downloaded. 
In this step, QDQ patterns will be inserted to the keras model, but the fp32 model will not be converted to a int8 model. + +2. User specifies *model* with QDQ patterns inserted, evaluate function to run benchmark. The model we get from the previous step will be run on ITEX backend. Then, the model is going to be fused and inferred. + +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = QuantizationAwareTrainingConfig( + device="gpu", + backend="itex", + ... + ) +``` + +### Code update + +After prepare step is done, we add quantization and benchmark code to generate quantized model and benchmark. + +#### Tune +```python + logger.info('start quantizing the model...') + from neural_compressor import training, QuantizationAwareTrainingConfig + config = QuantizationAwareTrainingConfig() + # create a compression_manager instance to implement QAT + compression_manager = training.prepare_compression(FLAGS.input_model, config) + # QDQ patterns will be inserted to the input keras model + compression_manager.callbacks.on_train_begin() + # get the model with QDQ patterns inserted + q_aware_model = compression_manager.model.model + + # training code defined by users + q_aware_model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) + q_aware_model.summary() + train_images_subset = train_images[0:1000] + train_labels_subset = train_labels[0:1000] + q_aware_model.fit(train_images_subset, train_labels_subset, + batch_size=500, epochs=1, validation_split=0.1) + _, q_aware_model_accuracy = q_aware_model.evaluate( + test_images, test_labels, verbose=0) + print('Quant test accuracy:', q_aware_model_accuracy) + + # apply some post process steps and save the output model + 
compression_manager.callbacks.on_train_end() + compression_manager.save(FLAGS.output_model) +``` +#### Benchmark +```python + from neural_compressor.benchmark import fit + from neural_compressor.experimental import common + from neural_compressor.config import BenchmarkConfig + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + + # convert the quantized keras model to graph_def so that it can be fused by ITEX + model = common.Model(FLAGS.input_model).graph_def + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_func=evaluate) + elif FLAGS.mode == 'accuracy': + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) +``` \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/benchmark.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/benchmark.py deleted file mode 100644 index df49ab3b075..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/benchmark.py +++ /dev/null @@ -1,28 +0,0 @@ -import tensorflow as tf -from tensorflow import keras -import numpy as np - -class dataloader(object): - def __init__(self, batch_size=100): - mnist = keras.datasets.mnist - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - - # Normalize the input image so that each pixel value is between 0 to 1. 
- self.train_images = train_images / 255.0 - self.test_images = test_images / 255.0 - self.train_labels = train_labels - self.test_labels = test_labels - - self.batch_size = batch_size - self.i = 0 - - def __iter__(self): - while self.i < len(self.test_images): - yield self.test_images[self.i: self.i + self.batch_size], self.test_labels[self.i: self.i + self.batch_size] - self.i = self.i + self.batch_size - -from neural_compressor.experimental import Benchmark, common -evaluator = Benchmark('mnist.yaml') -evaluator.model = common.Model('quantized_model') -evaluator.b_dataloader = dataloader() -evaluator('accuracy') diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/convert.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/convert.py deleted file mode 100644 index f1b8c7054b3..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/convert.py +++ /dev/null @@ -1,7 +0,0 @@ -from neural_compressor.experimental import ModelConversion, common -conversion = ModelConversion() -conversion.source = 'QAT' -conversion.destination = 'default' -conversion.model = common.Model('../qat/trained_qat_model') -q_model = conversion() -q_model.save('quantized_model') diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/main.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/main.py new file mode 100644 index 00000000000..0709d23794d --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/main.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +import tensorflow as tf + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS +logger = logging.getLogger(__name__) + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size') + + +def prepare_data(): + """Load the dataset of MNIST. + + Returns: + train (tuple): The images and labels for training. + test (tuple): The images and labels for testing. + """ + # Load MNIST dataset + mnist = tf.keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. 
+ train_images = train_images / 255.0 + test_images = test_images / 255.0 + + return (train_images, train_labels), (test_images, test_labels) + +(train_images, train_labels), (test_images, test_labels) = prepare_data() + +class dataloader(object): + def __init__(self, batch_size=100): + mnist = tf.keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + self.batch_size = batch_size + self.i = 0 + + def __iter__(self): + while self.i < len(self.test_images): + yield self.test_images[self.i: self.i + self.batch_size], self.test_labels[self.i: self.i + self.batch_size] + self.i = self.i + self.batch_size + +def evaluate(model): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + from neural_compressor.experimental import common + model = common.Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = 100 + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + assert len(input_tensor) == len(inputs), \ + 'inputs len must equal with input_tensor' + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / FLAGS.batch_size + return latency + + dataloader = dataloader(batch_size=FLAGS.batch_size) + latency = eval_func(dataloader) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(FLAGS.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + + +def main(): + if FLAGS.tune: + logger.info('start quantizing the model...') + from neural_compressor import training, QuantizationAwareTrainingConfig + config = QuantizationAwareTrainingConfig() + compression_manager = training.prepare_compression(FLAGS.input_model, config) + compression_manager.callbacks.on_train_begin() + + q_aware_model = compression_manager.model.model + + q_aware_model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) + + q_aware_model.summary() + train_images_subset = train_images[0:1000] + train_labels_subset = train_labels[0:1000] + q_aware_model.fit(train_images_subset, train_labels_subset, + batch_size=500, epochs=1, validation_split=0.1) + _, q_aware_model_accuracy = q_aware_model.evaluate( + test_images, test_labels, verbose=0) + print('Quant test accuracy:', q_aware_model_accuracy) + + compression_manager.callbacks.on_train_end() + compression_manager.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.experimental import common + from neural_compressor.config import BenchmarkConfig + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." 
+ + model = common.Model(FLAGS.input_model).graph_def + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_func=evaluate) + elif FLAGS.mode == 'accuracy': + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + main() diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist.yaml b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist.yaml deleted file mode 100644 index 30c89e41ce6..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mnist - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - Accuracy: {} # built-in metrics are topk, map, f1, allow user to register new metric. 
- diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist_itex.yaml b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist_itex.yaml deleted file mode 100644 index 5681e991f3a..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/mnist_itex.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mnist - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - Accuracy: {} # built-in metrics are topk, map, f1, allow user to register new metric. 
- diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/prepare_model.py new file mode 100644 index 00000000000..907196a046d --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/prepare_model.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import tensorflow as tf +from tensorflow import keras + +def train_func(): + # Load MNIST dataset + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
+ model = keras.Sequential([ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10) + ]) + + # Train the digit classification model + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate( + test_images, test_labels, verbose=0) + + print('Baseline test accuracy:', baseline_model_accuracy) + + return model + +def get_mnist_model(saved_path): + assert saved_path is not None, "save path should not be None" + model = train_func() + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_mnist_model(args.output_model) \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/qat.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/qat.py deleted file mode 100644 index 655f70fc9dd..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/qat.py +++ /dev/null @@ -1,37 +0,0 @@ -import tensorflow as tf -from tensorflow import keras - -# Load MNIST dataset -mnist = keras.datasets.mnist -(train_images, train_labels), (test_images, test_labels) = mnist.load_data() - -# Normalize the input image so that each pixel value is between 0 to 1. 
-train_images = train_images / 255.0 -test_images = test_images / 255.0 - -model = tf.keras.models.load_model("baseline_model") - -import tensorflow_model_optimization as tfmot -quantize_model = tfmot.quantization.keras.quantize_model - -# q_aware stands for for quantization aware. -q_aware_model = quantize_model(model) - -# `quantize_model` requires a recompile. -q_aware_model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=['accuracy']) - -q_aware_model.summary() - -train_images_subset = train_images[0:1000] # out of 60000 -train_labels_subset = train_labels[0:1000] - -q_aware_model.fit(train_images_subset, train_labels_subset, - batch_size=500, epochs=1, validation_split=0.1) - -_, q_aware_model_accuracy = q_aware_model.evaluate( - test_images, test_labels, verbose=0) - -print('Quant test accuracy:', q_aware_model_accuracy) -q_aware_model.save("trained_qat_model") diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/requirements.txt b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/requirements.txt new file mode 100644 index 00000000000..c8cbd6d70a6 --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +intel-extension-for-tensorflow[cpu] \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_benchmark.sh new file mode 100644 index 00000000000..a50d81dcd9c --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_benchmark.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + 
--mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --batch_size ${batch_size} \ +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_tuning.sh new file mode 100644 index 00000000000..ad02bf6ea2f --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/run_tuning.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/train.py b/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/train.py deleted file mode 100644 index 5820b434628..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mnist/quantization/qat/train.py +++ /dev/null @@ -1,38 +0,0 @@ -import tensorflow as tf -from tensorflow import keras - -# Load MNIST dataset -mnist = keras.datasets.mnist -(train_images, train_labels), (test_images, test_labels) = mnist.load_data() - -# Normalize the input image so that each pixel value is between 0 to 1. -train_images = train_images / 255.0 -test_images = test_images / 255.0 - -# Define the model architecture. 
-model = keras.Sequential([ - keras.layers.InputLayer(input_shape=(28, 28)), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10) -]) - -# Train the digit classification model -model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=['accuracy']) - -model.fit( - train_images, - train_labels, - epochs=1, - validation_split=0.1, -) - -_, baseline_model_accuracy = model.evaluate( - test_images, test_labels, verbose=0) - -print('Baseline test accuracy:', baseline_model_accuracy) -model.save("baseline_model") diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/README.md index 967f760d42a..5f0170f2546 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/README.md @@ -12,9 +12,9 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` > Note: Supported Tensorflow [Version](../../../../../../../README.md). 
@@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -41,12 +42,25 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a mobilenet_v2.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The mobilenet_v2_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. 
If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command +#### Tune ```shell - bash run_tuning.sh --config=mobilenet_v2.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=mobilenet_v2.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` +#### Benchmark + ```shell + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=performance + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=accuracy + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py index 82993668022..51fdcab18c4 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,7 @@ # limitations under the License. 
# import time -import shutil import numpy as np -from argparse import ArgumentParser from neural_compressor import data import tensorflow as tf tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) @@ -28,101 +26,128 @@ ## Required parameters flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') + 'input_model', None, 'Run inference with specified keras model.') flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') + 'output_model', None, 'The output quantized model.') flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') + 'mode', 'performance', 'define benchmark mode for accuracy or performance') flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') + 'tune', False, 'whether to tune the model') flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') + 'benchmark', False, 'whether to benchmark the model') flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') + 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') + 'eval_data', None, 'location of evaluate dataset') -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.metric.metric import TensorflowTopK +from 
neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.transforms.imagenet_transform import LabelShift from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) + [BilinearImagenetTransform(height=224, width=224)])) if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=224, width=224)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + op_name_list={ + 'StatefulPartitionedCall/mobilenetv2_1.00_224/expanded_conv_depthwise/depthwise': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + }, + 'StatefulPartitionedCall/mobilenetv2_1.00_224/expanded_conv_project_BN/FusedBatchNormV3/Mul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50], + op_name_list=op_name_list) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": - tf.compat.v1.app.run() \ No newline at end of file + tf.compat.v1.app.run() diff --git 
a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2.yaml b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2.yaml deleted file mode 100644 index c96be536aa7..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: mobilenet_v2 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - - op_wise: { - 'MobilenetV2/expanded_conv/depthwise/depthwise': { - 'activation': {'dtype': ['fp32']}, - }, - 'MobilenetV2/Conv_1/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2_itex.yaml b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2_itex.yaml deleted file mode 100644 index 818a2944e30..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/mobilenet_v2_itex.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: mobilenet_v2 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. 
set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - - op_wise: { - 'MobilenetV2/expanded_conv/depthwise/depthwise': { - 'activation': {'dtype': ['fp32']}, - }, - 'MobilenetV2/Conv_1/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py index be6dcd0d505..ae96dcb1859 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import argparse from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 def get_mobilenet_v2_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh index ca49af56795..43b1636c839 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh @@ -10,20 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) ;; esac done @@ -35,10 +41,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) 
input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md index a36fbb36115..c8ea5bea846 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md @@ -12,9 +12,9 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` > Note: Supported Tensorflow [Version](../../../../../../../README.md). ### 3. Install Intel Extension for Tensorflow @@ -24,7 +24,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -41,12 +42,25 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a resnet101.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The resnet101_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... 
+ ) +``` ## Run Command +#### Tune ```shell - bash run_tuning.sh --config=resnet101.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=resnet101.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` +#### Benchmark + ```shell + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=performance + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=accuracy + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py index 7904dda618d..6666d155806 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,11 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# +# import time -import shutil import numpy as np -from argparse import ArgumentParser from neural_compressor import data import tensorflow as tf tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) @@ -28,101 +26,115 @@ ## Required parameters flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') + 'input_model', None, 'Run inference with specified keras model.') flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') + 'output_model', None, 'The output quantized model.') flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') + 'mode', 'performance', 'define benchmark mode for accuracy or performance') flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') + 'tune', False, 'whether to tune the model') flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') + 'benchmark', False, 'whether to benchmark the model') flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') + 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') + 'eval_data', None, 'location of evaluate dataset') -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.metric.metric import TensorflowTopK +from 
neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.transforms.imagenet_transform import LabelShift from neural_compressor.data.transforms.imagenet_transform import TensorflowResizeCropImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) + [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": - tf.compat.v1.app.run() + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py index 13ec895f72c..552a0942157 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py +++ 
b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import argparse import tensorflow as tf def get_resnet101_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101.yaml b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101.yaml deleted file mode 100644 index e97409b1b8f..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnet_v1_101 - framework: tensorflow # mandatory. 
supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101_itex.yaml b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101_itex.yaml deleted file mode 100644 index ff5447efe92..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/resnet101_itex.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnet_v1_101 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_benchmark.sh index ca49af56795..43b1636c839 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_benchmark.sh @@ -10,20 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) ;; esac done @@ -35,10 +41,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; 
- --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/README b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/README new file mode 100644 index 00000000000..e755742156d --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/README @@ -0,0 +1,119 @@ +Step-by-Step +============ + +This document is used to apply QAT to Tensorflow Keras models using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### 2. Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this QAT example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` +> Note: Supported Tensorflow [Version](../../../../../../../README.md). + +### 3. Benchmarking the model on Intel GPU (Optional) + +To run benchmark of the model on Intel GPUs, Intel Extension for Tensorflow for Intel GPUs is required. + +```shell +pip install --upgrade intel-extension-for-tensorflow[gpu] +``` + +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). + +### 4. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` + +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## Run Command + ```shell + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=100 + ``` + +Details of enabling Intel® Neural Compressor to apply QAT. +========================= + +This is a tutorial of how to to apply QAT with Intel® Neural Compressor. +## User Code Analysis +1. User specifies fp32 *model*, training dataset *dataset_location* to apply quantization. In this step, QDQ patterns will be inserted to the keras model, but the fp32 model will not be converted to a int8 model. + +2. User specifies *model* with QDQ patterns inserted, evaluate function to run benchmark. The model we get from the previous step will be run on ITEX backend. Then, the model is going to be fused and inferred. + +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = QuantizationAwareTrainingConfig( + device="gpu", + backend="itex", + ... + ) +``` + +### Code update + +After prepare step is done, we add quantization and benchmark code to generate quantized model and benchmark. 
+ +#### Tune +```python + logger.info('start quantizing the model...') + from neural_compressor import training, QuantizationAwareTrainingConfig + config = QuantizationAwareTrainingConfig() + # create a compression_manager instance to implement QAT + compression_manager = training.prepare_compression(FLAGS.input_model, config) + # QDQ patterns will be inserted to the input keras model + compression_manager.callbacks.on_train_begin() + # get the model with QDQ patterns inserted + q_aware_model = compression_manager.model.model + + # training code defined by users + q_aware_model.compile( + optimizer='sgd', + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + metrics=["accuracy"], + ) + q_aware_model.summary() + x_train, y_train = prepare_data(FLAGS.dataset_location) + q_aware_model.fit(x_train, + y_train, + batch_size=64, + epochs=1) + + # apply some post process steps and save the output model + compression_manager.callbacks.on_train_end() + compression_manager.save(FLAGS.output_model) +``` +#### Benchmark +```python + from neural_compressor.benchmark import fit + from neural_compressor.experimental import common + from neural_compressor.config import BenchmarkConfig + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." 
+ + # convert the quantized keras model to graph_def so that it can be fused by ITEX + model = common.Model(FLAGS.input_model).graph_def + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_func=evaluate) + elif FLAGS.mode == 'accuracy': + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) +``` \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/main.py b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/main.py new file mode 100644 index 00000000000..d7a150f665a --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/main.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +import codeop +import logging +import numpy as np +import tensorflow as tf + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS +logger = logging.getLogger(__name__) + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'dataset_location', None, 'location of the dataset on tfrecord format') + +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size') + +from neural_compressor.experimental.metric.metric import TensorflowTopK +from neural_compressor.experimental.data.transforms.transform import ComposeTransform +from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.transforms.imagenet_transform import TensorflowResizeCropImagenetTransform + +def prepare_data(root): + """ + Parse the input tf_record data. + + Args: + root (string): The path to tfrecord files. + + Returns: + data (float): The images that can be used for training or evaluation. + label (float): The labels corresponding to the images. 
+ """ + dataset = TensorflowImageRecord( + root=root, + transform=ComposeTransform(transform_list=[ + TensorflowResizeCropImagenetTransform( + height=224, width=224) + ])) + + data = np.array(list(dataset.map(lambda x, y: x))) + data = tf.keras.applications.resnet.preprocess_input(data) + label = np.array(list(dataset.map(lambda x, y: y))).squeeze(1) + + if len(data) > 10000: + data = data[:10000] + label = label[:10000] + + for idx, i in enumerate(label): + label[idx] = i-1 + + return data, label + +def evaluate(model): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.experimental import common + model = common.Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = 100 + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + assert len(input_tensor) == len(inputs), \ + 'inputs len must equal with input_tensor' + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / FLAGS.batch_size + return latency + + from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader + dataset = TensorflowImageRecord(root=FLAGS.dataset_location, 
transform=ComposeTransform(transform_list=[ + TensorflowResizeCropImagenetTransform(height=224, width=224)])) + dataloader = DefaultDataLoader(dataset, batch_size=FLAGS.batch_size) + latency = eval_func(dataloader) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(FLAGS.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(): + if FLAGS.tune: + logger.info('start quantizing the model...') + from neural_compressor import training, QuantizationAwareTrainingConfig + config = QuantizationAwareTrainingConfig() + compression_manager = training.prepare_compression(FLAGS.input_model, config) + compression_manager.callbacks.on_train_begin() + + q_aware_model = compression_manager.model.model + q_aware_model.compile( + optimizer='sgd', + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + metrics=["accuracy"], + ) + + q_aware_model.summary() + x_train, y_train = prepare_data(FLAGS.dataset_location) + q_aware_model.fit(x_train, + y_train, + batch_size=64, + epochs=1) + + compression_manager.callbacks.on_train_end() + compression_manager.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.experimental import common + from neural_compressor.config import BenchmarkConfig + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." 
+ + model = common.Model(FLAGS.input_model).graph_def + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_func=evaluate) + elif FLAGS.mode == 'accuracy': + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + main() diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/prepare_model.py new file mode 100644 index 00000000000..086690ea37b --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/prepare_model.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +from tensorflow.keras.applications import ResNet50 + + +def get_resnet50_model(saved_path): + assert saved_path is not None, "save path should not be None" + model = ResNet50(weights='imagenet') + model.save(saved_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_resnet50_model(args.output_model) diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/requirements.txt b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/requirements.txt new file mode 100644 index 00000000000..c8cbd6d70a6 --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +intel-extension-for-tensorflow[cpu] \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_benchmark.sh new file mode 100644 index 00000000000..203630c8d95 --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_benchmark.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --batch_size 
${batch_size} \ + --dataset_location ${dataset_location} +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_tuning.sh new file mode 100644 index 00000000000..43c392a1be0 --- /dev/null +++ b/examples/tensorflow/image_recognition/keras_models/resnet50/quantization/qat/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/resnet50_fashion/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/resnet50_fashion/quantization/ptq/README.md index 8c957a93a2a..95515cbd7c3 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet50_fashion/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/resnet50_fashion/quantization/ptq/README.md @@ -36,7 +36,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ### 4. Prepare Pretrained model -Run the `resnet50_fashion_mnist_train.py` script located in `LowPrecisionInferenceTool/examples/tensorflow/keras`, and it will generate a saved model called `resnet50_fashion` at current path. +Run the `resnet50_fashion_mnist_train.py` script located in `examples/tensorflow/image_recognition/keras_models/resnet50_fashion/quantization/ptq`, and it will generate a saved model called `resnet50_fashion` at current path. 
### 5. Prepare dataset diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md index d6a50d60f64..a4e84abc8b5 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md @@ -12,9 +12,9 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` > Note: Supported Tensorflow [Version](../../../../../../../README.md). @@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -42,12 +43,25 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. 
-## Write Yaml config file -In examples directory, there is a resnetv2_101.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The resnetv2_101_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command +#### Tune ```shell - bash run_tuning.sh --config=resnetv2_101.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=resnetv2_101.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` +#### Benchmark + ```shell + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=performance + bash run_benchmark.sh --input_model=./path/to/model --dataset_location=/path/to/evaluation/dataset --mode=accuracy + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py index 23c1c00e096..556c7627cb4 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py 
+++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,7 @@ # limitations under the License. # import time -import shutil import numpy as np -from argparse import ArgumentParser from neural_compressor import data import tensorflow as tf tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) @@ -42,87 +40,102 @@ flags.DEFINE_bool( 'benchmark', False, 'whether to benchmark the model') -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - flags.DEFINE_string( 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.transforms.imagenet_transform import LabelShift from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform 
eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) + [BilinearImagenetTransform(height=224, width=224)])) if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=224, width=224)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + + conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py index 4a6f20d4c1e..764ea2be82f 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py @@ 
-1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import argparse import tensorflow as tf def get_resnet101_v2_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101.yaml b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101.yaml deleted file mode 100644 index f5b1165bf44..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnetv2_101 - framework: tensorflow # mandatory. 
supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101_itex.yaml b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101_itex.yaml deleted file mode 100644 index 3c13302f138..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/resnetv2_101_itex.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnetv2_101 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh index ca49af56795..43b1636c839 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh @@ -10,20 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) ;; esac done @@ -35,10 +41,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo 
$var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md index d42e9db612e..1f1ca4254cd 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md @@ -308,7 +308,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] --input_model=/PATH/TO/frozen_nasnet_mobile.pb --output_model=./nc_nasnet_mobile ``` -### 20. EfficientNet-b0 +### 20. EfficientNet-b0(experiment) Download pre-trained checkpoint ```shell diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml index 79da662a36f..b9da893f6da 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml +++ b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml @@ -38,6 +38,14 @@ quantization: # optional. tuning constrai algorithm: minmax weight: granularity: per_channel + op_wise: { + 'densenet121/MaxPool2D/MaxPool': { + 'activation': {'dtype': ['fp32']} + }, + 'densenet121/transition_block[1-3]/AvgPool2D/AvgPool': { + 'activation': {'dtype': ['fp32']}, + } + } evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. accuracy: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml index b5629ad649c..5312ed341fa 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml +++ b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml @@ -38,6 +38,14 @@ quantization: # optional. tuning constrai algorithm: minmax weight: granularity: per_channel + op_wise: { + 'densenet161/MaxPool2D/MaxPool': { + 'activation': {'dtype': ['fp32']} + }, + 'densenet161/transition_block[1-3]/AvgPool2D/AvgPool': { + 'activation': {'dtype': ['fp32']}, + } + } evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml index 6892b69dc73..b63414d8acf 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml +++ b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml @@ -38,6 +38,14 @@ quantization: # optional. tuning constrai algorithm: minmax weight: granularity: per_channel + op_wise: { + 'densenet169/MaxPool2D/MaxPool': { + 'activation': {'dtype': ['fp32']} + }, + 'densenet169/transition_block[1-3]/AvgPool2D/AvgPool': { + 'activation': {'dtype': ['fp32']}, + } + } evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. 
diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md index 663a9cd89ee..d978b0ed335 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md @@ -173,7 +173,7 @@ After prepare step is done, we add tune and benchmark code to generate quantized #### Benchmark ```python from neural_compressor.experimental import Benchmark, common - from neural_compressor.model.model import get_model_type + from neural_compressor.model.tensorflow_model import get_model_type evaluator = Benchmark(FLAGS.config) dataset = Dataset(eval_file, FLAGS.eval_batch_size) evaluator.b_dataloader = common.DataLoader(\ diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py index 9ffd1c6c1a0..5620ab3775a 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py @@ -1109,7 +1109,7 @@ def result(self): evaluator.metric = Accuracy() - from neural_compressor.model.model import get_model_type + from neural_compressor.model.tensorflow_model import get_model_type model_type = get_model_type(FLAGS.input_model) if model_type == 'frozen_pb': evaluator.model = FLAGS.input_model diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md b/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md index 6c2072a2b31..fa82967ce42 100644 --- a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md +++ b/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md @@ -15,7 +15,7 @@ pip install neural-compressor ```shell pip install intel-tensorflow ``` -> Note: Supported Tensorflow versions please refer to Neural Compressor readme file. 
+> Note: Only supported Tensorflow 1.x versions. ### 3. Installation Dependency packages ```shell diff --git a/examples/tensorflow/oob_models/quantization/ptq/model_detail.py b/examples/tensorflow/oob_models/quantization/ptq/model_detail.py index 07ce37cf892..8c5f2d1770d 100644 --- a/examples/tensorflow/oob_models/quantization/ptq/model_detail.py +++ b/examples/tensorflow/oob_models/quantization/ptq/model_detail.py @@ -385,5 +385,11 @@ 'low': -1.0, 'high': 1.0 }, + # centernet_hg104 + { + 'model_name': 'centernet_hg104', + 'input': {'input_tensor': generate_data([224, 224, 3]),}, + 'output': ['Identity'], + }, ] diff --git a/examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh b/examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh index 87d16a45c1e..efd68dde04d 100755 --- a/examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh @@ -101,6 +101,9 @@ function set_args { NeuMF PRNet DIEN_Deep-Interest-Evolution-Network + EfficientDet-D2-768x768 + EfficientDet-D4-1024x1024 + centernet_hg104 -------- ) diff --git a/examples/tensorflow/oob_models/quantization/ptq/run_tuning.sh b/examples/tensorflow/oob_models/quantization/ptq/run_tuning.sh index 2971bedf7c3..a183dbb52e6 100755 --- a/examples/tensorflow/oob_models/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/oob_models/quantization/ptq/run_tuning.sh @@ -83,6 +83,7 @@ function set_args { DIEN_Deep-Interest-Evolution-Network EfficientDet-D2-768x768 EfficientDet-D4-1024x1024 + centernet_hg104 -------- ) diff --git a/neural_coder/README.md b/neural_coder/README.md index 9c07f9f6503..b7c6d0fb63b 100644 --- a/neural_coder/README.md +++ b/neural_coder/README.md @@ -35,10 +35,15 @@ simultaneously on below PyTorch evaluation code, we generate the optimized code ## Getting Started! -There are currently 2 ways to use Neural Coder for automatic quantization enabling and benchmark. 
+There are currently 3 ways to use Neural Coder for automatic quantization enabling and benchmark. ### Jupyter Lab Extension -We offer Neural Coder as an extension plugin in Jupyter Lab. This enables users to utilize Neural Coder while writing their Deep Learning models in Jupyter Lab coding platform. Users can simply search for ```jupyter-lab-neural-compressor``` in the Extension Manager in JupyterLab and install Neural Coder with one click. For more details, please refer to this [guide](extensions/neural_compressor_ext_lab/README.md) +We offer Neural Coder as an extension plugin in Jupyter Lab. This enables users to utilize Neural Coder while writing their Deep Learning models in Jupyter Lab coding platform. Users can simply search for ```jupyter-lab-neural-compressor``` in the Extension Manager in JupyterLab and install Neural Coder with one click. For more details, please refer to this [guide](extensions/neural_compressor_ext_lab/README.md). + +[AWS Amazon SageMaker](https://aws.amazon.com/sagemaker/) users can also use Neural Coder as an extension following this [guide](docs/AWSSageMakerSupport.md). + +### Python Launcher +Neural Coder can be used as a Python Launcher. Users can run the Python Deep Learning model code as it is with automatic enabling of optimizations by simply adding an inline prefix ```-m neural_coder``` to the Python command line. For more details, please refer to this [guide](docs/PythonLauncher.md). ### Python API There are 3 user-facing APIs for Neural Coder: enable, bench and superbench. For more details, please refer to this [guide](docs/PythonAPI.md). We have provided a [list](docs/SupportMatrix.md) of supported Deep Learning optimization features. Specifically for quantization, we provide an auto-quantization API that helps automatically enable quantization on Deep Learning models and automatically evaluates for the best performance on the model with no manual coding needed. 
Supported features include Post-Training Static Quantization, Post-Training Dynamic Quantization, and Mixed Precision. For more details, please refer to this [guide](docs/Quantization.md). diff --git a/neural_coder/__main__.py b/neural_coder/__main__.py index f9011e91f8b..1eee5a47fa7 100644 --- a/neural_coder/__main__.py +++ b/neural_coder/__main__.py @@ -25,15 +25,19 @@ def parse_args(): """ parser = ArgumentParser(description="command-launch a Python script with quantization auto-enabled") - parser.add_argument("--opt", type=str, default="", + parser.add_argument("-o", "--opt", type=str, default="", help="optimization feature to enable") - parser.add_argument("--strategy", type=str, default="static", - help="quantization strategy") + parser.add_argument("-a", "--approach", type=str, default="static", + + help="quantization approach (strategy)") parser.add_argument('--config', type=str, default="", help='quantization configuration file path') + parser.add_argument('-b', '--bench', default=False, action='store_true', + help='conduct auto_quant benchmark instead of enable') + # positional parser.add_argument("script", type=str, help="The full path to the script to be launched. 
" @@ -50,32 +54,41 @@ def parse_args(): script_copied = args.script[:-3] + "_optimized.py" shutil.copy(args.script, script_copied) -# optimize on copied script with Neural Coder -from neural_coder import enable -if args.opt == "": - if args.strategy == "static": - features=["pytorch_inc_static_quant_fx"] - if args.strategy == "static_ipex": - features=["pytorch_inc_static_quant_ipex"] - if args.strategy == "dynamic": - features=["pytorch_inc_dynamic_quant"] -else: - features=[args.opt] -enable( - code=script_copied, - features=features, - overwrite=True, -) - -# execute on copied script, which has already been optimized -cmd = [] - -cmd.append(sys.executable) # "/xxx/xxx/python" -cmd.append("-u") -cmd.append(script_copied) -cmd.extend(args.script_args) - -cmd = " ".join(cmd) # list convert to string - -process = subprocess.Popen(cmd, env=os.environ, shell=True) # nosec -process.wait() +if not args.bench: # enable + # optimize on copied script with Neural Coder + from neural_coder import enable + if args.opt == "": + if args.approach == "static": + features = ["pytorch_inc_static_quant_fx"] + if args.approach == "static_ipex": + features = ["pytorch_inc_static_quant_ipex"] + if args.approach == "dynamic": + features = ["pytorch_inc_dynamic_quant"] + else: + features = args.opt.split(",") + + # execute optimization enabling + enable( + code=script_copied, + features=features, + overwrite=True, + ) + + # execute on copied script, which has already been optimized + cmd = [] + + cmd.append(sys.executable) # "/xxx/xxx/python" + cmd.append("-u") + cmd.append(script_copied) + cmd.extend(args.script_args) + + cmd = " ".join(cmd) # list convert to string + + process = subprocess.Popen(cmd, env=os.environ, shell=True) # nosec + process.wait() +else: # auto_quant + from neural_coder import auto_quant + auto_quant( + code=script_copied, + args=' '.join(args.script_args), # convert list of strings to a single string + ) diff --git 
a/neural_coder/backends/intel_extension_for_transformers.yaml b/neural_coder/backends/intel_extension_for_transformers.yaml new file mode 100644 index 00000000000..a1accbbfb4b --- /dev/null +++ b/neural_coder/backends/intel_extension_for_transformers.yaml @@ -0,0 +1,35 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: For intel_extension_for_transformers support +# we default apply "PostTrainingDynamic" and "eval_f1" +# support for customization is pending further evaluation + +transformation: + location: + - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"] + content: + - |- + [+] metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01) + [+] objective = objectives.performance + [+] q_config = QuantizationConfig(approach="PostTrainingDynamic", metrics=[metric], objectives=[objective]) + [+] MODEL_NAME = trainer.quantize(quant_config=q_config) + order: + - below: + above: + - pytorch_jit_script + - pytorch_jit_script_ofi + - pytorch_jit_trace + - pytorch_jit_trace_ofi + - pytorch_channels_last diff --git a/neural_coder/backends/nano_bf16.yaml b/neural_coder/backends/nano_bf16.yaml new file mode 100644 index 00000000000..2fe2bf7a4b0 --- /dev/null +++ b/neural_coder/backends/nano_bf16.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_bf16_channels_last.yaml b/neural_coder/backends/nano_bf16_channels_last.yaml new file mode 100644 index 00000000000..47127983f3b --- /dev/null +++ b/neural_coder/backends/nano_bf16_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_bf16_ipex.yaml b/neural_coder/backends/nano_bf16_ipex.yaml new file mode 100644 index 00000000000..a79635d55c8 --- /dev/null +++ b/neural_coder/backends/nano_bf16_ipex.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", use_ipex=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_bf16_ipex_channels_last.yaml new file mode 100644 index 00000000000..c8ecf0917d0 --- /dev/null +++ b/neural_coder/backends/nano_bf16_ipex_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", use_ipex=True, channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_fp32_channels_last.yaml b/neural_coder/backends/nano_fp32_channels_last.yaml new file mode 100644 index 00000000000..f6027539929 --- /dev/null +++ b/neural_coder/backends/nano_fp32_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_fp32_ipex.yaml b/neural_coder/backends/nano_fp32_ipex.yaml new file mode 100644 index 00000000000..a8c69963c56 --- /dev/null +++ b/neural_coder/backends/nano_fp32_ipex.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, use_ipex=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_fp32_ipex_channels_last.yaml new file mode 100644 index 00000000000..53094440d9e --- /dev/null +++ b/neural_coder/backends/nano_fp32_ipex_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, use_ipex=True, channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/launch.sh b/neural_coder/backends/nano_gpu_to_cpu.yaml similarity index 52% rename from examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/launch.sh rename to neural_coder/backends/nano_gpu_to_cpu.yaml index 5c9c6a3f346..ae9ebd842ec 100644 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/docker/launch.sh +++ b/neural_coder/backends/nano_gpu_to_cpu.yaml @@ -1,32 +1,24 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -DATA_DIR=$1 -CHECKPOINT_DIR=$2 -RESULT_DIR=$3 - -docker run -it --rm \ - --gpus='"device=1"' \ - --shm-size=4g \ - --ulimit memlock=-1 \ - --ulimit stack=67108864 \ - -v "$DATA_DIR":/datasets \ - -v "$CHECKPOINT_DIR":/checkpoints/ \ - -v "$RESULT_DIR":/results/ \ - -v $PWD:/code \ - -v $PWD:/workspace/jasper \ - mlperf-rnnt-ref bash +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +transformation: + location: + - ["insert_above_model_definition_line", "insert_above_input_definition_line"] + content: + - |- + [+] from bigdl.nano.pytorch import patch_torch + [+] patch_torch() + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_int8.yaml b/neural_coder/backends/nano_int8.yaml new file mode 100644 index 00000000000..c15cbbe51a4 --- /dev/null +++ b/neural_coder/backends/nano_int8.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="int8", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_bf16.yaml b/neural_coder/backends/nano_jit_bf16.yaml new file mode 100644 index 00000000000..275eb9d0225 --- /dev/null +++ b/neural_coder/backends/nano_jit_bf16.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_bf16_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_channels_last.yaml new file mode 100644 index 00000000000..ffa22db618c --- /dev/null +++ b/neural_coder/backends/nano_jit_bf16_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_bf16_ipex.yaml b/neural_coder/backends/nano_jit_bf16_ipex.yaml new file mode 100644 index 00000000000..21397985fc5 --- /dev/null +++ b/neural_coder/backends/nano_jit_bf16_ipex.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", use_ipex=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml new file mode 100644 index 00000000000..b41fbf71578 --- /dev/null +++ b/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", use_ipex=True, channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_fp32.yaml b/neural_coder/backends/nano_jit_fp32.yaml new file mode 100644 index 00000000000..71e7d4ede95 --- /dev/null +++ b/neural_coder/backends/nano_jit_fp32.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: diff --git a/neural_coder/backends/nano_jit_fp32_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_channels_last.yaml new file mode 100644 index 00000000000..bdc9a3154e4 --- /dev/null +++ b/neural_coder/backends/nano_jit_fp32_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_jit_fp32_ipex.yaml b/neural_coder/backends/nano_jit_fp32_ipex.yaml new file mode 100644 index 00000000000..f673b076a20 --- /dev/null +++ b/neural_coder/backends/nano_jit_fp32_ipex.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", use_ipex=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: diff --git a/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml new file mode 100644 index 00000000000..d434e58c886 --- /dev/null +++ b/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", use_ipex=True, channels_last=True, input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_onnxruntime_fp32.yaml b/neural_coder/backends/nano_onnxruntime_fp32.yaml new file mode 100644 index 00000000000..820ad2441c2 --- /dev/null +++ b/neural_coder/backends/nano_onnxruntime_fp32.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="onnxruntime", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml b/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml new file mode 100644 index 00000000000..7fd4e09989b --- /dev/null +++ b/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="onnxruntime", precision="int8", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_openvino_fp32.yaml b/neural_coder/backends/nano_openvino_fp32.yaml new file mode 100644 index 00000000000..d6b88ecf712 --- /dev/null +++ b/neural_coder/backends/nano_openvino_fp32.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="openvino", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/backends/nano_openvino_int8.yaml b/neural_coder/backends/nano_openvino_int8.yaml new file mode 100644 index 00000000000..b5c8ae7d045 --- /dev/null +++ b/neural_coder/backends/nano_openvino_int8.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +transformation: + location: + - ["insert_below_model_definition_line", "insert_below_input_definition_line"] + - "indent_inference_line" + content: + - |- + [+] from bigdl.nano.pytorch import InferenceOptimizer + [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="openvino", precision="int8", input_sample=INPUT_NAME) + [+] with InferenceOptimizer.get_context(MODEL_NAME): + - 1 + order: + - below: + above: \ No newline at end of file diff --git a/neural_coder/coders/autoinc/autoinc_harness.py b/neural_coder/coders/autoinc/autoinc_harness.py index 0430534f768..6918ac489ad 100644 --- a/neural_coder/coders/autoinc/autoinc_harness.py +++ b/neural_coder/coders/autoinc/autoinc_harness.py @@ -271,8 +271,6 @@ def register_transformation(self): lines_to_insert = lines_to_insert \ .replace("DATALOADER_NAME", dataloader_name) - if globals.optimum_quant_config == "": - globals.optimum_quant_config = "quantization/quant_config" optimum_quant_config_line = \ 'IncQuantizationConfig.from_pretrained("' + globals.optimum_quant_config + '")' diff --git a/neural_coder/coders/autoinc/eval_func.py b/neural_coder/coders/autoinc/eval_func.py index a0318800846..97e859724a5 100644 --- a/neural_coder/coders/autoinc/eval_func.py +++ b/neural_coder/coders/autoinc/eval_func.py @@ -40,6 +40,8 @@ def register_transformation(self): ' "eval_corr",', ' "eval_mnli/acc",', ' "eval_mnli-mm/acc",', + ' "eval_exact_match",', + ' "eval_f1",', '] # METRIC_TAGS in transformers', 'for key in keys:', ' if key in metrics.keys():', diff --git a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py new file mode 100644 index 00000000000..512310c46e9 --- /dev/null +++ b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from ...utils.line_operation import get_line_indent_level + +class TrainerToNLPTrainer(object): + def __init__(self, file) -> None: + self.file = file + self.result = [] + + def transform(self): + lines = self.file.split('\n') + + for line in lines: + if self.is_modify(line): + new_line = self.modify(line) + self.result.append(new_line) + else: + self.result.append(line) + for index, line in enumerate(self.result): + if index != len(self.result)-1: + self.result[index] += '\n' + return ''.join(self.result) + + def is_modify(self, s): + if 'trainer = Trainer(' in s: + return True + else: + return False + + def modify(self, s): + old = 'Trainer' + s = s.replace(old, 'NLPTrainer') + return s diff --git a/neural_coder/coders/tensorflow/inc.py b/neural_coder/coders/tensorflow/inc.py new file mode 100644 index 00000000000..06791c855b7 --- /dev/null +++ b/neural_coder/coders/tensorflow/inc.py @@ -0,0 +1,48 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ...utils.line_operation import get_line_left_hand_side + +class TensorFlowKerasINC(object): + def __init__(self, file) -> None: + self.file = file + self.result = [] + + def transform(self): + # import pdb + # pdb.set_trace() + lines = self.file.split('\n') + for line in lines: + if self.is_modify(line): + self.result.append(line) + self.result.append("from neural_compressor.conf.config import QuantConf") + self.result.append("from neural_compressor.experimental import Quantization") + self.result.append("from neural_compressor.experimental import common") + self.result.append("quant_config = QuantConf()") + self.result.append("quant_config.usr_cfg.model.framework = 'tensorflow'") + self.result.append("quantizer = Quantization(quant_config)") + self.result.append("quantizer.model = common.Model(" + model_name + ")") + self.result.append(model_name + " = quantizer.fit()") + else: + self.result.append(line) + for index, line in enumerate(self.result): + if index != len(self.result)-1: + self.result[index] += '\n' + return ''.join(self.result) + + def is_modify(self, s): + if 'model = tf.' in s: + return True + else: + return False diff --git a/neural_coder/docs/AWSSageMakerSupport.md b/neural_coder/docs/AWSSageMakerSupport.md new file mode 100644 index 00000000000..eb8926c12ee --- /dev/null +++ b/neural_coder/docs/AWSSageMakerSupport.md @@ -0,0 +1,32 @@ +AWS Amazon SageMaker Support +===== + +[AWS Amazon SageMaker](https://aws.amazon.com/sagemaker/) users can easily enjoy the productivity boost brought by Neural Coder by one-click installing [Neural Coder Jupyter Lab extension](https://www.npmjs.com/package/jupyter-lab-neural-compressor) in either **SageMaker Studio** or **SageMaker Notebook instance**. + +## Start Jupyter Lab 3 +[Neural Coder Jupyter extension](https://www.npmjs.com/package/jupyter-lab-neural-compressor) requires Jupyter Lab 3. Using Jupyter Lab 1 will cause installation error. 
To start Jupyter Lab 3, please check the following: + +#### For SageMaker Studio +SageMaker Studio Jupyter Lab 3 + +#### For SageMaker Notebook instance +SageMaker Notebook instance Jupyter Lab 3 + +## Installation Guide +For both cases, the installation process is exactly the same, which is by **searching ```neural-compressor``` in the Extension Manager**. + +1. Search and Install + +SageMaker Notebook instance Jupyter Lab 3 + +2. Rebuild + +SageMaker Notebook instance Jupyter Lab 3 + +3. Save and Reload + +SageMaker Notebook instance Jupyter Lab 3 + +4. Done! + +SageMaker Notebook instance Jupyter Lab 3 diff --git a/neural_coder/docs/BigDLNanoSupport.md b/neural_coder/docs/BigDLNanoSupport.md new file mode 100644 index 00000000000..a05d3f7fe2a --- /dev/null +++ b/neural_coder/docs/BigDLNanoSupport.md @@ -0,0 +1,26 @@ +BigDL Nano Support +=========================== + +| Optimization Set | API Alias | +| ------------- | ------------- | +| BF16 + Channels Last | `nano_bf16_channels_last` | +| BF16 + IPEX + Channels Last | `nano_bf16_ipex_channels_last` | +| BF16 + IPEX | `nano_bf16_ipex` | +| BF16 | `nano_bf16` | +| Channels Last | `nano_fp32_channels_last` | +| IPEX + Channels Last | `nano_fp32_ipex_channels_last` | +| IPEX | `nano_fp32_ipex` | +| Convert CUDA to CPU | `nano_gpu_to_cpu` | +| INT8 | `nano_int8` | +| JIT + BF16 + Channels Last | `nano_jit_bf16_channels_last` | +| JIT + BF16 + IPEX + Channels Last | `nano_jit_bf16_ipex_channels_last` | +| JIT + BF16 + IPEX | `nano_jit_bf16_ipex` | +| JIT + BF16 | `nano_jit_bf16` | +| JIT + Channels Last | `nano_jit_fp32_channels_last` | +| JIT + IPEX + Channels Last | `nano_jit_fp32_ipex_channels_last` | +| JIT + IPEX | `nano_jit_fp32_ipex` | +| JIT | `nano_jit_fp32` | +| ONNX Runtime | `nano_onnxruntime_fp32` | +| ONNX Runtime + INT8 | `nano_onnxruntime_int8_qlinear` | +| OpenVINO | `nano_openvino_fp32` | +| OpenVINO + INT8 | `nano_openvino_int8` | diff --git a/neural_coder/docs/PythonLauncher.md 
b/neural_coder/docs/PythonLauncher.md index 38e3bd5fae7..f08fc1d2085 100644 --- a/neural_coder/docs/PythonLauncher.md +++ b/neural_coder/docs/PythonLauncher.md @@ -10,7 +10,7 @@ Example: Let's say you are running an NLP model using ```run_glue.py``` from Hug python run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result ``` -With Neural Coder's **Launcher**, users can easily enjoy Deep Learning optimizations (default: INT8 static quantization by Intel® Neural Compressor) by simply adding an inline prefix +With Neural Coder's **Launcher**, users can easily enjoy Deep Learning optimizations (default: INT8 dynamic quantization by Intel® Neural Compressor) by simply adding an inline prefix ```bash -m neural_coder ``` @@ -27,7 +27,7 @@ Note: Any modification on the optimized code ```run_glue_optimized.py``` will be Users can specify which Deep Learning optimization they want to conduct using ```--opt``` argument. The list of supported Deep Learning optimization features can be found [here](SupportMatrix.md). -Note that if specifically optimizing with INT8 quantization by Intel® Neural Compressor, ```--strategy``` argument can be specified with either ```static```, ```static_ipex``` or ```dynamic```. For example, to run INT8 dynamic quantization by Intel® Neural Compressor instead of the default static quantization: +Note that if specifically optimizing with INT8 quantization by Intel® Neural Compressor, to choose a quantization approach (strategy), ```--approach``` argument can be specified with either ```static```, ```static_ipex``` or ```dynamic```. 
For example, to run INT8 static quantization by Intel® Neural Compressor instead of the default dynamic quantization: ```bash -python -m neural_coder --strategy dynamic run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result +python -m neural_coder --approach static run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result ``` diff --git a/neural_coder/docs/SupportMatrix.md b/neural_coder/docs/SupportMatrix.md index 0b8686020dd..5d49f5442c2 100644 --- a/neural_coder/docs/SupportMatrix.md +++ b/neural_coder/docs/SupportMatrix.md @@ -1,18 +1,24 @@ Supported Optimization Features =========================== -| Framework | Optimization | API Alias | +| Category | Optimization | API Alias | | ------------- | ------------- | ------------- | | PyTorch | [Mixed Precision](https://pytorch.org/docs/stable/amp.html) | `pytorch_amp` | | PyTorch | [Channels Last](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) | `pytorch_channels_last` | | PyTorch | [JIT (Just-In-Time) Script/Trace](https://pytorch.org/docs/stable/jit.html) & [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html) | `pytorch_jit_script`, `pytorch_jit_trace`, `pytorch_jit_script_ofi`, `pytorch_jit_trace_ofi` | | PyTorch | JIT with [TorchDynamo](https://github.com/pytorch/torchdynamo) | `pytorch_torchdynamo_jit_script`, `pytorch_torchdynamo_jit_trace`, `pytorch_torchdynamo_jit_script_ofi`, `pytorch_torchdynamo_jit_trace_ofi` | -| PyTorch | [Intel Neural Compressor Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/mixed_precision.md) | `pytorch_inc_bf16` | -| PyTorch | [Intel Neural Compressor INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/PTQ.md) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex` | -| PyTorch | [Intel Neural Compressor INT8 Dynamic 
Quantization](https://github.com/intel/neural-compressor/blob/master/docs/dynamic_quantization.md) | `pytorch_inc_dynamic_quant` | +| PyTorch | [Intel Neural Compressor (INC) Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/source/mixed_precision.md) | `pytorch_inc_bf16` | +| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/PTQ.md) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex` | +| PyTorch | [INC INT8 Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/dynamic_quantization.md) | `pytorch_inc_dynamic_quant` | | PyTorch | [Intel Extension for PyTorch (FP32, BF16, INT8 Static/Dynamic Quantization)](https://github.com/intel/intel-extension-for-pytorch) | `pytorch_ipex_fp32`, `pytorch_ipex_bf16`, `pytorch_ipex_int8_static_quant`, `pytorch_ipex_int8_dynamic_quant` | | PyTorch | [Alibaba Blade-DISC](https://github.com/alibaba/BladeDISC) | `pytorch_aliblade` | | PyTorch Lightning | [Mixed Precision](https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html) | `pytorch_lightning_bf16_cpu` | | TensorFlow | [Mixed Precision](https://www.intel.com/content/www/us/en/developer/articles/guide/getting-started-with-automixedprecisionmkl.html) | `tensorflow_amp` | | Keras | [Mixed Precision](https://www.tensorflow.org/guide/mixed_precision) | `keras_amp` | +| TensorFlow/Keras | [INC Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/PTQ.md) | `tensorflow_inc` | | ONNX Runtime | [INC Static Quantization (QLinear)](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/README.md#operator-oriented-with-qlinearops) | `onnx_inc_static_quant_qlinear` | +| ONNX Runtime | [INC Static Quantization (QDQ)](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/README.md#tensor-oriented-qdq-format) | `onnx_inc_static_quant_qdq` | +| ONNX Runtime | [INC Dynamic 
Quantization](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/README.md#dynamic-quantization) | `onnx_inc_dynamic_quant` | +| [HuggingFace Optimum-Intel](https://huggingface.co/docs/optimum/intel/index) | INC Quantization | `pytorch_inc_huggingface_optimum_static`, `pytorch_inc_huggingface_optimum_dynamic` | +| [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers/) | INC Quantization | `intel_extension_for_transformers` | +| [BigDL Nano](https://bigdl.readthedocs.io/en/latest/doc/PythonAPI/Nano/pytorch.html#bigdl-nano-pytorch-inferenceoptimizer) | [Optimization List](./BigDLNanoSupport.md) | `nano_` + [specific alias](./BigDLNanoSupport.md) | diff --git a/neural_coder/docs/release_notes/v0.4.md b/neural_coder/docs/release_notes/v0.4.md new file mode 100644 index 00000000000..933c02900a3 --- /dev/null +++ b/neural_coder/docs/release_notes/v0.4.md @@ -0,0 +1,25 @@ +v0.4 +===== + +## Highlights +- **Visual Studio Code extension**: We are delighted to announce the release of Neural Coder's [Visual Studio Code extension](https://marketplace.visualstudio.com/items?itemName=IntelNeuralCompressor.neural-coder-ext-vscode). VS Code programmers can enjoy one-click automatic enabling of Deep Learning optimization API and accelerate their Deep Learning models without manual coding. + +- **HuggingFace Transformers**: + - We supported **all** HuggingFace Transformers [examples](https://github.com/huggingface/transformers/tree/main/examples/pytorch) that calls ```Trainer``` class, and validated over **500** models from HuggingFace Transformers [model hub](https://huggingface.co/models). The models are able to be accelerated automatically with Neural Coder with minimum loss of prediction accuracy. + - We enabled the support of [HuggingFace Optimum-Intel](https://huggingface.co/docs/optimum/intel/index). 
User scripts of HuggingFace Transformers models will by default be optimized with Optimum-Intel API to enjoy performance speed-up brought by INT8 quantization. + - We enabled the support of [Intel® Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), an innovative toolkit to accelerate Transformer-based models on Intel platforms. For more details, please refer to the updated [support matrix](../SupportMatrix.md). + +- **Support of BigDL Nano**: We are delighted to announce the collaboration between Neural Coder and [BigDL Nano](https://bigdl.readthedocs.io/en/latest/doc/Nano/index.html). Users can now one-click enable BigDL Nano optimizations for PyTorch in Neural Coder. For detailed support matrix for BigDL Nano features, please refer to this [guide](../BigDLNanoSupport.md). + +- **Amazon AWS SageMaker**: We provided a user [tutorial](../AWSSageMakerSupport.md) for installing Neural Coder's JupyterLab extension in AWS SageMaker platform. Users are able to one-click install the extension in Amazon AWS SageMaker with Jupyter 3 and enjoy Neural Coder's functionalities. + +- **Python Launcher**: We added the implementation of [Python Launcher](../PythonLauncher.md) usage for Neural Coder, which will be one of the recommended user interfaces in the future as a replacement of Python API. Users can run the Python model code as it is with automatic enabling of Deep Learning optimizations by using Neural Coder's inline Python Launcher design: ```-m neural_coder```. + +- **Device Detection**: We enabled the capability of detecting running device and its ISA automatically and adjusting applied optimization features accordingly. For instance, when running Neural Coder on Intel GPU instead of Intel CPU, the PyTorch Mixed Precision optimization feature will adapt ```xpu``` instead of ```cpu```, and ```torch.half``` instead of ```torch.bfloat16```. 
+ +## Others +- **INT8 Accuracy Evaluation**: We enabled accuracy evaluation for INT8 quantizations in Neural Coder. Users are able to view the accuracy delta for each quantization optimization in Neural Coder's auto-benchmark output log. The calculation is ```acc_delta = (int8_acc - fp32_acc)/(fp32_acc)```. + +- **Auto-quantize TensorFlow/Keras scripts**: We enabled the support of auto-quantizing TensorFlow/Keras script-based models with Intel® Neural Compressor. The default quantization scheme will be applied. For more details, please refer to the updated [support matrix](../SupportMatrix.md). + +- **Auto-quantize ONNX Runtime scripts**: We enabled the support of auto-quantizing ONNX Runtime script-based models with Intel® Neural Compressor. We support [dynamic quantization](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#dynamic-quantization), static quantization ([QDQ](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#tensor-oriented-qdq-format)), and static quantization ([QLinearOps](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#operator-oriented-with-qlinearops)). For more details, please refer to the updated [support matrix](../SupportMatrix.md). 
diff --git a/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package-lock.json b/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package-lock.json index f8b6ed0a318..7097ab3fe30 100644 --- a/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package-lock.json +++ b/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package-lock.json @@ -2500,6 +2500,19 @@ "postcss-value-parser": "^4.1.0", "schema-utils": "^3.0.0", "semver": "^7.3.5" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "cssesc": { @@ -2840,12 +2853,6 @@ } } }, - "duplexer3": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.5.tgz", - "integrity": "sha512-1A8za6ws41LQgv9HrE/66jyC5yuSjQ3L/KOpFtoBilsAK2iA2wuS5rTt1OCzIvtS2V7nVmedsUU+DGRcjBmOYA==", - "dev": true - }, "duplicate-package-checker-webpack-plugin": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/duplicate-package-checker-webpack-plugin/-/duplicate-package-checker-webpack-plugin-3.0.0.tgz", @@ -2930,7 +2937,6 @@ "version": "1.4.4", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "dev": true, "requires": { "once": "^1.4.0" } @@ -3615,6 +3621,17 @@ "schema-utils": "^2.6.5" }, "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": 
"^3.0.0", + "json5": "^2.1.2" + } + }, "schema-utils": { "version": "2.7.1", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-2.7.1.tgz", @@ -4951,9 +4968,9 @@ "dev": true }, "loader-utils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.3.tgz", - "integrity": "sha512-THWqIsn8QRnvLl0shHYVBN9syumU8pYWEHPTmkiVGd+7K5eFNVSY6AJhRvgGF70gg1Dz+l/k8WicvFCxdEs60A==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", "requires": { "big.js": "^5.2.2", "emojis-list": "^3.0.0", @@ -5306,6 +5323,19 @@ "loader-utils": "^2.0.0", "schema-utils": "^3.0.0", "webpack-sources": "^1.1.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "minimatch": { @@ -5694,7 +5724,6 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "dev": true, "requires": { "wrappy": "1" } @@ -5851,28 +5880,46 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", - "dev": true, "requires": { "pump": "^3.0.0" } }, "got": { - "version": "9.6.0", - "resolved": "https://registry.npmjs.org/got/-/got-9.6.0.tgz", - "integrity": "sha512-R7eWptXuGYxwijs0eV+v3o6+XH1IqVK8dJOEecQfTmkncw9AV4dcw/Dhxi8MdlqPthxxpZyizMzyg8RTmEsG+Q==", + "version": "11.8.5", + "resolved": 
"https://registry.npmjs.org/got/-/got-11.8.5.tgz", + "integrity": "sha512-o0Je4NvQObAuZPHLFoRSkdG2lTgtcynqymzg2Vupdx6PorhaT5MCbIyXG6d4D94kk8ZG57QeosgdiqfJWhEhlQ==", "dev": true, "requires": { - "@sindresorhus/is": "^0.14.0", - "@szmarczak/http-timer": "^1.1.2", - "cacheable-request": "^6.0.0", - "decompress-response": "^3.3.0", - "duplexer3": "^0.1.4", - "get-stream": "^4.1.0", - "lowercase-keys": "^1.0.1", - "mimic-response": "^1.0.1", - "p-cancelable": "^1.0.0", - "to-readable-stream": "^1.0.0", - "url-parse-lax": "^3.0.0" + "@sindresorhus/is": "^4.0.0", + "@szmarczak/http-timer": "^4.0.5", + "@types/cacheable-request": "^6.0.1", + "@types/responselike": "^1.0.0", + "cacheable-lookup": "^5.0.3", + "cacheable-request": "^7.0.2", + "decompress-response": "^6.0.0", + "http2-wrapper": "^1.0.0-beta.5.2", + "lowercase-keys": "^2.0.0", + "p-cancelable": "^2.0.0", + "responselike": "^2.0.0" + }, + "dependencies": { + "responselike": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/responselike/-/responselike-2.0.1.tgz", + "integrity": "sha512-4gl03wn3hj1HP3yzgdI7d3lCkF95F21Pz4BPGvKHinyQzALR5CapwC8yIi0Rh58DEMQ/SguC03wFj2k0M/mHhw==", + "dev": true, + "requires": { + "lowercase-keys": "^2.0.0" + }, + "dependencies": { + "lowercase-keys": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", + "integrity": "sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==", + "dev": true + } + } + } } }, "json-buffer": { @@ -6163,12 +6210,6 @@ "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", "dev": true }, - "prepend-http": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/prepend-http/-/prepend-http-2.0.0.tgz", - "integrity": "sha512-ravE6m9Atw9Z/jjttRUZ+clIXogdghyZAuWJ3qEzjT+jI/dL1ifAqhZeC5VHzQp1MSt1+jxKkFNemj/iO7tVUA==", - "dev": true - }, "prettier": { "version": "2.7.1", "resolved": 
"https://registry.npmjs.org/prettier/-/prettier-2.7.1.tgz", @@ -6259,7 +6300,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", - "dev": true, "requires": { "end-of-stream": "^1.1.0", "once": "^1.3.1" @@ -6342,6 +6382,19 @@ "requires": { "loader-utils": "^2.0.0", "schema-utils": "^3.0.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "rc": { @@ -7191,6 +7244,19 @@ "requires": { "loader-utils": "^2.0.0", "schema-utils": "^3.0.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "style-search": { @@ -7328,6 +7394,19 @@ "requires": { "file-loader": "~6.0.0", "loader-utils": "~2.0.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "symbol-tree": { @@ -7474,12 +7553,6 @@ "os-tmpdir": "~1.0.2" } }, - "to-readable-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/to-readable-stream/-/to-readable-stream-1.0.0.tgz", - "integrity": 
"sha512-Iq25XBt6zD5npPhlLVXGFN3/gyR2/qODcKNNyTMd4vbm39HUaOiAM4PMq0eMVC/Tkxz+Zjdsc55g9yyz+Yq00Q==", - "dev": true - }, "to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -7508,14 +7581,14 @@ } }, "loader-utils": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.4.1.tgz", - "integrity": "sha512-1Qo97Y2oKaU+Ro2xnDMR26g1BwMT29jNbem1EvcujW2jqt+j5COXyscjM7bLQkM9HaxI7pkWeW7gnI072yMI9Q==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", "dev": true, "requires": { "big.js": "^5.2.2", "emojis-list": "^3.0.0", - "json5": "^1.0.1" + "json5": "^2.1.2" } } } @@ -7762,6 +7835,19 @@ "loader-utils": "^2.0.0", "mime-types": "^2.1.27", "schema-utils": "^3.0.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "url-parse": { @@ -7773,15 +7859,6 @@ "requires-port": "^1.0.0" } }, - "url-parse-lax": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz", - "integrity": "sha512-NjFKA0DidqPa5ciFcSrXnAltTtzz84ogy+NebPvfEgAck0+TNg4UJ4IN+fB7zRZfbgUf0syOo9MDxFkDSMuFaQ==", - "dev": true, - "requires": { - "prepend-http": "^2.0.0" - } - }, "util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", @@ -8249,13 +8326,25 @@ "requires": { "loader-utils": "^2.0.0", "schema-utils": "^3.0.0" + }, + "dependencies": { + "loader-utils": { + "version": "2.0.4", + "resolved": 
"https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + } + } } }, "wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "dev": true + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, "write-file-atomic": { "version": "4.0.2", diff --git a/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package.json b/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package.json index b7480628710..72d55e23cd0 100644 --- a/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package.json +++ b/neural_coder/extensions/neural_compressor_ext_lab_alibaba/package.json @@ -59,7 +59,7 @@ "@jupyterlab/mainmenu": "^3.4.7", "@jupyterlab/notebook": "^3.4.7", "@phosphor/commands": "^1.7.2", - "loader-utils": "^2.0.3", + "loader-utils": "^2.0.4", "react": "^18.2.0", "react-sanitized-html": "^2.0.0" }, @@ -106,6 +106,6 @@ }, "resolutions": { "got": "^11.8.5", - "loader-utils": "^2.0.3" + "loader-utils": "^2.0.4" } } diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/.eslintrc.json b/neural_coder/extensions/neural_compressor_ext_vscode/.eslintrc.json new file mode 100644 index 00000000000..f9b22b793c2 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/.eslintrc.json @@ -0,0 +1,24 @@ +{ + "root": true, + "parser": "@typescript-eslint/parser", + "parserOptions": { + "ecmaVersion": 6, + "sourceType": "module" + }, + "plugins": [ + "@typescript-eslint" + ], + "rules": { + "@typescript-eslint/naming-convention": "warn", + "@typescript-eslint/semi": "warn", + "curly": "warn", + "eqeqeq": "warn", + 
"no-throw-literal": "warn", + "semi": "off" + }, + "ignorePatterns": [ + "out", + "dist", + "**/*.d.ts" + ] +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/.gitignore b/neural_coder/extensions/neural_compressor_ext_vscode/.gitignore new file mode 100644 index 00000000000..0b60dfa12fb --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/.gitignore @@ -0,0 +1,5 @@ +out +dist +node_modules +.vscode-test/ +*.vsix diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/.vscodeignore b/neural_coder/extensions/neural_compressor_ext_vscode/.vscodeignore new file mode 100644 index 00000000000..8791332ab30 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/.vscodeignore @@ -0,0 +1,12 @@ +.vscode/** +.vscode-test/** +out/** +node_modules/** +.gitignore +.yarnrc +webpack.config.js +vsc-extension-quickstart.md +**/tsconfig.json +**/.eslintrc.json +**/*.map +**/*.ts diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/CHANGELOG.md b/neural_coder/extensions/neural_compressor_ext_vscode/CHANGELOG.md new file mode 100644 index 00000000000..03ea45e180e --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/CHANGELOG.md @@ -0,0 +1,9 @@ +# Change Log + +All notable changes to the "neural-compressor-ext-vscode" extension will be documented in this file. + +Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. 
+ +## [Unreleased] + +- Initial release \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/README.md b/neural_coder/extensions/neural_compressor_ext_vscode/README.md new file mode 100644 index 00000000000..dbe3529be23 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/README.md @@ -0,0 +1,127 @@ +Neural Coder +======================= +Neural Coder, a novel component under **Intel® Neural Compressor** to further simplify the deployment of DL models via **one-click** automated code changes for device compatibility and optimization enabling, has also enabled its extension service in VS Code to further simplify the usage for general programmers that use VS Code as a daily application. +

+
+ Neural Coder Logo +

+ +[Neural Coder](https://marketplace.visualstudio.com/items?itemName=IntelNeuralCompressor.neural-coder-ext-vscode) is an [open-source](https://github.com/intel/neural-compressor/tree/master/neural_coder) extension for [Visual Studio Code](https://code.visualstudio.com). + +# Background Introduction +## Intel® Neural Compressor +Intel® Neural Compressor is an open-source Python library for model compression that reduces the model size and increases DL inference performance on CPUs or GPUs. It supports post-training static and dynamic quantization of PyTorch models. It supports automatic accuracy-driven tuning strategies for users to easily generate quantized model. The users can easily apply static, dynamic and aware-training quantization approaches while giving an expected accuracy criteria. +## Neural Coder +Neural Coder is a novel component under Intel® Neural Compressor to further simplify the deployment of DL models via one-click automated code changes for device compatibility and optimization enabling. Subsequently, Neural Coder can perform automated benchmark on all optimization approaches and evaluate for the best out-of-box optimized performance. Neural Coder uses static program analysis and heuristics to help users take advantage of Intel DL Boost and hardware features to improve performance. This one-click enabling boosts developer productivity while making it easier to take advantage of acceleration. +## Neural Coder Extension in VSCode +VS Code users can leverage Neural Coder's extension to perform **automatic quantization and benchmark evaluation** for Deep Learning models. + +

+ Neural coder Usage +

+ +# Neural Coder Extension Usage +We provide here a detailed step-by-step guide on using Neural Coder extension in VS Code: +

+ Neural coder Config +

+ +### 1. Open +Open VS Code Extension and link to a remote Linux-based server via SSH since Neural Coder requires a Linux-based environment. + +

+VS Code connects to a remote Linux server +

+ +> If you're using VS Code on a Linux-based machine, then this step is not needed. + +### 2. Search +Search for Neural Coder extension in VS Code extension market: + +Simply enter "Neural Coder" in the search box of the extension market to find the extension package. + +You will be able to find the icon below, and then just click "Install". (Note: The extension installation location should be a SSH remote server to which you are connected, assuming your VS Code is Windows-based.) +

+Search for Neural Coder in VSCode extension market +

+Once the installation is done, it will display that the extension is installed and enabled on your machine, and you're also able to disable it or uninstall it in case you need to. +

+Neural Coder extension has been successfully installed and enabled +

+ +### 3. Setting +Click the "Extension Settings" tab of the setting button on the right and fill in the path of Python you want to run. +- Click the "Extension Settings" tab + +

+Click Extension SettingsPython Path for Neural Coder +

+ +- fill in the path +

+Click Extension SettingsPython Path for Neural Coder +

+ +### 4. Icon +Open your code for the Deep Learning model that you want to quantize and evaluate. + +You can see a new icon appear to the upper right, and also a left sidebars for the purpose of displaying operation history. +Hover over and see that it's a Neural Coder's icon. +- The icon of Neural Coder extension +

+Click Extension SettingsPython Path for Neural Coder +

+ +- The history panel of Neural Coder +

+Click Extension SettingsPython Path for Neural Coder +

+ +### 5. optimization (quantization) +Click the Neural Coder button at the top right and select the optimization (quantization) you want to conduct on your Deep Learning code. +#### 5.1 Enable +Select "INC Enable INT8 (Static)", "INC Enable INT8 (Dynamic)", or "INC Enable BF16" + +

+Select 'INC Enable' +

+ +Wait for the progress bar. You will see that the quantization has been enabled into your Deep Learning code: + +

+Auto-Enabling of Quantization via VS Code Neural Coder extension (e.g. HuggingFace model) +

+ +The history of Neural Coder enabling that you have conducted will appear in the history panel, in the form of patch files, and you can easily track back to see how the quantization enabling is done by Neural Coder for your code at a specific time point: + +- The history of Neural Coder enabling that you have conducted +

+The history of Neural Coder enabling that you have conducted +

+ +- A specific Neural Coder enabling as a patch file +

+A specific Neural Coder enabling as a patch file +

+ +#### 5.2 Auto +Select "INC Auto Enable Benchmark" +

+Select 'INC Auto' +

+Enter the Python code execution parameters (argparse) for the current code: +

+Enter your Python code execution parameters +

+An "Output" panel will pop up below, displaying the enabling and benchmark results for the current Deep Learning code: +

+Result output +

+The "Auto" section in the history panel keeps the patch file (.diff) for each optimization within this benchmark execution: +

+Patch files for all optimizations in this benchmark +

+Also click to view patch result: +

+Content in the patch file +

\ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-auto.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-auto.svg new file mode 100644 index 00000000000..1fb66bb73d9 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-auto.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-bf16.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-bf16.svg new file mode 100644 index 00000000000..1d9a47ca737 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-bf16.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-dynamic.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-dynamic.svg new file mode 100644 index 00000000000..ad25a0b7bc6 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-dynamic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-static.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-static.svg new file mode 100644 index 00000000000..6b450f8546a --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-int8-static.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-title.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-title.svg new file mode 100644 index 00000000000..5842aa2398b --- /dev/null +++ 
b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark-icon-menu-title.svg @@ -0,0 +1,4 @@ + + + + diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark_logo.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark_logo.png new file mode 100644 index 00000000000..f8841abbb68 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/dark_logo.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/left-bar-icon.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/left-bar-icon.svg new file mode 100644 index 00000000000..2619b857b7b --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/left-bar-icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/sideBarInput.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/sideBarInput.svg new file mode 100644 index 00000000000..0c7ff57fdc1 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/dark/sideBarInput.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/dark-icon-menu-auto.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/dark-icon-menu-auto.svg new file mode 100644 index 00000000000..f77303e887f --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/dark-icon-menu-auto.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-bf16.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-bf16.svg new file mode 100644 index 00000000000..df6ec6617a2 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-bf16.svg @@ -0,0 +1 @@ + \ No 
newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-dynamic.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-dynamic.svg new file mode 100644 index 00000000000..a4a276c6c41 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-dynamic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-static.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-static.svg new file mode 100644 index 00000000000..d87e0506521 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-int8-static.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-title.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-title.svg new file mode 100644 index 00000000000..1a8a158316f --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light-icon-menu-title.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light_logo.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light_logo.png new file mode 100644 index 00000000000..e2a466c6d46 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/light_logo.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/light/sideBarInput.svg b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/sideBarInput.svg new file mode 100644 index 00000000000..b02fef18a8d --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/images/light/sideBarInput.svg @@ -0,0 +1 @@ + \ No newline at 
end of file diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/logo_icon.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/logo_icon.png new file mode 100644 index 00000000000..da2fabc7ca4 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/logo_icon.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/CONFIG.gif b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/CONFIG.gif new file mode 100644 index 00000000000..400146a2bc8 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/CONFIG.gif differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/USAGE.gif b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/USAGE.gif new file mode 100644 index 00000000000..27d46670e80 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/USAGE.gif differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/auto.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/auto.png new file mode 100644 index 00000000000..0c782f68dca Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/auto.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/autoEnabling.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/autoEnabling.png new file mode 100644 index 00000000000..81608cf6272 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/autoEnabling.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickAuto.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickAuto.png new file mode 100644 index 00000000000..705db6d4371 Binary files /dev/null and 
b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickAuto.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickEnable.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickEnable.png new file mode 100644 index 00000000000..6315a68837d Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickEnable.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickSetting.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickSetting.png new file mode 100644 index 00000000000..199173dc7f9 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/clickSetting.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/connectSSH.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/connectSSH.png new file mode 100644 index 00000000000..eba75e9fc10 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/connectSSH.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/content.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/content.png new file mode 100644 index 00000000000..d40ce896e59 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/content.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/enableHistory.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/enableHistory.png new file mode 100644 index 00000000000..ebaf0230252 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/enableHistory.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/historyDetail.png 
b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/historyDetail.png new file mode 100644 index 00000000000..4d7d8223066 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/historyDetail.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/install.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/install.png new file mode 100644 index 00000000000..6cfd1c64499 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/install.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/leftIcon.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/leftIcon.png new file mode 100644 index 00000000000..71c46d03992 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/leftIcon.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/logo.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/logo.png new file mode 100644 index 00000000000..7d8ec7c4620 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/logo.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/outPut.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/outPut.png new file mode 100644 index 00000000000..31c007c4acf Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/outPut.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/params.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/params.png new file mode 100644 index 00000000000..6ecd9545ec9 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/params.png differ diff --git 
a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/search.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/search.png new file mode 100644 index 00000000000..d6ce6796101 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/search.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/settingPath.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/settingPath.png new file mode 100644 index 00000000000..e73ba35d258 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/settingPath.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/topRight.png b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/topRight.png new file mode 100644 index 00000000000..fe5f592fd24 Binary files /dev/null and b/neural_coder/extensions/neural_compressor_ext_vscode/images/readme/topRight.png differ diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/package-lock.json b/neural_coder/extensions/neural_compressor_ext_vscode/package-lock.json new file mode 100644 index 00000000000..b7d89ae9ac5 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/package-lock.json @@ -0,0 +1,2609 @@ +{ + "name": "neural-coder-ext-vscode", + "version": "0.0.8", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@discoveryjs/json-ext": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz", + "integrity": "sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==", + "dev": true + }, + "@eslint/eslintrc": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.3.tgz", + "integrity": "sha512-uj3pT6Mg+3t39fvLrj8iuCIJ38zKO9FpGtJ4BBJebJhEwjoT+KLVNCcHT5QC9NGRIEi7fZ0ZR8YRb884auB4Lg==", + "dev": true, + 
"requires": { + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": "^9.4.0", + "globals": "^13.15.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" + } + }, + "@humanwhocodes/config-array": { + "version": "0.10.7", + "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.10.7.tgz", + "integrity": "sha512-MDl6D6sBsaV452/QSdX+4CXIjZhIcI0PELsxUjk4U828yd58vk3bTIvk/6w5FY+4hIy9sLW0sfrV7K7Kc++j/w==", + "dev": true, + "requires": { + "@humanwhocodes/object-schema": "^1.2.1", + "debug": "^4.1.1", + "minimatch": "^3.0.4" + } + }, + "@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": true + }, + "@humanwhocodes/object-schema": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz", + "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==", + "dev": true + }, + "@jridgewell/gen-mapping": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.2.tgz", + "integrity": "sha512-mh65xKQAzI6iBcFzwv28KVWSmCkdRBWoOh+bYQGW3+6OZvbbN3TqMGo5hqYxQniRcH9F2VZIoJCm4pa3BPDK/A==", + "dev": true, + "requires": { + "@jridgewell/set-array": "^1.0.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.9" + } + }, + "@jridgewell/resolve-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz", + "integrity": "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==", + "dev": true + }, + "@jridgewell/set-array": { + "version": "1.1.2", + "resolved": 
"https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz", + "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==", + "dev": true + }, + "@jridgewell/source-map": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.2.tgz", + "integrity": "sha512-m7O9o2uR8k2ObDysZYzdfhb08VuEml5oWGiosa1VdaPZ/A6QyPkAJuwN0Q1lhULOf6B7MtQmHENS743hWtCrgw==", + "dev": true, + "requires": { + "@jridgewell/gen-mapping": "^0.3.0", + "@jridgewell/trace-mapping": "^0.3.9" + } + }, + "@jridgewell/sourcemap-codec": { + "version": "1.4.14", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz", + "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==", + "dev": true + }, + "@jridgewell/trace-mapping": { + "version": "0.3.17", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.17.tgz", + "integrity": "sha512-MCNzAp77qzKca9+W/+I0+sEpaUnZoeasnghNeVc41VZCEKaCH73Vq3BZZ/SzWIgrqE4H4ceI+p+b6C0mHf9T4g==", + "dev": true, + "requires": { + "@jridgewell/resolve-uri": "3.1.0", + "@jridgewell/sourcemap-codec": "1.4.14" + } + }, + "@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, + "requires": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + } + }, + "@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true + }, + "@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + 
"integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": true, + "requires": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + } + }, + "@tootallnate/once": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-1.1.2.tgz", + "integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==", + "dev": true + }, + "@types/eslint": { + "version": "8.4.7", + "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.4.7.tgz", + "integrity": "sha512-ehM7cCt2RSFs42mb+lcmhFT9ouIlV92PuaeRGn8N8c98oMjG4Z5pJHA9b1QiCcuqnbPSHcyfiD3mlhqMaHsQIw==", + "dev": true, + "requires": { + "@types/estree": "*", + "@types/json-schema": "*" + } + }, + "@types/eslint-scope": { + "version": "3.7.4", + "resolved": "https://registry.npmjs.org/@types/eslint-scope/-/eslint-scope-3.7.4.tgz", + "integrity": "sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA==", + "dev": true, + "requires": { + "@types/eslint": "*", + "@types/estree": "*" + } + }, + "@types/estree": { + "version": "0.0.51", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.51.tgz", + "integrity": "sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ==", + "dev": true + }, + "@types/fs-extra": { + "version": "9.0.13", + "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-9.0.13.tgz", + "integrity": "sha512-nEnwB++1u5lVDM2UI4c1+5R+FYaKfaAzS4OococimjVm3nQw3TuzH5UNsocrcTBbhnerblyHj4A49qXbIiZdpA==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, + "@types/glob": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@types/glob/-/glob-8.0.0.tgz", + "integrity": "sha512-l6NQsDDyQUVeoTynNpC9uRvCUint/gSUXQA2euwmTuWGvPY5LSDUu6tkCtJB2SvGQlJQzLaKqcGZP4//7EDveA==", + "dev": true, + "requires": { + "@types/minimatch": "*", + "@types/node": "*" + } 
+ }, + "@types/json-schema": { + "version": "7.0.11", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz", + "integrity": "sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ==", + "dev": true + }, + "@types/minimatch": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/@types/minimatch/-/minimatch-5.1.2.tgz", + "integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==", + "dev": true + }, + "@types/mocha": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.0.tgz", + "integrity": "sha512-rADY+HtTOA52l9VZWtgQfn4p+UDVM2eDVkMZT1I6syp0YKxW2F9v+0pbRZLsvskhQv/vMb6ZfCay81GHbz5SHg==", + "dev": true + }, + "@types/node": { + "version": "16.11.68", + "resolved": "https://registry.npmjs.org/@types/node/-/node-16.11.68.tgz", + "integrity": "sha512-JkRpuVz3xCNCWaeQ5EHLR/6woMbHZz/jZ7Kmc63AkU+1HxnoUugzSWMck7dsR4DvNYX8jp9wTi9K7WvnxOIQZQ==", + "dev": true + }, + "@types/semver": { + "version": "7.3.12", + "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.3.12.tgz", + "integrity": "sha512-WwA1MW0++RfXmCr12xeYOOC5baSC9mSb0ZqCquFzKhcoF4TvHu5MKOuXsncgZcpVFhB1pXd5hZmM0ryAoCp12A==", + "dev": true + }, + "@types/vscode": { + "version": "1.72.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.72.0.tgz", + "integrity": "sha512-WvHluhUo+lQvE3I4wUagRpnkHuysB4qSyOQUyIAS9n9PYMJjepzTUD8Jyks0YeXoPD0UGctjqp2u84/b3v6Ydw==", + "dev": true + }, + "@typescript-eslint/eslint-plugin": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.40.1.tgz", + "integrity": "sha512-FsWboKkWdytGiXT5O1/R9j37YgcjO8MKHSUmWnIEjVaz0krHkplPnYi7mwdb+5+cs0toFNQb0HIrN7zONdIEWg==", + "dev": true, + "requires": { + "@typescript-eslint/scope-manager": "5.40.1", + "@typescript-eslint/type-utils": "5.40.1", + "@typescript-eslint/utils": "5.40.1", + 
"debug": "^4.3.4", + "ignore": "^5.2.0", + "regexpp": "^3.2.0", + "semver": "^7.3.7", + "tsutils": "^3.21.0" + } + }, + "@typescript-eslint/parser": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.40.1.tgz", + "integrity": "sha512-IK6x55va5w4YvXd4b3VrXQPldV9vQTxi5ov+g4pMANsXPTXOcfjx08CRR1Dfrcc51syPtXHF5bgLlMHYFrvQtg==", + "dev": true, + "requires": { + "@typescript-eslint/scope-manager": "5.40.1", + "@typescript-eslint/types": "5.40.1", + "@typescript-eslint/typescript-estree": "5.40.1", + "debug": "^4.3.4" + } + }, + "@typescript-eslint/scope-manager": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-5.40.1.tgz", + "integrity": "sha512-jkn4xsJiUQucI16OLCXrLRXDZ3afKhOIqXs4R3O+M00hdQLKR58WuyXPZZjhKLFCEP2g+TXdBRtLQ33UfAdRUg==", + "dev": true, + "requires": { + "@typescript-eslint/types": "5.40.1", + "@typescript-eslint/visitor-keys": "5.40.1" + } + }, + "@typescript-eslint/type-utils": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-5.40.1.tgz", + "integrity": "sha512-DLAs+AHQOe6n5LRraXiv27IYPhleF0ldEmx6yBqBgBLaNRKTkffhV1RPsjoJBhVup2zHxfaRtan8/YRBgYhU9Q==", + "dev": true, + "requires": { + "@typescript-eslint/typescript-estree": "5.40.1", + "@typescript-eslint/utils": "5.40.1", + "debug": "^4.3.4", + "tsutils": "^3.21.0" + } + }, + "@typescript-eslint/types": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-5.40.1.tgz", + "integrity": "sha512-Icg9kiuVJSwdzSQvtdGspOlWNjVDnF3qVIKXdJ103o36yRprdl3Ge5cABQx+csx960nuMF21v8qvO31v9t3OHw==", + "dev": true + }, + "@typescript-eslint/typescript-estree": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-5.40.1.tgz", + "integrity": 
"sha512-5QTP/nW5+60jBcEPfXy/EZL01qrl9GZtbgDZtDPlfW5zj/zjNrdI2B5zMUHmOsfvOr2cWqwVdWjobCiHcedmQA==", + "dev": true, + "requires": { + "@typescript-eslint/types": "5.40.1", + "@typescript-eslint/visitor-keys": "5.40.1", + "debug": "^4.3.4", + "globby": "^11.1.0", + "is-glob": "^4.0.3", + "semver": "^7.3.7", + "tsutils": "^3.21.0" + } + }, + "@typescript-eslint/utils": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-5.40.1.tgz", + "integrity": "sha512-a2TAVScoX9fjryNrW6BZRnreDUszxqm9eQ9Esv8n5nXApMW0zeANUYlwh/DED04SC/ifuBvXgZpIK5xeJHQ3aw==", + "dev": true, + "requires": { + "@types/json-schema": "^7.0.9", + "@types/semver": "^7.3.12", + "@typescript-eslint/scope-manager": "5.40.1", + "@typescript-eslint/types": "5.40.1", + "@typescript-eslint/typescript-estree": "5.40.1", + "eslint-scope": "^5.1.1", + "eslint-utils": "^3.0.0", + "semver": "^7.3.7" + } + }, + "@typescript-eslint/visitor-keys": { + "version": "5.40.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-5.40.1.tgz", + "integrity": "sha512-A2DGmeZ+FMja0geX5rww+DpvILpwo1OsiQs0M+joPWJYsiEFBLsH0y1oFymPNul6Z5okSmHpP4ivkc2N0Cgfkw==", + "dev": true, + "requires": { + "@typescript-eslint/types": "5.40.1", + "eslint-visitor-keys": "^3.3.0" + } + }, + "@vscode/test-electron": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@vscode/test-electron/-/test-electron-2.1.5.tgz", + "integrity": "sha512-O/ioqFpV+RvKbRykX2ItYPnbcZ4Hk5V0rY4uhQjQTLhGL9WZUvS7exzuYQCCI+ilSqJpctvxq2llTfGXf9UnnA==", + "dev": true, + "requires": { + "http-proxy-agent": "^4.0.1", + "https-proxy-agent": "^5.0.0", + "rimraf": "^3.0.2", + "unzipper": "^0.10.11" + } + }, + "@webassemblyjs/ast": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.1.tgz", + "integrity": "sha512-ukBh14qFLjxTQNTXocdyksN5QdM28S1CxHt2rdskFyL+xFV7VremuBLVbmCePj+URalXBENx/9Lm7lnhihtCSw==", + "dev": true, + "requires": { + 
"@webassemblyjs/helper-numbers": "1.11.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.1" + } + }, + "@webassemblyjs/floating-point-hex-parser": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.1.tgz", + "integrity": "sha512-iGRfyc5Bq+NnNuX8b5hwBrRjzf0ocrJPI6GWFodBFzmFnyvrQ83SHKhmilCU/8Jv67i4GJZBMhEzltxzcNagtQ==", + "dev": true + }, + "@webassemblyjs/helper-api-error": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.1.tgz", + "integrity": "sha512-RlhS8CBCXfRUR/cwo2ho9bkheSXG0+NwooXcc3PAILALf2QLdFyj7KGsKRbVc95hZnhnERon4kW/D3SZpp6Tcg==", + "dev": true + }, + "@webassemblyjs/helper-buffer": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.1.tgz", + "integrity": "sha512-gwikF65aDNeeXa8JxXa2BAk+REjSyhrNC9ZwdT0f8jc4dQQeDQ7G4m0f2QCLPJiMTTO6wfDmRmj/pW0PsUvIcA==", + "dev": true + }, + "@webassemblyjs/helper-numbers": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.1.tgz", + "integrity": "sha512-vDkbxiB8zfnPdNK9Rajcey5C0w+QJugEglN0of+kmO8l7lDb77AnlKYQF7aarZuCrv+l0UvqL+68gSDr3k9LPQ==", + "dev": true, + "requires": { + "@webassemblyjs/floating-point-hex-parser": "1.11.1", + "@webassemblyjs/helper-api-error": "1.11.1", + "@xtuc/long": "4.2.2" + } + }, + "@webassemblyjs/helper-wasm-bytecode": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.1.tgz", + "integrity": "sha512-PvpoOGiJwXeTrSf/qfudJhwlvDQxFgelbMqtq52WWiXC6Xgg1IREdngmPN3bs4RoO83PnL/nFrxucXj1+BX62Q==", + "dev": true + }, + "@webassemblyjs/helper-wasm-section": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.1.tgz", + "integrity": 
"sha512-10P9No29rYX1j7F3EVPX3JvGPQPae+AomuSTPiF9eBQeChHI6iqjMIwR9JmOJXwpnn/oVGDk7I5IlskuMwU/pg==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/helper-buffer": "1.11.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.1", + "@webassemblyjs/wasm-gen": "1.11.1" + } + }, + "@webassemblyjs/ieee754": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.1.tgz", + "integrity": "sha512-hJ87QIPtAMKbFq6CGTkZYJivEwZDbQUgYd3qKSadTNOhVY7p+gfP6Sr0lLRVTaG1JjFj+r3YchoqRYxNH3M0GQ==", + "dev": true, + "requires": { + "@xtuc/ieee754": "^1.2.0" + } + }, + "@webassemblyjs/leb128": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.1.tgz", + "integrity": "sha512-BJ2P0hNZ0u+Th1YZXJpzW6miwqQUGcIHT1G/sf72gLVD9DZ5AdYTqPNbHZh6K1M5VmKvFXwGSWZADz+qBWxeRw==", + "dev": true, + "requires": { + "@xtuc/long": "4.2.2" + } + }, + "@webassemblyjs/utf8": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.1.tgz", + "integrity": "sha512-9kqcxAEdMhiwQkHpkNiorZzqpGrodQQ2IGrHHxCy+Ozng0ofyMA0lTqiLkVs1uzTRejX+/O0EOT7KxqVPuXosQ==", + "dev": true + }, + "@webassemblyjs/wasm-edit": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.1.tgz", + "integrity": "sha512-g+RsupUC1aTHfR8CDgnsVRVZFJqdkFHpsHMfJuWQzWU3tvnLC07UqHICfP+4XyL2tnr1amvl1Sdp06TnYCmVkA==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/helper-buffer": "1.11.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.1", + "@webassemblyjs/helper-wasm-section": "1.11.1", + "@webassemblyjs/wasm-gen": "1.11.1", + "@webassemblyjs/wasm-opt": "1.11.1", + "@webassemblyjs/wasm-parser": "1.11.1", + "@webassemblyjs/wast-printer": "1.11.1" + } + }, + "@webassemblyjs/wasm-gen": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.1.tgz", + 
"integrity": "sha512-F7QqKXwwNlMmsulj6+O7r4mmtAlCWfO/0HdgOxSklZfQcDu0TpLiD1mRt/zF25Bk59FIjEuGAIyn5ei4yMfLhA==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.1", + "@webassemblyjs/ieee754": "1.11.1", + "@webassemblyjs/leb128": "1.11.1", + "@webassemblyjs/utf8": "1.11.1" + } + }, + "@webassemblyjs/wasm-opt": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.1.tgz", + "integrity": "sha512-VqnkNqnZlU5EB64pp1l7hdm3hmQw7Vgqa0KF/KCNO9sIpI6Fk6brDEiX+iCOYrvMuBWDws0NkTOxYEb85XQHHw==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/helper-buffer": "1.11.1", + "@webassemblyjs/wasm-gen": "1.11.1", + "@webassemblyjs/wasm-parser": "1.11.1" + } + }, + "@webassemblyjs/wasm-parser": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.1.tgz", + "integrity": "sha512-rrBujw+dJu32gYB7/Lup6UhdkPx9S9SnobZzRVL7VcBH9Bt9bCBLEuX/YXOOtBsOZ4NQrRykKhffRWHvigQvOA==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/helper-api-error": "1.11.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.1", + "@webassemblyjs/ieee754": "1.11.1", + "@webassemblyjs/leb128": "1.11.1", + "@webassemblyjs/utf8": "1.11.1" + } + }, + "@webassemblyjs/wast-printer": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.11.1.tgz", + "integrity": "sha512-IQboUWM4eKzWW+N/jij2sRatKMh99QEelo3Eb2q0qXkvPRISAj8Qxtmw5itwqK+TTkBuUIE45AxYPToqPtL5gg==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.11.1", + "@xtuc/long": "4.2.2" + } + }, + "@webpack-cli/configtest": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@webpack-cli/configtest/-/configtest-1.2.0.tgz", + "integrity": "sha512-4FB8Tj6xyVkyqjj1OaTqCjXYULB9FMkqQ8yGrZjRDrYh0nOE+7Lhs45WioWQQMV+ceFlE368Ukhe6xdvJM9Egg==", + "dev": true + }, + 
"@webpack-cli/info": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@webpack-cli/info/-/info-1.5.0.tgz", + "integrity": "sha512-e8tSXZpw2hPl2uMJY6fsMswaok5FdlGNRTktvFk2sD8RjH0hE2+XistawJx1vmKteh4NmGmNUrp+Tb2w+udPcQ==", + "dev": true, + "requires": { + "envinfo": "^7.7.3" + } + }, + "@webpack-cli/serve": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/@webpack-cli/serve/-/serve-1.7.0.tgz", + "integrity": "sha512-oxnCNGj88fL+xzV+dacXs44HcDwf1ovs3AuEzvP7mqXw7fQntqIhQ1BRmynh4qEKQSSSRSWVyXRjmTbZIX9V2Q==", + "dev": true + }, + "@xtuc/ieee754": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz", + "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==", + "dev": true + }, + "@xtuc/long": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz", + "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==", + "dev": true + }, + "acorn": { + "version": "8.8.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.0.tgz", + "integrity": "sha512-QOxyigPVrpZ2GXT+PFyZTl6TtOFc5egxHIP9IlQ+RbupQuX4RkT/Bee4/kQuC02Xkzg84JcT7oLYtDIQxp+v7w==", + "dev": true + }, + "acorn-import-assertions": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/acorn-import-assertions/-/acorn-import-assertions-1.8.0.tgz", + "integrity": "sha512-m7VZ3jwz4eK6A4Vtt8Ew1/mNbP24u0FhdyfA7fSvnJR6LMdfOYnmuIrrJAgrYfYJ10F/otaHTtrtrtmHdMNzEw==", + "dev": true + }, + "acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true + }, + "agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": 
"sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dev": true, + "requires": { + "debug": "4" + } + }, + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "dev": true + }, + "ansi-colors": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", + "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "dev": true + }, + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "anymatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", + "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", + "dev": true, + "requires": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + } + }, + "argparse": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true + }, + "array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "big-integer": { + "version": "1.6.51", + "resolved": "https://registry.npmjs.org/big-integer/-/big-integer-1.6.51.tgz", + "integrity": "sha512-GPEid2Y9QU1Exl1rpO9B2IPJGHPSupF5GnVIP0blYvNOMer2bTvSWs1jGOUg04hTmu67nmLsQ9TBo1puaotBHg==", + "dev": true + }, + "binary": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/binary/-/binary-0.3.0.tgz", + "integrity": "sha512-D4H1y5KYwpJgK8wk1Cue5LLPgmwHKYSChkbspQg5JtVuR5ulGckxfR62H3AE9UDkdMC8yyXlqYihuz3Aqg2XZg==", + "dev": true, + "requires": { + "buffers": "~0.1.1", + "chainsaw": "~0.1.0" + } + }, + "binary-extensions": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", + "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", + "dev": true + }, + "bluebird": { + "version": "3.4.7", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", + "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": 
"sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "requires": { + "fill-range": "^7.0.1" + } + }, + "browser-stdout": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", + "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", + "dev": true + }, + "browserslist": { + "version": "4.21.4", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.4.tgz", + "integrity": "sha512-CBHJJdDmgjl3daYjN5Cp5kbTf1mUhZoS+beLklHIvkOWscs83YAhLlF3Wsh/lciQYAcbBJgTOD44VtG31ZM4Hw==", + "dev": true, + "requires": { + "caniuse-lite": "^1.0.30001400", + "electron-to-chromium": "^1.4.251", + "node-releases": "^2.0.6", + "update-browserslist-db": "^1.0.9" + } + }, + "buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true + }, + "buffer-indexof-polyfill": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/buffer-indexof-polyfill/-/buffer-indexof-polyfill-1.0.2.tgz", + "integrity": "sha512-I7wzHwA3t1/lwXQh+A5PbNvJxgfo5r3xulgpYDB5zckTu/Z9oUK9biouBKQUjEqzaz3HnAT6TYoovmE+GqSf7A==", + "dev": true + }, + "buffers": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/buffers/-/buffers-0.1.1.tgz", + "integrity": "sha512-9q/rDEGSb/Qsvv2qvzIzdluL5k7AaJOTrw23z9reQthrbF7is4CtlT0DXyO1oei2DCp4uojjzQ7igaSHp1kAEQ==", + "dev": true + }, + "callsites": { + "version": "3.1.0", + "resolved": 
"https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true + }, + "camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true + }, + "caniuse-lite": { + "version": "1.0.30001422", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001422.tgz", + "integrity": "sha512-hSesn02u1QacQHhaxl/kNMZwqVG35Sz/8DgvmgedxSH8z9UUpcDYSPYgsj3x5dQNRcNp6BwpSfQfVzYUTm+fog==", + "dev": true + }, + "chainsaw": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/chainsaw/-/chainsaw-0.1.0.tgz", + "integrity": "sha512-75kWfWt6MEKNC8xYXIdRpDehRYY/tNSgwKaJq+dbbDcxORuVrrQ+SEHoWsniVn9XPYfP4gmdWIeDk/4YNp1rNQ==", + "dev": true, + "requires": { + "traverse": ">=0.3.0 <0.4" + } + }, + "chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + } + }, + "chokidar": { + "version": "3.5.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", + "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", + "dev": true, + "requires": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "fsevents": "~2.3.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + } + }, + "chrome-trace-event": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz", + "integrity": 
"sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==", + "dev": true + }, + "cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "dev": true, + "requires": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "clone-deep": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-4.0.1.tgz", + "integrity": "sha512-neHB9xuzh/wk0dIHweyAXv2aPGZIVk3pLMe+/RNzINf17fe0OG96QroktYAUm7SM1PBnzTabaLboqqxDyMU+SQ==", + "dev": true, + "requires": { + "is-plain-object": "^2.0.4", + "kind-of": "^6.0.2", + "shallow-clone": "^3.0.0" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "colorette": { + "version": "2.0.19", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.19.tgz", + "integrity": "sha512-3tlv/dIP7FWvj3BsbHrGLJ6l/oKh1O3TcgBqMn+yyCagOxc23fyzDS6HypQbgxWbkpDnf52p1LuR4eWDQ/K9WQ==", + "dev": true + }, + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "dev": true + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": 
"sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, + "core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", + "dev": true + }, + "cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "requires": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + } + }, + "debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dev": true, + "requires": { + "ms": "2.1.2" + } + }, + "decamelize": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", + "integrity": "sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==", + "dev": true + }, + "deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true + }, + "diff": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", + "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==", + "dev": true + }, + "dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", + "dev": true, + "requires": { + "path-type": 
"^4.0.0" + } + }, + "doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "duplexer2": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/duplexer2/-/duplexer2-0.1.4.tgz", + "integrity": "sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA==", + "dev": true, + "requires": { + "readable-stream": "^2.0.2" + } + }, + "electron-to-chromium": { + "version": "1.4.284", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.284.tgz", + "integrity": "sha512-M8WEXFuKXMYMVr45fo8mq0wUrrJHheiKZf6BArTKk9ZBYCKJEOU5H8cdWgDT+qCVZf7Na4lVUaZsA+h6uA9+PA==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "enhanced-resolve": { + "version": "5.10.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.10.0.tgz", + "integrity": "sha512-T0yTFjdpldGY8PmuXXR0PyQ1ufZpEGiHVrp7zHKB7jdR4qlmZHhONVM5AQOAWXuF/w3dnHbEQVrNptJgt7F+cQ==", + "dev": true, + "requires": { + "graceful-fs": "^4.2.4", + "tapable": "^2.2.0" + } + }, + "envinfo": { + "version": "7.8.1", + "resolved": "https://registry.npmjs.org/envinfo/-/envinfo-7.8.1.tgz", + "integrity": "sha512-/o+BXHmB7ocbHEAs6F2EnG0ogybVVUdkRunTT2glZU9XAaGmhqskrvKwqXuDfNjEO0LZKWdejEEpnq8aM0tOaw==", + "dev": true + }, + "es-module-lexer": { + "version": "0.9.3", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-0.9.3.tgz", + "integrity": "sha512-1HQ2M2sPtxwnvOvT1ZClHyQDiggdNjURWpY2we6aMKCQiUVxTmVs2UYPLIrD84sS+kMdUwfBSylbJPwNnBrnHQ==", + "dev": true + }, + 
"escalade": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", + "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "dev": true + }, + "escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true + }, + "eslint": { + "version": "8.25.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.25.0.tgz", + "integrity": "sha512-DVlJOZ4Pn50zcKW5bYH7GQK/9MsoQG2d5eDH0ebEkE8PbgzTTmtt/VTH9GGJ4BfeZCpBLqFfvsjX35UacUL83A==", + "dev": true, + "requires": { + "@eslint/eslintrc": "^1.3.3", + "@humanwhocodes/config-array": "^0.10.5", + "@humanwhocodes/module-importer": "^1.0.1", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.3.2", + "doctrine": "^3.0.0", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^7.1.1", + "eslint-utils": "^3.0.0", + "eslint-visitor-keys": "^3.3.0", + "espree": "^9.4.0", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^6.0.1", + "find-up": "^5.0.0", + "glob-parent": "^6.0.1", + "globals": "^13.15.0", + "globby": "^11.1.0", + "grapheme-splitter": "^1.0.4", + "ignore": "^5.2.0", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-sdsl": "^4.1.4", + "js-yaml": "^4.1.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.2", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "regexpp": "^3.2.0", + "strip-ansi": "^6.0.1", + "strip-json-comments": "^3.1.0", + "text-table": "^0.2.0" + }, + "dependencies": { + "eslint-scope": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.1.1.tgz", + 
"integrity": "sha512-QKQM/UXpIiHcLqJ5AOyIW7XZmzjkzQXYE54n1++wb0u9V/abW3l9uQnxX8Z5Xd18xyKIMTUAyQ0k1e8pz6LUrw==", + "dev": true, + "requires": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + } + }, + "estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true + }, + "glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "requires": { + "is-glob": "^4.0.3" + } + } + } + }, + "eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "requires": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + } + }, + "eslint-utils": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-3.0.0.tgz", + "integrity": "sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==", + "dev": true, + "requires": { + "eslint-visitor-keys": "^2.0.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true + } + } + }, + "eslint-visitor-keys": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz", + "integrity": "sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA==", + "dev": true + }, + "espree": { + 
"version": "9.4.0", + "resolved": "https://registry.npmjs.org/espree/-/espree-9.4.0.tgz", + "integrity": "sha512-DQmnRpLj7f6TgN/NYb0MTzJXL+vJF9h3pHy4JhCIs3zwcgez8xmGg3sXHcEO97BrmO2OSvCwMdfdlyl+E9KjOw==", + "dev": true, + "requires": { + "acorn": "^8.8.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^3.3.0" + } + }, + "esquery": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", + "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", + "dev": true, + "requires": { + "estraverse": "^5.1.0" + }, + "dependencies": { + "estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true + } + } + }, + "esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "requires": { + "estraverse": "^5.2.0" + }, + "dependencies": { + "estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true + } + } + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, + "events": { + "version": "3.3.0", + "resolved": 
"https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "dev": true + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "fast-glob": { + "version": "3.2.12", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.12.tgz", + "integrity": "sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w==", + "dev": true, + "requires": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.4" + } + }, + "fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true + }, + "fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "dev": true + }, + "fastq": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.13.0.tgz", + "integrity": "sha512-YpkpUnK8od0o1hmeSc7UUs/eB/vIPWJYjKck2QKIzAf71Vm1AAQ3EbuZB3g2JIy+pg+ERD0vqI79KyZiB2e2Nw==", + "dev": true, + "requires": { + "reusify": "^1.0.4" + } + }, + 
"file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "requires": { + "flat-cache": "^3.0.4" + } + }, + "fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "requires": { + "to-regex-range": "^5.0.1" + } + }, + "find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + "requires": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + } + }, + "flat": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", + "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", + "dev": true + }, + "flat-cache": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", + "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", + "dev": true, + "requires": { + "flatted": "^3.1.0", + "rimraf": "^3.0.2" + } + }, + "flatted": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.7.tgz", + "integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==", + "dev": true + }, + "fs-extra": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", + "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", + "requires": { + "graceful-fs": "^4.2.0", + 
"jsonfile": "^6.0.1", + "universalify": "^2.0.0" + } + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true + }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "optional": true + }, + "fstream": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/fstream/-/fstream-1.0.12.tgz", + "integrity": "sha512-WvJ193OHa0GHPEL+AycEJgxvBEwyfRkN1vhjca23OaPVMCaLCXTd5qAu82AjTcgP1UJmytkOKb63Ypde7raDIg==", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "inherits": "~2.0.0", + "mkdirp": ">=0.5 0", + "rimraf": "2" + }, + "dependencies": { + "glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "rimraf": { + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz", + "integrity": "sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==", + "dev": true, + "requires": { + "glob": "^7.1.3" + } + } + } + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "get-caller-file": { + "version": "2.0.5", + "resolved": 
"https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true + }, + "glob": { + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-8.0.3.tgz", + "integrity": "sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^5.0.1", + "once": "^1.3.0" + }, + "dependencies": { + "brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0" + } + }, + "minimatch": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz", + "integrity": "sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg==", + "dev": true, + "requires": { + "brace-expansion": "^2.0.1" + } + } + } + }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "requires": { + "is-glob": "^4.0.1" + } + }, + "glob-to-regexp": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz", + "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", + "dev": true + }, + "globals": { + "version": "13.17.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.17.0.tgz", + "integrity": 
"sha512-1C+6nQRb1GwGMKm2dH/E7enFAMxGTmGI7/dEdhy/DNelv85w9B72t3uc5frtMNXIbzrarJJ/lTCjcaZwbLJmyw==", + "dev": true, + "requires": { + "type-fest": "^0.20.2" + } + }, + "globby": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz", + "integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==", + "dev": true, + "requires": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.2.9", + "ignore": "^5.2.0", + "merge2": "^1.4.1", + "slash": "^3.0.0" + } + }, + "graceful-fs": { + "version": "4.2.10", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz", + "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==" + }, + "grapheme-splitter": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz", + "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==", + "dev": true + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true + }, + "http-proxy-agent": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz", + "integrity": 
"sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==", + "dev": true, + "requires": { + "@tootallnate/once": "1", + "agent-base": "6", + "debug": "4" + } + }, + "https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dev": true, + "requires": { + "agent-base": "6", + "debug": "4" + } + }, + "ignore": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.0.tgz", + "integrity": "sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ==", + "dev": true + }, + "import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "requires": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + } + }, + "import-local": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", + "integrity": "sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==", + "dev": true, + "requires": { + "pkg-dir": "^4.2.0", + "resolve-cwd": "^3.0.0" + } + }, + "imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + 
"inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "interpret": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/interpret/-/interpret-2.2.0.tgz", + "integrity": "sha512-Ju0Bz/cEia55xDwUWEa8+olFpCiQoypjnQySseKtmjNrnps3P+xfpUmGr90T7yjlVJmOtybRvPXhKMbHr+fWnw==", + "dev": true + }, + "is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "requires": { + "binary-extensions": "^2.0.0" + } + }, + "is-core-module": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.11.0.tgz", + "integrity": "sha512-RRjxlvLDkD1YJwDbroBHMb+cukurkDWNyHx7D3oNB5x9rb5ogcksMC5wHCadcXoo67gVr/+3GFySh3134zi6rw==", + "dev": true, + "requires": { + "has": "^1.0.3" + } + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true + }, + "is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-number": { + "version": "7.0.0", + "resolved": 
"https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true + }, + "is-plain-obj": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", + "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", + "dev": true + }, + "is-plain-object": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", + "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", + "dev": true, + "requires": { + "isobject": "^3.0.1" + } + }, + "is-unicode-supported": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", + "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", + "dev": true + }, + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", + "dev": true + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "isobject": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==", + "dev": true + }, + "jest-worker": { + "version": "27.5.1", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-27.5.1.tgz", + "integrity": 
"sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==", + "dev": true, + "requires": { + "@types/node": "*", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "dependencies": { + "supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "js-sdsl": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.1.5.tgz", + "integrity": "sha512-08bOAKweV2NUC1wqTtf3qZlnpOX/R2DU9ikpjOHs0H+ibQv3zpncVQg6um4uYtRtrwIX8M4Nh3ytK4HGlYAq7Q==", + "dev": true + }, + "js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dev": true, + "requires": { + "argparse": "^2.0.1" + } + }, + "json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true + }, + 
"jsonfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "requires": { + "graceful-fs": "^4.1.6", + "universalify": "^2.0.0" + } + }, + "kind-of": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz", + "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==", + "dev": true + }, + "levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + } + }, + "listenercount": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/listenercount/-/listenercount-1.0.1.tgz", + "integrity": "sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ==", + "dev": true + }, + "loader-runner": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz", + "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==", + "dev": true + }, + "locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "requires": { + "p-locate": "^5.0.0" + } + }, + "lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true + }, + "log-symbols": { + "version": "4.1.0", + "resolved": 
"https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", + "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", + "dev": true, + "requires": { + "chalk": "^4.1.0", + "is-unicode-supported": "^0.1.0" + } + }, + "lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "requires": { + "yallist": "^4.0.0" + } + }, + "merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true + }, + "merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true + }, + "micromatch": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "dev": true, + "requires": { + "braces": "^3.0.2", + "picomatch": "^2.3.1" + } + }, + "mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "dev": true + }, + "mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dev": true, + "requires": { + "mime-db": "1.52.0" + } + }, + "minimatch": { + "version": "3.1.2", + "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.7.tgz", + "integrity": "sha512-bzfL1YUZsP41gmu/qjrEk0Q6i2ix/cVeAhbCbqH9u3zYutS1cLg00qhrD0M2MVdCcx4Sc0UpP2eBWo9rotpq6g==", + "dev": true + }, + "mkdirp": { + "version": "0.5.6", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", + "integrity": "sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==", + "dev": true, + "requires": { + "minimist": "^1.2.6" + } + }, + "mocha": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.1.0.tgz", + "integrity": "sha512-vUF7IYxEoN7XhQpFLxQAEMtE4W91acW4B6En9l97MwE9stL1A9gusXfoHZCLVHDUJ/7V5+lbCM6yMqzo5vNymg==", + "dev": true, + "requires": { + "ansi-colors": "4.1.1", + "browser-stdout": "1.3.1", + "chokidar": "3.5.3", + "debug": "4.3.4", + "diff": "5.0.0", + "escape-string-regexp": "4.0.0", + "find-up": "5.0.0", + "glob": "7.2.0", + "he": "1.2.0", + "js-yaml": "4.1.0", + "log-symbols": "4.1.0", + "minimatch": "5.0.1", + "ms": "2.1.3", + "nanoid": "3.3.3", + "serialize-javascript": "6.0.0", + "strip-json-comments": "3.1.1", + "supports-color": "8.1.1", + "workerpool": "6.2.1", + "yargs": "16.2.0", + "yargs-parser": "20.2.4", + "yargs-unparser": "2.0.0" + }, + "dependencies": { + "glob": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz", + "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "dependencies": { + "minimatch": 
{ + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + } + } + }, + "minimatch": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.0.1.tgz", + "integrity": "sha512-nLDxIFRyhDblz3qMuq+SoRZED4+miJ/G+tdDrjkkkRnjAsBexeGpgjLEQ0blJy7rHhR2b93rhQY4SvyWu9v03g==", + "dev": true, + "requires": { + "brace-expansion": "^2.0.1" + }, + "dependencies": { + "brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0" + } + } + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "nanoid": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.3.tgz", + "integrity": "sha512-p1sjXuopFs0xg+fPASzQ28agW1oHD7xDsd9Xkf3T15H3c/cifrFHVwrh74PdoklAPi+i7MdRsE47vm2r6JoB+w==", + "dev": true + }, + "natural-compare": { + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, + "neo-async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "dev": true + }, + "node-releases": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.6.tgz", + "integrity": "sha512-PiVXnNuFm5+iYkLBNeq5211hvO38y63T0i2KKh2KnUs3RpzJ+JtODFjkD8yjLwnDkTYF1eKXheUwdssR+NRZdg==", + "dev": true + }, + "normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "optionator": { + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", + "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", + "dev": true, + "requires": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.3" + } + }, + "p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "requires": { + "yocto-queue": "^0.1.0" + } + }, + "p-locate": { + "version": "5.0.0", + 
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "requires": { + "p-limit": "^3.0.2" + } + }, + "p-try": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true + }, + "parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "requires": { + "callsites": "^3.0.0" + } + }, + "path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true + }, + "path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true + }, + "path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true + }, + "path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": 
"sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true + }, + "picocolors": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", + "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", + "dev": true + }, + "picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true + }, + "pkg-dir": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", + "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", + "dev": true, + "requires": { + "find-up": "^4.0.0" + }, + "dependencies": { + "find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "requires": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + } + }, + "locate-path": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "requires": { + "p-locate": "^4.1.0" + } + }, + "p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "requires": { + "p-try": "^2.0.0" + } + }, + "p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": 
"sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "requires": { + "p-limit": "^2.2.0" + } + } + } + }, + "prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true + }, + "process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "dev": true + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "python-shell": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/python-shell/-/python-shell-3.0.1.tgz", + "integrity": "sha512-TWeotuxe1auhXa5bGRScxnc2J+0r41NBntSa6RYZtMBLtAEsvCboKrEbW6DvASosWQepVkhZZlT3B5Ei766G+Q==" + }, + "queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true + }, + "randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "dev": true, + "requires": { + "safe-buffer": "^5.1.0" + } + }, + "readable-stream": { + "version": "2.3.7", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz", + "integrity": 
"sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==", + "dev": true, + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "requires": { + "picomatch": "^2.2.1" + } + }, + "rechoir": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/rechoir/-/rechoir-0.7.1.tgz", + "integrity": "sha512-/njmZ8s1wVeR6pjTZ+0nCnv8SpZNRMT2D1RLOJQESlYFDBvwpTA4KWJpZ+sBJ4+vhjILRcK7JIFdGCdxEAAitg==", + "dev": true, + "requires": { + "resolve": "^1.9.0" + } + }, + "regexpp": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.2.0.tgz", + "integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==", + "dev": true + }, + "require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true + }, + "resolve": { + "version": "1.22.1", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.1.tgz", + "integrity": "sha512-nBpuuYuY5jFsli/JIs1oldw6fOQCBioohqWZg/2hiaOybXOft4lonv85uDOKXdf8rhyK159cxU5cDcK/NKk8zw==", + "dev": true, + "requires": { + "is-core-module": "^2.9.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + } + }, + "resolve-cwd": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", + "integrity": 
"sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", + "dev": true, + "requires": { + "resolve-from": "^5.0.0" + }, + "dependencies": { + "resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true + } + } + }, + "resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true + }, + "reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true + }, + "rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, + "requires": { + "glob": "^7.1.3" + }, + "dependencies": { + "glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + } + } + }, + "run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "requires": { + "queue-microtask": "^1.2.2" + } + }, + "rxjs": { + "version": "7.5.7", + 
"resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.5.7.tgz", + "integrity": "sha512-z9MzKh/UcOqB3i20H6rtrlaE/CgjLOvheWK/9ILrbhROGTweAi1BaFsTT9FbwZi5Trr1qNRs+MXkhmR06awzQA==", + "requires": { + "tslib": "^2.1.0" + }, + "dependencies": { + "tslib": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.0.tgz", + "integrity": "sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ==" + } + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true + }, + "schema-utils": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz", + "integrity": "sha512-Y5PQxS4ITlC+EahLuXaY86TXfR7Dc5lw294alXOq86JAHCihAIZfqv8nNCWvaEJvaC51uN9hbLGeV0cFBdH+Fw==", + "dev": true, + "requires": { + "@types/json-schema": "^7.0.8", + "ajv": "^6.12.5", + "ajv-keywords": "^3.5.2" + } + }, + "semver": { + "version": "7.3.8", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz", + "integrity": "sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==", + "dev": true, + "requires": { + "lru-cache": "^6.0.0" + } + }, + "serialize-javascript": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", + "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", + "dev": true, + "requires": { + "randombytes": "^2.1.0" + } + }, + "setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "dev": true + }, + "shallow-clone": { + "version": "3.0.1", + 
"resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-3.0.1.tgz", + "integrity": "sha512-/6KqX+GVUdqPuPPd2LxDDxzX6CAbjJehAAOKlNpqqUpAqPM6HeL8f+o3a+JsyGjn2lv0WY8UsTgUJjU9Ok55NA==", + "dev": true, + "requires": { + "kind-of": "^6.0.2" + } + }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true + }, + "slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + }, + "source-map-support": { + "version": "0.5.21", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": 
"^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dev": true, + "requires": { + "safe-buffer": "~5.1.0" + } + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.1" + } + }, + "strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + }, + "supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true + }, + "tapable": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", + "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", + "dev": true + }, + "terser": { + "version": "5.15.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-5.15.1.tgz", + "integrity": 
"sha512-K1faMUvpm/FBxjBXud0LWVAGxmvoPbZbfTCYbSgaaYQaIXI3/TdI7a7ZGA73Zrou6Q8Zmz3oeUTsp/dj+ag2Xw==", + "dev": true, + "requires": { + "@jridgewell/source-map": "^0.3.2", + "acorn": "^8.5.0", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" + } + }, + "terser-webpack-plugin": { + "version": "5.3.6", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.6.tgz", + "integrity": "sha512-kfLFk+PoLUQIbLmB1+PZDMRSZS99Mp+/MHqDNmMA6tOItzRt+Npe3E+fsMs5mfcM0wCtrrdU387UnV+vnSffXQ==", + "dev": true, + "requires": { + "@jridgewell/trace-mapping": "^0.3.14", + "jest-worker": "^27.4.5", + "schema-utils": "^3.1.1", + "serialize-javascript": "^6.0.0", + "terser": "^5.14.1" + } + }, + "text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true + }, + "to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "requires": { + "is-number": "^7.0.0" + } + }, + "traverse": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/traverse/-/traverse-0.3.9.tgz", + "integrity": "sha512-iawgk0hLP3SxGKDfnDJf8wTz4p2qImnyihM5Hh/sGvQ3K37dPi/w8sRhdNIxYA1TwFwc5mDhIJq+O0RsvXBKdQ==", + "dev": true + }, + "ts-loader": { + "version": "9.4.1", + "resolved": "https://registry.npmjs.org/ts-loader/-/ts-loader-9.4.1.tgz", + "integrity": "sha512-384TYAqGs70rn9F0VBnh6BPTfhga7yFNdC5gXbQpDrBj9/KsT4iRkGqKXhziofHOlE2j6YEaiTYVGKKvPhGWvw==", + "dev": true, + "requires": { + "chalk": "^4.1.0", + "enhanced-resolve": "^5.0.0", + "micromatch": "^4.0.0", + "semver": "^7.3.4" + } + }, + "tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + 
"integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "tsutils": { + "version": "3.21.0", + "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz", + "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==", + "dev": true, + "requires": { + "tslib": "^1.8.1" + } + }, + "type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1" + } + }, + "type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true + }, + "typescript": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.8.4.tgz", + "integrity": "sha512-QCh+85mCy+h0IGff8r5XWzOVSbBO+KfeYrMQh7NJ58QujwcE22u+NUSmUxqF+un70P9GXKxa2HCNiTTMJknyjQ==", + "dev": true + }, + "universalify": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz", + "integrity": "sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==" + }, + "unzipper": { + "version": "0.10.11", + "resolved": "https://registry.npmjs.org/unzipper/-/unzipper-0.10.11.tgz", + "integrity": "sha512-+BrAq2oFqWod5IESRjL3S8baohbevGcVA+teAIOYWM3pDVdseogqbzhhvvmiyQrUNKFUnDMtELW3X8ykbyDCJw==", + "dev": true, + "requires": { + "big-integer": "^1.6.17", + "binary": "~0.3.0", + "bluebird": "~3.4.1", + "buffer-indexof-polyfill": "~1.0.0", + "duplexer2": "~0.1.4", + "fstream": "^1.0.12", + "graceful-fs": "^4.2.2", + "listenercount": "~1.0.1", + "readable-stream": "~2.3.6", + "setimmediate": "~1.0.4" + } + 
}, + "update-browserslist-db": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.10.tgz", + "integrity": "sha512-OztqDenkfFkbSG+tRxBeAnCVPckDBcvibKd35yDONx6OU8N7sqgwc7rCbkJ/WcYtVRZ4ba68d6byhC21GFh7sQ==", + "dev": true, + "requires": { + "escalade": "^3.1.1", + "picocolors": "^1.0.0" + } + }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "dev": true + }, + "watchpack": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz", + "integrity": "sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==", + "dev": true, + "requires": { + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.1.2" + } + }, + "webpack": { + "version": "5.74.0", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.74.0.tgz", + "integrity": "sha512-A2InDwnhhGN4LYctJj6M1JEaGL7Luj6LOmyBHjcI8529cm5p6VXiTIW2sn6ffvEAKmveLzvu4jrihwXtPojlAA==", + "dev": true, + "requires": { + "@types/eslint-scope": "^3.7.3", + "@types/estree": "^0.0.51", + "@webassemblyjs/ast": "1.11.1", + "@webassemblyjs/wasm-edit": "1.11.1", + "@webassemblyjs/wasm-parser": "1.11.1", + "acorn": "^8.7.1", + "acorn-import-assertions": "^1.7.6", + "browserslist": "^4.14.5", + "chrome-trace-event": "^1.0.2", + "enhanced-resolve": "^5.10.0", + "es-module-lexer": "^0.9.0", + "eslint-scope": "5.1.1", + "events": "^3.2.0", + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.2.9", + "json-parse-even-better-errors": 
"^2.3.1", + "loader-runner": "^4.2.0", + "mime-types": "^2.1.27", + "neo-async": "^2.6.2", + "schema-utils": "^3.1.0", + "tapable": "^2.1.1", + "terser-webpack-plugin": "^5.1.3", + "watchpack": "^2.4.0", + "webpack-sources": "^3.2.3" + } + }, + "webpack-cli": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-4.10.0.tgz", + "integrity": "sha512-NLhDfH/h4O6UOy+0LSso42xvYypClINuMNBVVzX4vX98TmTaTUxwRbXdhucbFMd2qLaCTcLq/PdYrvi8onw90w==", + "dev": true, + "requires": { + "@discoveryjs/json-ext": "^0.5.0", + "@webpack-cli/configtest": "^1.2.0", + "@webpack-cli/info": "^1.5.0", + "@webpack-cli/serve": "^1.7.0", + "colorette": "^2.0.14", + "commander": "^7.0.0", + "cross-spawn": "^7.0.3", + "fastest-levenshtein": "^1.0.12", + "import-local": "^3.0.2", + "interpret": "^2.2.0", + "rechoir": "^0.7.0", + "webpack-merge": "^5.7.3" + }, + "dependencies": { + "commander": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz", + "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==", + "dev": true + } + } + }, + "webpack-merge": { + "version": "5.8.0", + "resolved": "https://registry.npmjs.org/webpack-merge/-/webpack-merge-5.8.0.tgz", + "integrity": "sha512-/SaI7xY0831XwP6kzuwhKWVKDP9t1QY1h65lAFLbZqMPIuYcD9QAW4u9STIbU9kaJbPBB/geU/gLr1wDjOhQ+Q==", + "dev": true, + "requires": { + "clone-deep": "^4.0.1", + "wildcard": "^2.0.0" + } + }, + "webpack-sources": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-3.2.3.tgz", + "integrity": "sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==", + "dev": true + }, + "which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "requires": { + 
"isexe": "^2.0.0" + } + }, + "wildcard": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/wildcard/-/wildcard-2.0.0.tgz", + "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==", + "dev": true + }, + "word-wrap": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", + "dev": true + }, + "workerpool": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.1.tgz", + "integrity": "sha512-ILEIE97kDZvF9Wb9f6h5aXK4swSlKGUcOEGiIYb2OOu/IrDU9iwj0fD//SsA6E5ibwJxpEvhullJY4Sl4GcpAw==", + "dev": true + }, + "wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true + }, + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": 
"sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "dev": true, + "requires": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + } + }, + "yargs-parser": { + "version": "20.2.4", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.4.tgz", + "integrity": "sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==", + "dev": true + }, + "yargs-unparser": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/yargs-unparser/-/yargs-unparser-2.0.0.tgz", + "integrity": "sha512-7pRTIA9Qc1caZ0bZ6RYRGbHJthJWuakf+WmHK0rVeLkNrrGhfoabBNdue6kdINI6r4if7ocq9aD/n7xwKOdzOA==", + "dev": true, + "requires": { + "camelcase": "^6.0.0", + "decamelize": "^4.0.0", + "flat": "^5.0.2", + "is-plain-obj": "^2.1.0" + } + }, + "yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true + } + } +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/package.json b/neural_coder/extensions/neural_compressor_ext_vscode/package.json new file mode 100644 index 00000000000..e3207918bc2 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/package.json @@ -0,0 +1,185 @@ +{ + "name": "neural-coder-ext-vscode", + "displayName": "Neural Coder", + "description": "Enable and benchmark quantization on Deep Learning models in one-click", + "publisher": "IntelNeuralCompressor", + "version": "0.0.8", + "engines": { + "vscode": "^1.70.0" + }, + "categories": [ + "Other" + ], + "icon": "images/logo_icon.png", + "activationEvents": [ + "onCommand:neuralCoder.incEnableINT8Static", + "onCommand:neuralCoder.incEnableINT8Dynamic", + 
"onCommand:neuralCoder.incEnableBF16", + "onCommand:neuralCoder.incAutoEnableBenchmark", + "onCommand:neuralCoder.sideBarAutoAddParams", + "onView:Enable_Log_File", + "onView:Auto_Log_File" + ], + "main": "./dist/extension.js", + "contributes": { + "configuration": { + "title": "Intel Neural Coder Configuration", + "properties": { + "neuralCoder.pythonPath": { + "type": "string", + "default": "", + "description": "Python Path for Neural Coder" + } + } + }, + "commands": [ + { + "command": "neuralCoder.incEnableINT8Static", + "title": "INC Enable INT8 (Static)", + "icon": { + "dark": "./images/dark/dark-icon-menu-int8-static.svg", + "light": "./images/light/light-icon-menu-int8-static.svg" + } + }, + { + "command": "neuralCoder.incEnableINT8Dynamic", + "title": "INC Enable INT8 (Dynamic)", + "icon": { + "dark": "./images/dark/dark-icon-menu-int8-dynamic.svg", + "light": "./images/light/light-icon-menu-int8-dynamic.svg" + } + }, + { + "command": "neuralCoder.incEnableBF16", + "title": "INC Enable BF16", + "icon": { + "dark": "./images/dark/dark-icon-menu-bf16.svg", + "light": "./images/light/light-icon-menu-bf16.svg" + } + }, + { + "command": "neuralCoder.incAutoEnableBenchmark", + "title": "INC Auto Enable Benchmark", + "icon": { + "dark": "./images/dark/dark-icon-menu-auto.svg", + "light": "./images/light/light-icon-menu-auto.svg" + } + } + ], + "menus": { + "neuralCoder/editor/title": [ + { + "command": "neuralCoder.incEnableINT8Static", + "when": "resourceLangId == python", + "group": "1_nc@1" + }, + { + "command": "neuralCoder.incEnableINT8Dynamic", + "when": "resourceLangId == python", + "group": "1_nc@2" + }, + { + "command": "neuralCoder.incEnableBF16", + "when": "resourceLangId == python", + "group": "1_nc@3" + }, + { + "command": "neuralCoder.incAutoEnableBenchmark", + "when": "resourceLangId == python", + "group": "1_nc@4" + } + ], + "editor/title": [ + { + "submenu": "neuralCoder/editor/title", + "when": "resourceLangId == python", + "group": "navigation" 
+ }, + { + "command": "neuralCoder.incEnableINT8Static", + "when": "resourceLangId == python", + "group": "1_nc@1" + }, + { + "command": "neuralCoder.incEnableINT8Dynamic", + "when": "resourceLangId == python", + "group": "1_nc@2" + }, + { + "command": "neuralCoder.incEnableBF16", + "when": "resourceLangId == python", + "group": "1_nc@3" + }, + { + "command": "neuralCoder.incAutoEnableBenchmark", + "when": " resourceLangId == python", + "group": "1_nc@4" + } + ] + }, + "submenus": [ + { + "id": "neuralCoder/editor/title", + "label": "Neural Coder", + "icon": { + "dark": "./images/dark/dark_logo.png", + "light": "./images/light/light_logo.svg" + } + } + ], + "viewsContainers": { + "activitybar": [ + { + "id": "NeuralCoder_INFO", + "title": "Neural Coder", + "icon": "images/dark/dark_logo.png" + } + ] + }, + "views": { + "NeuralCoder_INFO": [ + { + "id": "Enable_Log_File", + "name": "ENABLE" + }, + { + "id": "Auto_Log_File", + "name": "AUTO" + } + ] + } + }, + "scripts": { + "vscode:prepublish": "npm run package", + "compile": "webpack", + "watch": "webpack --watch", + "package": "webpack --mode production --devtool hidden-source-map", + "compile-tests": "tsc -p . --outDir out", + "watch-tests": "tsc -p . 
-w --outDir out", + "pretest": "npm run compile-tests && npm run compile && npm run lint", + "lint": "eslint src --ext ts", + "test": "node ./out/test/runTest.js" + }, + "devDependencies": { + "@types/fs-extra": "^9.0.13", + "@types/glob": "^8.0.0", + "@types/mocha": "^10.0.0", + "@types/node": "16.x", + "@types/vscode": "^1.70.0", + "@typescript-eslint/eslint-plugin": "^5.38.1", + "@typescript-eslint/parser": "^5.38.1", + "@vscode/test-electron": "^2.1.5", + "eslint": "^8.24.0", + "glob": "^8.0.3", + "mocha": "^10.0.0", + "ts-loader": "^9.4.1", + "typescript": "^4.8.4", + "webpack": "^5.74.0", + "webpack-cli": "^4.10.0" + }, + "dependencies": { + "fs-extra": "^10.1.0", + "python-shell": "^3.0.1", + "rxjs": "^7.5.7" + } +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/src/NcProcessScript.py b/neural_coder/extensions/neural_compressor_ext_vscode/src/NcProcessScript.py new file mode 100644 index 00000000000..983767d7928 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/src/NcProcessScript.py @@ -0,0 +1,47 @@ +import os +import sys +import subprocess +libs = ['neural-compressor'] + +try: + from neural_coder import enable + from neural_coder import auto_quant + +except ModuleNotFoundError: + for lib in libs: + os.system(f'{sys.argv[6]} -m pip install -U {lib}') + from neural_coder import enable + from neural_coder import auto_quant + +if (sys.argv[4] == "normal"): + enable(code=sys.argv[1], features=[sys.argv[2]], overwrite=True) + logResult = enable(code=sys.argv[1], features=[ + sys.argv[2]], save_patch_path=sys.argv[5]) + +elif (sys.argv[4] == "genLog"): + if (sys.argv[2] == ""): + # codeResult have 3 params: perfomance, mode, path + codeResult = enable( + code=sys.argv[1], features=[], run_bench=True, args=sys.argv[3]) + + else: + codeResult = enable(code=sys.argv[1], features=[ + sys.argv[2]], run_bench=True, args=sys.argv[3]) + logResult = enable(code=sys.argv[1], features=[ + sys.argv[2]], args=sys.argv[3], 
save_patch_path=sys.argv[5]) + + # print fps + with open(codeResult[2] + '/bench.log', 'r') as f: + logs = f.readlines() + for log in logs: + if (log.find('fps') != -1): + log_line = log + fps = log_line.split("[")[1].split("]")[0] + print(fps) + +elif (sys.argv[4] == "hardWare"): + subp = subprocess.Popen("lscpu | grep 'Model name'", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") + subp.wait(2) + hardware = subp.communicate()[0].replace("Model name:", "").strip() + print(hardware) diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/src/extension.ts b/neural_coder/extensions/neural_compressor_ext_vscode/src/extension.ts new file mode 100644 index 00000000000..6f7a2e14f82 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/src/extension.ts @@ -0,0 +1,183 @@ +import * as vscode from "vscode"; +import { NeuralCodeOptimizer } from "./neuralcoder"; +import * as DirPath from "path"; + +// highLight +async function highLight () { + const editor = vscode.window.activeTextEditor; + if (!editor) { + return; + } + const document = editor.document; + + const text = document.getText(); + const regStart = /# \[NeuralCoder\] .*? \[Beginning Line\]/g; + const regEnd = /# \[NeuralCoder\] .*? 
\[Ending Line\]/g; + const startMatch = regStart.exec(text); + const endMatch = regEnd.exec(text); + if(startMatch && endMatch) { + const startLine = document.positionAt(startMatch.index); + const endLine = document.positionAt(endMatch.index); + const start = document.lineAt(startLine).range.start.character; + const end = document.lineAt(endLine).range.end.character; + const range: vscode.Range = new vscode.Range(Number(startLine), start, Number(endLine), end); + // highLight + let path = vscode.window.activeTextEditor?.document.fileName; + if (path) { + let filePath = DirPath.resolve(`${__dirname}`, "../", path); + // vscode.workspace.openTextDocument(filePath).then(async (document) => { + // await vscode.window.showTextDocument(document , {preserveFocus: false, selection: range, viewColumn: vscode.ViewColumn.One}); + // }); + } + } +} + +export async function activate(context: vscode.ExtensionContext) { + // init + const key = "neuralCoder.pythonPath"; + let config: vscode.WorkspaceConfiguration = + vscode.workspace.getConfiguration(); + let currentCondaName = config.get(key); + + if (!currentCondaName) { + vscode.window.showErrorMessage("Please input python Path!"); + return; + } + // conda Env + context.subscriptions.push( + vscode.workspace.onDidChangeConfiguration(() => { + const currentCondaName = vscode.workspace.getConfiguration().get(key); + if (!currentCondaName) { + vscode.window.showErrorMessage("Please input python Path!"); + return; + } + }) + ); + + // start + let path = vscode.window.activeTextEditor?.document.fileName; + let userInput: string = ""; + let ncCoder = new NeuralCodeOptimizer(); + let curPythonPath = currentCondaName; + + let incEnableINT8Static = vscode.commands.registerCommand( + "neuralCoder.incEnableINT8Static", + () => { + vscode.window.withProgress( + { + cancellable: false, + location: vscode.ProgressLocation.Notification, + title: "Running INT8 Static!", + }, + async () => { + ncCoder.optimizeCodes( + curPythonPath, + 
"pytorch_inc_static_quant_fx", + path, + "" + ); + } + ); + } + ); + let incEnableINT8Dynamic = vscode.commands.registerCommand( + "neuralCoder.incEnableINT8Dynamic", + () => { + vscode.window.withProgress( + { + cancellable: false, + location: vscode.ProgressLocation.Notification, + title: "Running INT8 Dynamic!", + }, + async () => { + ncCoder.optimizeCodes( + curPythonPath, + "pytorch_inc_dynamic_quant", + path, + "" + ); + } + ); + } + ); + let incEnableBF16 = vscode.commands.registerCommand( + "neuralCoder.incEnableBF16", + () => { + vscode.window.withProgress( + { + cancellable: false, + location: vscode.ProgressLocation.Notification, + title: "Running BF16!", + }, + async () => { + ncCoder.optimizeCodes(curPythonPath, "pytorch_inc_bf16", path, ""); + } + ); + } + ); + + let incAutoEnableBenchmark = vscode.commands.registerCommand( + "neuralCoder.incAutoEnableBenchmark", + () => { + vscode.window.withProgress( + { + cancellable: false, + location: vscode.ProgressLocation.Notification, + title: "Running AutoEnableBenchmark!", + }, + async () => { + vscode.window + .showInputBox({ + password: false, // need password? + ignoreFocusOut: true, // when focus other thing + placeHolder: "INPUT EXECUTE PARAMETERS OR NOT: ", // hint + }) + .then((value) => { + if (typeof value !== "undefined") { + userInput = value ? 
value : ""; + const opc = + vscode.window.createOutputChannel("Neural Coder Auto-Bench"); + ncCoder.optimizeCodes( + curPythonPath, + "auto-quant", + path, + userInput, + opc + ); + } + }); + } + ); + } + ); + + context.subscriptions.push(incEnableINT8Static); + context.subscriptions.push(incEnableINT8Dynamic); + context.subscriptions.push(incEnableBF16); + context.subscriptions.push(incAutoEnableBenchmark); + context.subscriptions.push(vscode.workspace.onDidChangeTextDocument((e: vscode.TextDocumentChangeEvent) => { + highLight(); +})); + + //register command MyTreeItem.itemClick + context.subscriptions.push( + vscode.commands.registerCommand( + "MyTreeItem.itemClick", + (label, filePath) => { + //display content + vscode.workspace.openTextDocument(filePath) + .then(doc => { + vscode.window.showTextDocument(doc); + }, err => { + console.log(`Open ${filePath} error, ${err}.`); + }).then(undefined, err => { + console.log(`Open ${filePath} error, ${err}.`); + }); + } + ) + ); +} + +// this method is called when your extension is deactivated +export function deactivate() {} + diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/src/neuralcoder.ts b/neural_coder/extensions/neural_compressor_ext_vscode/src/neuralcoder.ts new file mode 100644 index 00000000000..402ca9d9036 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/src/neuralcoder.ts @@ -0,0 +1,402 @@ +import { PythonShell } from "python-shell"; +import { MyTreeData } from "./sideBar"; +import * as vscode from "vscode"; +import * as DirPath from "path"; +import * as fs from "fs-extra"; + +class CodeOptimizer { + protected working: boolean; + protected pathExist: boolean; + public outPutLogPath: string; + public outPutLogFilePath: string; + public autoSaveLogPath: string; + public enableSaveLogPath: string; + public autoSaveFinalLogPath: string; + public enableSaveFinalLogPath: string; + public fpsList: number[]; + public treeProvider: string; + public curCondaName?: string; + 
public curPyPath: string; + public outPutStr: string[]; + + constructor() { + this.working = false; + this.pathExist = false; + this.autoSaveLogPath = "../neural_coder_workspace/Auto/"; + this.enableSaveLogPath = "../neural_coder_workspace/Enable/"; + this.outPutLogPath = "../neural_coder_workspace/outPutLog/"; + this.autoSaveFinalLogPath = ""; + this.enableSaveFinalLogPath = ""; + this.outPutLogFilePath = ""; + + this.fpsList = []; + this.outPutStr = []; + + this.treeProvider = ""; + this.curCondaName = vscode.workspace + .getConfiguration() + .get("neuralCoder.condaName"); + this.curPyPath = ""; + } + + public registerTreeDataProvider(treeName: string, logPath: string) { + vscode.window.registerTreeDataProvider( + treeName, + new MyTreeData(DirPath.resolve(logPath, "../")) + ); + } + + // output content + public outPutFunc( + outPut: vscode.OutputChannel, + outPutStr: string[], + content: string[] + ) { + content.forEach((val) => { + outPut.appendLine(val); + outPutStr.push(val); + }); + } + + // save log in a file + public saveLogFile( + outDir: string, + logContent: string, + ) { + let nowTime = new Date(Date.parse(new Date().toString())); + let nowTimeStr = + nowTime.getFullYear() + + "_" + + (nowTime.getMonth() > 8 ? (nowTime.getMonth() + 1) : ('0' + (nowTime.getMonth() + 1))) + + "_" + + (nowTime.getDate() > 9 ? nowTime.getDate() : ('0' + nowTime.getDate())) + + "_" + + (nowTime.getHours() > 9 ? nowTime.getHours() : ('0' + nowTime.getHours())) + + "_" + + (nowTime.getMinutes() > 9 ? nowTime.getMinutes() : ('0' + nowTime.getMinutes())) + + "_" + + (nowTime.getSeconds() > 9 ? 
nowTime.getSeconds() : ('0' + nowTime.getSeconds())); + + let finalPath = DirPath.resolve(`${__dirname}`, outDir + nowTimeStr); + + if (logContent === "Auto") { + this.autoSaveFinalLogPath = finalPath; + } else if (logContent === "Enable") { + this.enableSaveFinalLogPath = finalPath; + } else if (logContent === "output") { + this.outPutLogFilePath = finalPath; + } + // mkdir file + let isOutExist: boolean = fs.pathExistsSync(finalPath); + if (!isOutExist) { + fs.mkdirsSync(finalPath); + } + } + + // pythonShell Script + async ncPyScript( + dirPath: string, + currentFilePath: string, + feature: string, + currentFileArgs: string, + status: string, + currentPythonPath: string, + saveLogPath: string + ) { + return new Promise((resolve, reject) => { + PythonShell.run( + "NcProcessScript.py", + { + mode: "text", + pythonOptions: ["-u"], + scriptPath: dirPath, + pythonPath: currentPythonPath, + args: [ + currentFilePath, + feature, + currentFileArgs, + status, + saveLogPath + "/" + feature, + currentPythonPath + ], + }, + (err, result) => { + this.pathExist = true; + if (err) { + // vscode.window.showErrorMessage("Please install correct package!"); + this.working = false; + } + resolve(result); + } + ); + }); + } + + // neural coder params + async ncProcess( + ncPath: string | undefined, + feature: string, + ncArgs: string | undefined, + status: string, + currentPythonPath: string, + saveLogPath: string + ) { + let pythonRes: any; + // find currentFile path + const dirPath = DirPath.resolve(`${__dirname}`, "../src"); + // find running file path + let currentFilePath = ncPath ? ncPath : ""; + let currentFileArgs = ncArgs ? 
ncArgs : ""; + + // try { + // asyn -> sync + pythonRes = await this.ncPyScript( + dirPath, + currentFilePath, + feature, + currentFileArgs, + status, + currentPythonPath, + saveLogPath + ); + + if (!this.pathExist) { + vscode.window.showErrorMessage("Please input correct python Path!"); + this.working = false; + } + return pythonRes; + } + + async optimizeCode( + feature: string, + name: string, + next: string, + opt: string | undefined, + args: string | undefined, + ncPath: string | undefined, + currentPythonPath: string, + opc?: vscode.OutputChannel + ) { + if (opt === "normal") { + await this.ncProcess( + ncPath, + feature, + args, + "normal", + currentPythonPath, + this.enableSaveFinalLogPath + ); + this.registerTreeDataProvider( + "Enable_Log_File", + this.enableSaveFinalLogPath + ); + } else { + if (opc) { + if (feature === "") { + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......`, + ]); + + // Get the fps + const resFps = await this.ncProcess( + ncPath, + "", + args, + "genLog", + currentPythonPath, + this.autoSaveFinalLogPath + ); + const currentFps = resFps.pop(); + + this.fpsList.push(parseFloat(currentFps)); + + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${currentFps} (samples/second)`, + `[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......`, + `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......`, + ]); + } else { + const resFps = await this.ncProcess( + ncPath, + feature, + args, + "genLog", + currentPythonPath, + this.autoSaveFinalLogPath + ); + const currentFps = resFps.pop(); + this.fpsList.push(parseFloat(currentFps)); + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${currentFps} (samples/second)`, + ]); + if (next !== "") { + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] Enabling and Benchmarking 
for ${next} ......`, + ]); + } + if (feature === "pytorch_inc_bf16") { + let features = [ + "", + "pytorch_inc_static_quant_fx", + "pytorch_inc_dynamic_quant", + "pytorch_inc_bf16", + ]; + let featureName = [ + "Original Model", + "INC Enable INT8 (Static)", + "INC Enable INT8 (Dynamic)", + "INC Enable BF16", + ]; + + let bestFps = Math.max(...this.fpsList); + let bestIndex = this.fpsList.indexOf(bestFps); + let bestFeature = features[bestIndex]; + let bestFeatureName = featureName[bestIndex]; + let boost = (bestFps / this.fpsList[0]).toFixed(2); + + // Best result + await this.ncProcess( + ncPath, + bestFeature, + args, + "normal", + currentPythonPath, + this.autoSaveFinalLogPath + ); + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] The Best Intel Optimization: ${bestFeatureName}.`, + `[NeuralCoder INFO] You can get up to ${boost}X performance boost.`, + ]); + + const resHardWare = await this.ncProcess( + ncPath, + bestFeature, + args, + "hardWare", + currentPythonPath, + this.autoSaveFinalLogPath + ); + + // log File + let logContent = [...this.outPutStr]; + this.saveLogFile(this.outPutLogPath, 'output'); + + // save log file + let outPutFinalPath = this.outPutLogFilePath + "/outPut.log"; + fs.writeFile(outPutFinalPath, logContent.join("\n")); + + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] HardWare: ${resHardWare}.`, + `[NeuralCoder INFO] The log was saved to:`, + `[NeuralCoder INFO] ${outPutFinalPath}`, + ]); + // TreeData + this.registerTreeDataProvider( + "Auto_Log_File", + this.autoSaveFinalLogPath + ); + } + } + } + } + } +} + +export class NeuralCodeOptimizer extends CodeOptimizer { + constructor() { + super(); + } + + public async optimizeCodes( + currentPythonPath: string, + feature?: string, + path?: string, + args?: string, + opc?: vscode.OutputChannel + ) { + if (this.working) { + vscode.window.showInformationMessage("Not done yet"); + return; + } + + this.working = true; + const optimizeType = + feature !== undefined ? 
feature : "pytorch_mixed_precision_cpu"; + if (feature === "auto-quant") { + // outPut init + if (opc) { + opc.clear(); + opc.show(); + + this.outPutFunc(opc, this.outPutStr, [ + "[NeuralCoder INFO] Auto-Quant Started ......", + ]); + + // mkdir autoSaveLogs + this.saveLogFile(this.autoSaveLogPath, "Auto"); + + this.fpsList = []; + let pathFinal = path ? path : ""; + const currentFileName = pathFinal.split(/[\\\/]/).pop(); + + this.outPutFunc(opc, this.outPutStr, [ + `[NeuralCoder INFO] Code: User code from VS Code "${currentFileName}"`, + `[NeuralCoder INFO] Benchmark Mode: Throughput`, + ]); + + await this.optimizeCode( + "", //current feature + "The Original Model", //current feature name + "INC Enable INT8 (Static)", //next feature name + "auto", //normal or auto + args, //parameters + path, + currentPythonPath, + opc + ); + await this.optimizeCode( + "pytorch_inc_static_quant_fx", + "INC Enable INT8 (Static)", + "INC Enable INT8 (Dynamic)", + "auto", + args, + path, + currentPythonPath, + opc + ); + await this.optimizeCode( + "pytorch_inc_dynamic_quant", + "INC Enable INT8 (Dynamic)", + "INC Enable BF16", + "auto", + args, + path, + currentPythonPath, + opc + ); + await this.optimizeCode( + "pytorch_inc_bf16", + "INC Enable BF16", + "", + "auto", + args, + path, + currentPythonPath, + opc + ); + } + } else { + this.saveLogFile(this.enableSaveLogPath, "Enable"); + await this.optimizeCode( + optimizeType, + "", + "", + "normal", + args, + path, + currentPythonPath + ); + } + this.working = false; + } +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/src/sideBar.ts b/neural_coder/extensions/neural_compressor_ext_vscode/src/sideBar.ts new file mode 100644 index 00000000000..fd2a344e25c --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/src/sideBar.ts @@ -0,0 +1,72 @@ +import { TreeDataProvider, TreeItem, TreeItemCollapsibleState, ProviderResult, window } from "vscode"; +import * as fs from "fs"; +import * as path from 
"path"; + +export class MyTreeData implements TreeDataProvider{ + constructor(private rootPath: string){ + } + + getTreeItem(element: MyTreeItem) : MyTreeItem | Thenable { + return element; + } + + getChildren(element?: MyTreeItem | undefined): ProviderResult{ + if(!this.rootPath){ + window.showInformationMessage('No file in empty directory'); + return Promise.resolve([]); + } + if(element === undefined){ + return Promise.resolve(this.searchFiles(this.rootPath)); + } + else{ + return Promise.resolve(this.searchFiles(path.join(element.parentPath, element.label))); + } + } + //search file + private searchFiles(parentPath: string): MyTreeItem[] { + var treeDir: MyTreeItem[] = []; + if(this.pathExists(parentPath)){ + var fsReadDir = fs.readdirSync(parentPath, 'utf-8'); + fsReadDir.forEach(fileName => { + var filePath = path.join(parentPath, fileName);//absolute Path + if(fs.statSync(filePath).isDirectory()){//Directory + treeDir.push(new MyTreeItem(fileName, parentPath, TreeItemCollapsibleState.Collapsed)); + } + else{//file + treeDir.push(new MyTreeItem(fileName, parentPath, TreeItemCollapsibleState.None)); + } + }); + } + return treeDir; + } + //pathExists + private pathExists(filePath: string): boolean{ + try{ + fs.accessSync(filePath); + } + catch(err){ + return false; + } + return true; + } +} + +export class MyTreeItem extends TreeItem{ + constructor( + public readonly label: string, //save current label + public readonly parentPath: string, //save current label Path + public readonly collapsibleState: TreeItemCollapsibleState + ){ + super(label, collapsibleState); + } + //click method + command = { + title: "this.label", + command: 'MyTreeItem.itemClick', + arguments: [ //params + this.label, + path.join(this.parentPath, this.label) + ] + }; + contextValue = 'MyTreeItem';//provide for when +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/tsconfig.json b/neural_coder/extensions/neural_compressor_ext_vscode/tsconfig.json new file mode 100644 
index 00000000000..965a7b4c23b --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ES2020", + "lib": [ + "ES2020" + ], + "sourceMap": true, + "rootDir": "src", + "strict": true /* enable all strict type-checking options */ + /* Additional Checks */ + // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ + // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ + // "noUnusedParameters": true, /* Report errors on unused parameters. */ + } +} diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/vsc-extension-quickstart.md b/neural_coder/extensions/neural_compressor_ext_vscode/vsc-extension-quickstart.md new file mode 100644 index 00000000000..b2eb4a435ce --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/vsc-extension-quickstart.md @@ -0,0 +1,47 @@ +# Welcome to your VS Code Extension + +## What's in the folder + +* This folder contains all of the files necessary for your extension. +* `package.json` - this is the manifest file in which you declare your extension and command. + * The sample plugin registers a command and defines its title and command name. With this information VS Code can show the command in the command palette. It doesn’t yet need to load the plugin. +* `src/extension.ts` - this is the main file where you will provide the implementation of your command. + * The file exports one function, `activate`, which is called the very first time your extension is activated (in this case by executing the command). Inside the `activate` function we call `registerCommand`. + * We pass the function containing the implementation of the command as the second parameter to `registerCommand`. 
+ +## Setup + +* install the recommended extensions (amodio.tsl-problem-matcher and dbaeumer.vscode-eslint) + + +## Get up and running straight away + +* Press `F5` to open a new window with your extension loaded. +* Run your command from the command palette by pressing (`Ctrl+Shift+P` or `Cmd+Shift+P` on Mac) and typing `Hello World`. +* Set breakpoints in your code inside `src/extension.ts` to debug your extension. +* Find output from your extension in the debug console. + +## Make changes + +* You can relaunch the extension from the debug toolbar after changing code in `src/extension.ts`. +* You can also reload (`Ctrl+R` or `Cmd+R` on Mac) the VS Code window with your extension to load your changes. + + +## Explore the API + +* You can open the full set of our API when you open the file `node_modules/@types/vscode/index.d.ts`. + +## Run tests + +* Open the debug viewlet (`Ctrl+Shift+D` or `Cmd+Shift+D` on Mac) and from the launch configuration dropdown pick `Extension Tests`. +* Press `F5` to run the tests in a new window with your extension loaded. +* See the output of the test result in the debug console. +* Make changes to `src/test/suite/extension.test.ts` or create new test files inside the `test/suite` folder. + * The provided test runner will only consider files matching the name pattern `**.test.ts`. + * You can create folders inside the `test` folder to structure your tests any way you want. + +## Go further + +* Reduce the extension size and improve the startup time by [bundling your extension](https://code.visualstudio.com/api/working-with-extensions/bundling-extension). +* [Publish your extension](https://code.visualstudio.com/api/working-with-extensions/publishing-extension) on the VS Code extension marketplace. +* Automate builds by setting up [Continuous Integration](https://code.visualstudio.com/api/working-with-extensions/continuous-integration). 
diff --git a/neural_coder/extensions/neural_compressor_ext_vscode/webpack.config.js b/neural_coder/extensions/neural_compressor_ext_vscode/webpack.config.js new file mode 100644 index 00000000000..37d7024f924 --- /dev/null +++ b/neural_coder/extensions/neural_compressor_ext_vscode/webpack.config.js @@ -0,0 +1,48 @@ +//@ts-check + +'use strict'; + +const path = require('path'); + +//@ts-check +/** @typedef {import('webpack').Configuration} WebpackConfig **/ + +/** @type WebpackConfig */ +const extensionConfig = { + target: 'node', // VS Code extensions run in a Node.js-context 📖 -> https://webpack.js.org/configuration/node/ + mode: 'none', // this leaves the source code as close as possible to the original (when packaging we set this to 'production') + + entry: './src/extension.ts', // the entry point of this extension, 📖 -> https://webpack.js.org/configuration/entry-context/ + output: { + // the bundle is stored in the 'dist' folder (check package.json), 📖 -> https://webpack.js.org/configuration/output/ + path: path.resolve(__dirname, 'dist'), + filename: 'extension.js', + libraryTarget: 'commonjs2' + }, + externals: { + vscode: 'commonjs vscode' // the vscode-module is created on-the-fly and must be excluded. 
Add other modules that cannot be webpack'ed, 📖 -> https://webpack.js.org/configuration/externals/ + // modules added here also need to be added in the .vscodeignore file + }, + resolve: { + // support reading TypeScript and JavaScript files, 📖 -> https://github.com/TypeStrong/ts-loader + extensions: ['.ts', '.js'] + }, + module: { + rules: [ + { + test: /\.ts$/, + exclude: /node_modules/, + use: [ + { + loader: 'ts-loader' + } + ] + } + ] + }, + devtool: 'nosources-source-map', + infrastructureLogging: { + level: "log", // enables logging required for problem matchers + }, +}; +module.exports = [ extensionConfig ]; \ No newline at end of file diff --git a/neural_coder/interface.py b/neural_coder/interface.py index ac062b681bf..5adf748e29e 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -17,6 +17,7 @@ import logging import time import yaml +import re from . import globals @@ -65,6 +66,7 @@ def enable( test_code_line=False, # print code line info for debug use cache_load_transformers=True, optimum_quant_config="", # only for HF optimum optimizations, yaml or hub path + use_inc=False, ): """enable a feature or a couple of features for the code @@ -138,7 +140,10 @@ def enable( "pytorch_aliblade", "tensorflow_amp", "keras_amp", - "onnx_inc_static_quant_qlinear" + "tensorflow_inc", + "onnx_inc_static_quant_qlinear", + "onnx_inc_static_quant_qdq", + "onnx_inc_dynamic_quant", ] ''' @@ -184,6 +189,7 @@ def enable( "pytorch_cuda_to_cpu", "pytorch_lightning_bf16_cpu", "tensorflow_mixed_precision", + "change_trainer_to_nlptrainer", ] # # features that need creating dummy dataloader (when needed) first @@ -198,6 +204,10 @@ def enable( "pytorch_inc_static_quant_ipex" in features: features = ["pytorch_reclaim_inputs"] + features + # intel_extension_for_transformers + if "intel_extension_for_transformers" in features: + features = ["change_trainer_to_nlptrainer"] + features + transformed_list_code_path = [] ## Determine Code Domain @@ -276,7 +286,10 @@ 
def enable( "pytorch_inc_static_quant_ipex", "pytorch_inc_huggingface_optimum_static", "pytorch_inc_huggingface_optimum_dynamic", - "onnx_inc_static_quant_qlinear" + "onnx_inc_static_quant_qlinear", + "onnx_inc_static_quant_qdq", + "onnx_inc_dynamic_quant", + "intel_extension_for_transformers", ]: # determine domain @@ -284,12 +297,21 @@ def enable( globals.code_domain = determine_domain(globals.list_code_path[0]) # for transformers code, enable optimum-intel api by default - if "transformers" in globals.code_domain: + # if specify use_inc, then still use INC API + if "transformers" in globals.code_domain and not use_inc: if "static_quant" in feature: feature = "pytorch_inc_huggingface_optimum_static" elif "dynamic_quant" in feature: feature = "pytorch_inc_huggingface_optimum_dynamic" + # optimum-intel quantization config for static and dynamic + if feature == "pytorch_inc_huggingface_optimum_static": + globals.optimum_quant_config = "quantization/quant_config_static" + elif feature == "pytorch_inc_huggingface_optimum_dynamic": + globals.optimum_quant_config = "quantization/quant_config_dynamic" + else: + pass + from .coders.autoinc.autoinc_harness import AutoInc_Harness from .coders.autoinc.calib_dataloader import Calib_Dataloader from .coders.autoinc.eval_func import Eval_Func @@ -332,6 +354,13 @@ def enable( if "tensorflow_mixed_precision" in features: from .coders.tensorflow.amp import TensorFlowKerasAMP list_transformed_code[i] = TensorFlowKerasAMP(list_transformed_code[i]).transform() + if "tensorflow_inc" in features: + from .coders.tensorflow.inc import TensorFlowKerasINC + list_transformed_code[i] = TensorFlowKerasINC(list_transformed_code[i]).transform() + # Change Trainer to NLPTrainer (only for intel_extension_for_pytorch) + if "change_trainer_to_nlptrainer" in features: + from .coders.pytorch.change_trainer_to_nlptrainer import TrainerToNLPTrainer + list_transformed_code[i] = TrainerToNLPTrainer(list_transformed_code[i]).transform() logger.info(f"Code 
transformation for feature: [{feature}] finished.") @@ -634,8 +663,8 @@ def bench( pass if "Accuracy (int8|fp32)" in line: try: - acc_int8 = float(line[line.find("Accuracy")+22:line.find("Accuracy")+28]) - acc_fp32 = float(line[line.find("Accuracy")+29:line.find("Accuracy")+35]) + acc_int8 = float(re.search(r"\d+\.\d+", line).group()) + acc_fp32 = float(re.search(r"(?<=\|)\d+\.\d+", line).group()) acc_delta = round((acc_int8 - acc_fp32) / acc_fp32 * 100, 2) # percent of increase/decrease except ValueError as ve: pass @@ -700,6 +729,7 @@ def superbench( ncore_per_instance=-1, # only for "self_defined" mode ninstances=-1, # only for "self_defined" mode bench_batch_size=-1, # only for "self_defined" mode + use_inc=False, auto_quant=False, ): @@ -866,6 +896,7 @@ def superbench( ncore_per_instance=ncore_per_instance, ninstances=ninstances, bench_batch_size=bench_batch_size, + use_inc=use_inc, ) if dry_run: @@ -1002,6 +1033,7 @@ def remove_if_have(list, element): code=code, features=features_to_generate, save_patch_path="intel_optimization", + use_inc=use_inc, ) logger.info('The optimization patch was saved to "intel_optimziation.diff"') @@ -1061,6 +1093,7 @@ def remove_if_have(list, element): ncore_per_instance=ncore_per_instance, ninstances=ninstances, bench_batch_size=bench_batch_size, + use_inc=use_inc, ) if dry_run: @@ -1225,6 +1258,7 @@ def auto_quant( ncore_per_instance=-1, # only for "self_defined" mode ninstances=-1, # only for "self_defined" mode bench_batch_size=-1, # only for "self_defined" mode + use_inc=False, ): return superbench( code, @@ -1240,5 +1274,6 @@ def auto_quant( ncore_per_instance=ncore_per_instance, # only for "self_defined" mode ninstances=ninstances, # only for "self_defined" mode bench_batch_size=bench_batch_size, # only for "self_defined" mode + use_inc=use_inc, auto_quant=True, ) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index bc46fdbd916..bcd0491a646 100644 --- a/neural_compressor/__init__.py +++ 
b/neural_compressor/__init__.py @@ -15,17 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .quantization import Quantization from .pruning import Pruning -from .benchmark import benchmark, Benchmark +from .benchmark import Benchmark from .version import __version__ from .contrib import * # we need to set a global 'NA' backend, or Model can't be used -from .utils.utility import set_backend +from .utils.utility import set_random_seed, set_tensorboard, set_workspace from .utils import options from .conf.config import conf from .conf.pythonic_config import config from .config import DistillationConfig, PostTrainingQuantConfig, \ PruningConfig, QuantizationAwareTrainingConfig - -set_backend('NA') \ No newline at end of file diff --git a/neural_compressor/adaptor/keras.py b/neural_compressor/adaptor/keras.py new file mode 100644 index 00000000000..ad8081d6b0d --- /dev/null +++ b/neural_compressor/adaptor/keras.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import copy +import json +import yaml +import math +import numpy as np +from collections import OrderedDict, UserDict +from .query import QueryBackendCapability +from .adaptor import adaptor_registry, Adaptor +from ..utils.utility import LazyImport, CpuInfo, singleton, Dequantize, dump_elapsed_time +from ..utils.utility import Statistics, GLOBAL_STATE, MODE, version1_lt_version2 +from ..utils import logger +from ..conf.dotdict import deep_get +from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +tf = LazyImport('tensorflow') + +def _add_supported_quantized_objects(custom_objects): + """Map all the quantized objects.""" + from neural_compressor.adaptor.keras_utils.quantizer import Quantize, DeQuantize + from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant, QConv2D, QDense + custom_objects["Quantize"] = Quantize + custom_objects["DeQuantize"] = DeQuantize + custom_objects["FakeQuant"] = FakeQuant + custom_objects["QConv2D"] = QConv2D + custom_objects["QDense"] = QDense + return custom_objects + +@adaptor_registry +class KerasAdaptor(Adaptor): + '''The keras class of framework adaptor layer. 
+ + ''' + def __init__(self, framework_specific_info): + super(KerasAdaptor, self).__init__(framework_specific_info) + self.framework_specific_info = framework_specific_info + self.approach = deep_get(self.framework_specific_info, 'approach', False) + self.quantize_config = {'op_wise_config': {}} + self.device = self.framework_specific_info['device'] + #self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) + self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) + #os.makedirs(self.work_dir, exist_ok=True) + + self.pre_optimized_model = None + self.pre_optimizer_handle = None + self.fp32_ops = [] + self.query_handler = KerasQuery(local_config_file=os.path.join( + os.path.dirname(__file__), 'keras.yaml')) + + self.fp32_results = [] + self.fp32_preds_as_label = False + self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.callbacks = [] + self.optype_statistics = None + + def tuning_cfg_to_fw(self, tuning_cfg): + self.quantize_config['calib_iteration'] = tuning_cfg['calib_iteration'] + self.quantize_config['device'] = self.device + self.quantize_config['advance'] = deep_get(tuning_cfg, 'advance') + fp32_ops = [] + dispatched_op_names = [j[0] for j in tuning_cfg['op']] + invalid_op_names = [i for i in self.quantize_config['op_wise_config'] + if i not in dispatched_op_names] + + for op_name in invalid_op_names: + self.quantize_config['op_wise_config'].pop(op_name) + + for each_op_info in tuning_cfg['op']: + op_name = each_op_info[0] + if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'fp32': + if op_name in self.quantize_config['op_wise_config']: + self.quantize_config['op_wise_config'].pop(op_name) + fp32_ops.append(op_name) + continue + + is_perchannel = False + bit = None + if 'weight' in tuning_cfg['op'][each_op_info]: + is_perchannel = tuning_cfg['op'][each_op_info]['weight'][ + 'granularity'] == 'per_channel' + #bit = tuning_cfg['op'][each_op_info]['weight']['bit'] + weight_bit = bit if bit else 7.0 + 
algorithm = tuning_cfg['op'][each_op_info]['activation']['algorithm'] + is_asymmetric = False + if 'activation' in tuning_cfg['op'][each_op_info]: + is_asymmetric = tuning_cfg['op'][each_op_info]['activation']['scheme'] == 'asym' + self.quantize_config['op_wise_config'][op_name] = (is_perchannel, + algorithm, + is_asymmetric, + weight_bit) + self.fp32_ops = fp32_ops + + @dump_elapsed_time("Pass quantize model") + def quantize(self, tune_cfg, model, dataloader, q_func=None): + '''Execute the quantize process on the specified model. + + Args: + tune_cfg(dict): The chosen tuning configuration. + model (object): The model to do quantization. + dataloader(object): The dataloader used to load quantization dataset. + q_func (optional): training function for quantization aware training mode. + ''' + self.tuning_cfg_to_fw(tune_cfg) + logger.debug("Dump quantization configurations:") + logger.debug(self.quantize_config) + calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + if isinstance(dataloader, BaseDataLoader): + batch_size = dataloader.batch_size + for i in range(batch_size): + if calib_sampling_size % (batch_size - i) == 0: + calib_batch_size = batch_size - i + if i != 0: # pragma: no cover + logger.warning("Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + + " to make sure the sampling_size is " + "divisible exactly by batch size") + break + tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) + dataloader.batch(calib_batch_size) + self.quantize_config['calib_iteration'] = tmp_iterations + + else: # pragma: no cover + if hasattr(dataloader, 'batch_size') and \ + calib_sampling_size % dataloader.batch_size != 0: + iter = self.quantize_config['calib_iteration'] + logger.warning( + "Please note that calibration sampling size {} " \ + "isn't divisible exactly by batch size {}. " \ + "So the real sampling size is {}.". 
+ format(calib_sampling_size, dataloader.batch_size, + dataloader.batch_size * iter)) + q_layers = [] + for idx, layer in enumerate(self.fp32_layers): + layer_config = layer["config"] + if layer["class_name"] in ["Conv2D", "Dense"] and \ + layer['config']['name'] in self.quantize_config['op_wise_config']: + op_config = self.quantize_config['op_wise_config'][layer['config']['name']] + mode = 'per_channel' if op_config[0] else 'per_tensor' + #(TODO) support asym/sym + fake_quant_name = 'fake_quant_' + str(idx) + q_layers.append({'class_name': 'FakeQuant', + 'config': {'mode': 'per_tensor', 'name': fake_quant_name}}) + q_layers.append(layer) + else: + q_layers.append(layer) + + keras_object = model._model_object + json_model = copy.deepcopy(json.loads(keras_object.to_json())) + json_model['config']['layers'] = q_layers + quantized_model = self._restore_model_from_json(json_model) + + converted_model = self._calibrate(quantized_model, dataloader, + self.quantize_config['calib_iteration']) + + from neural_compressor.model.keras_model import KerasModel + converted_model = KerasModel(converted_model) + return converted_model + + def _calibrate(self, model, dataloader, calib_interation): + # run eagerly to fetch the numpy min/max + model.compile(run_eagerly=True) + results = {} + for idx, (inputs, labels) in enumerate(dataloader): + outputs = model.predict_on_batch(inputs) + json_model = copy.deepcopy(json.loads(model.to_json())) + config = json_model["config"] + layers = config["layers"] + for layer in layers: + if layer['class_name'] == 'FakeQuant': + min_value = layer['config']['min_value'] + max_value = layer['config']['max_value'] + if layer['config']['name'] not in results: + results[layer['config']['name']] = { + 'min': [min_value], 'max': [max_value]} + else: + results[layer['config']['name']]['min'].append(min_value) + results[layer['config']['name']]['max'].append(max_value) + if idx + 1 == calib_interation: + break + + # insert the calibrated min/max to Q/DQ + 
json_model = copy.deepcopy(json.loads(model.to_json())) + config = json_model["config"] + layers = config["layers"] + q_layers = [] + for layer in layers: + layer_config = copy.deepcopy(layer['config']) + if layer['class_name'] == 'FakeQuant': + min_value = min(results[layer['config']['name']]['min']) + max_value = max(results[layer['config']['name']]['max']) + q_layers.append({'class_name': 'Quantize', + 'config': {'min_range': min_value, + 'max_range': max_value, + }}) + q_layers.append({'class_name': 'DeQuantize', + 'config': {'min_range': min_value, + 'max_range': max_value, + }}) + elif layer['class_name'] == 'Conv2D' or layer['class_name'] == 'Dense': + # index 0 is weight, index 1 is bias + q_layer_name = 'Q' + layer['class_name'] + kernel = self.layer_weights[layer['config']['name']][0] + layer_config['min_value'] = str(kernel.min()) + layer_config['max_value'] = str(kernel.max()) + q_layers.append({'class_name': q_layer_name, 'config': layer_config}) + else: + q_layers.append(layer) + + json_model['config']['layers'] = q_layers + quantized_model = self._restore_model_from_json(json_model) + return quantized_model + + def _restore_model_from_json(self, json_model): + from tensorflow.keras.models import model_from_json + custom_objects = {} + # We need to keep a dictionary of custom objects as our quantized library + # is not recognized by keras. 
+ custom_objects = _add_supported_quantized_objects(custom_objects) + qmodel = model_from_json(json.dumps(json_model), custom_objects=custom_objects) + qmodel = self._set_weights(qmodel, self.layer_weights) + return qmodel + + # set fp32 weights to qmodel + def _set_weights(self, qmodel, layer_weights): + for qlayer in qmodel.layers: + if qlayer.get_weights(): + if qlayer.name in layer_weights: + qlayer.set_weights(layer_weights[qlayer.name]) + else: + hit_layer = False + for sub_layer in qlayer.submodules: + if sub_layer.name in layer_weights: + qlayer.set_weights(layer_weights[sub_layer.name]) + hit_layer = True + break + if not hit_layer: + raise ValueError('Can not match the module weights....') + return qmodel + + @dump_elapsed_time(customized_msg="Model inference") + def evaluate(self, model, dataloader, postprocess=None, + metrics=None, measurer=None, iteration=-1, + tensorboard=False, fp32_baseline=False): + '''The function is used to run evaluation on validation dataset. + + Args: + model (object): The model to do calibration. + dataloader (generator): generate the data and labels. + postprocess (object, optional): process the result from the model + metric (object, optional): Depends on model category. Defaults to None. + measurer (object, optional): for precise benchmark measurement. + iteration(int, optional): control steps of mini-batch + tensorboard (boolean, optional): for tensorboard inspect tensor. 
+ fp32_baseline (boolean, optional): only for compare_label=False pipeline + ''' + # use keras object + keras_model = model.model + logger.info("Start to evaluate the Keras model.") + results = [] + for idx, (inputs, labels) in enumerate(dataloader): + # use predict on batch + if measurer is not None: + measurer.start() + predictions = keras_model.predict_on_batch(inputs) + measurer.end() + else: + predictions = keras_model.predict_on_batch(inputs) + + if self.fp32_preds_as_label: + self.fp32_results.append(predictions) if fp32_baseline else \ + results.append(predictions) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + if metrics: + for metric in metrics: + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + if idx + 1 == iteration: + break + return results + + def query_fw_capability(self, model): + '''The function is used to return framework tuning capability. + + Args: + model (object): The model to query quantization tuning capability. 
+ ''' + self.pre_optimized_model = model + fp32_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}} + int8_type = self.query_handler.get_op_types_by_precision(precision='int8') + op_capability = self.query_handler.get_quantization_capability() + conv_config = copy.deepcopy(op_capability['int8']['Conv2D']) + dense_config = copy.deepcopy(op_capability['int8']['Dense']) + other_config = copy.deepcopy(op_capability['int8']['default']) + + # get the layers info + keras_object = model._model_object + json_model = copy.deepcopy(json.loads(keras_object.to_json())) + config = json_model["config"] + self.fp32_layers = config["layers"] + + # get fp32 layer weights + self.layer_weights = {} + for layer in keras_object.layers: + if layer.get_weights(): + self.layer_weights[layer.name] = copy.deepcopy(layer.get_weights()) + + quantizable_op_details = OrderedDict() + for details in self.fp32_layers: + node_op = details['class_name'] + node_name = details['config']['name'] + if node_op == 'Conv2D': + quantizable_op_details[(node_name, node_op)] = [conv_config, fp32_config] + elif node_op == 'Dense': + quantizable_op_details[(node_name, node_op)] = [dense_config, fp32_config] + else: + quantizable_op_details[(node_name, node_op)] = [fp32_config] + + capability = { + 'opwise': copy.deepcopy(quantizable_op_details), + 'optypewise': self.get_optype_wise_ability(quantizable_op_details), + } + logger.debug("Dump framework quantization capability:") + logger.debug(capability) + + return capability + + def get_optype_wise_ability(self, quantizable_op_details): + """Get the op type wise capability by generating the union value of each op type. + Returns: + [string dict]: the key is op type while the value is the + detail configurations of activation and weight for this op type. 
+ """ + res = OrderedDict() + for op in quantizable_op_details: + if op[1] not in res: + res[op[1]] = {'activation': quantizable_op_details[op][0]['activation']} + if 'weight' in quantizable_op_details[op][0]: + res[op[1]]['weight'] = quantizable_op_details[op][0]['weight'] + return res + + def inspect_tensor(self, model, dataloader, op_list=[], iteration_list=[], + inspect_type='activation', save_to_disk=False): + '''The function is used by tune strategy class for dumping tensor info. + + Args: + model (object): The model to inspect. + dataloader (object): The dataloader used to feed into. + op_list (list): The op name in the fp32 model for dumpping. + iteration_list (list): The iteration list containing iterations to dump. + inspect_type (str): The valid value are 'weight', 'activation', 'all'. + save_to_disk (bool): Save to disk or memory. + + Return: + Numpy Array Dict + { + 'weight': { + 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, + 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, + ... + }, + 'activation': [ + # iter 0 + { + 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} + 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} + ... + }, + # iter 1 + ... + ] + } + ''' + pass + + def set_tensor(self, model, tensor_dict): + '''The function is used by tune strategy class for setting tensor back to model. + + Args: + model (object): The model to set tensor. Usually it is quantized model. + tensor_dict (dict): The tensor dict to set. Note the numpy array contains float + value, adaptor layer has the responsibility to quantize to + int8 or int32 to set into the quantized model if needed. + The dict format is something like: + { + 'weight0_name': numpy.array, + 'bias0_name': numpy.array, + ... 
+ } + ''' + pass + + def quantize_input(self, model): + ''' quantize the model to be able to take quantized input + + Args: + model (object): The model to quantize input + + Return: + model (object): The quantized input model + scale (float): The scale for dataloader to generate quantized input + ''' + return model, 1. + + def _pre_eval_hook(self, model, *args, **kwargs): + '''The function is used to do some preprocession before evaluation phase. + + Return: + model + ''' + return model + + def _post_eval_hook(self, model, *args, **kwargs): + '''The function is used to do some post process after complete evaluation. + ''' + pass + + def save(self, model, path): + '''The function is used by tune strategy class for saving model. + + Args: + model (object): The model to saved. + path (string): The path where to save. + ''' + model.save(path) + + def convert(self, model, source, destinatin): + '''The function is used to convert a source model format to another. + + Args: + model (neural_compressor.model): base model to be converted. + source (string): The source model format. + destination (string): The destination model format. + ''' + pass + +class KerasQuery(QueryBackendCapability): + def __init__(self, local_config_file=None): + super().__init__() + self.version = tf.version.VERSION + self.cfg = local_config_file + self.cur_config = None + self._one_shot_query() + + def _one_shot_query(self): + with open(self.cfg) as f: + content = yaml.safe_load(f) + try: + self.cur_config = self._get_specified_version_cfg(content) + except Exception as e: + logger.info("Fail to parse {} due to {}.".format(self.cfg, str(e))) + self.cur_config = None + raise ValueError("Please check if the format of {} follows Neural Compressor yaml schema.". + format(self.cfg)) + + def _get_specified_version_cfg(self, data): + """Get the configuration for the current runtime. + If there's no matched configuration in the input yaml, we'll + use the `default` field of yaml. 
+ + Args: + data (Yaml content): input yaml file. + + Returns: + [dictionary]: the content for specific version. + """ + default_config = None + for sub_data in data: + if sub_data['version']['name'] == self.version: + return sub_data + + if sub_data['version']['name'] == 'default': + default_config = sub_data + + return default_config + + def get_version(self): + """Get the current backend version infomation. + + Returns: + [string]: version string. + """ + return self.cur_config['version']['name'] + + def get_precisions(self): + """Get supported precisions for current backend. + + Returns: + [string list]: the precisions' name. + """ + return self.cur_config['precisions']['names'] + + def get_op_types(self): + """Get the supported op types by all precisions. + + Returns: + [dictionary list]: A list composed of dictionary which key is precision + and value is the op types. + """ + return self.cur_config['ops'] + + def get_quantization_capability(self): + """Get the supported op types' quantization capability. + + Returns: + [dictionary list]: A list composed of dictionary which key is precision + and value is a dict that describes all op types' quantization capability. + """ + return self.cur_config['capabilities'] + + def get_op_types_by_precision(self, precision): + """Get op types per precision + + Args: + precision (string): precision name + + Returns: + [string list]: A list composed of op type. + """ + assert precision in list(self.cur_config['ops'].keys()) + return self.cur_config['ops'][precision] diff --git a/neural_compressor/adaptor/keras.yaml b/neural_compressor/adaptor/keras.yaml new file mode 100644 index 00000000000..291eb43dc0d --- /dev/null +++ b/neural_compressor/adaptor/keras.yaml @@ -0,0 +1,69 @@ +## Copyright (c) 2021 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. 
+## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +# +- + version: + name: 'default' + + precisions: &common_precisions + names: int8, fp32 + valid_mixed_precisions: [] + + ops: &common_ops + int8: ['Conv2D', 'Dense'] + fp32: ['*'] # '*' means all op types + + capabilities: &common_capabilities + int8: { + 'Conv2D': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'Dense': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + } + }, + 'default': { + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'] + } + }, + } diff --git a/neural_compressor/adaptor/keras_utils/__init__.py b/neural_compressor/adaptor/keras_utils/__init__.py new file mode 100644 index 00000000000..ed04d17bdbe --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/neural_compressor/adaptor/keras_utils/quantizer.py b/neural_compressor/adaptor/keras_utils/quantizer.py new file mode 100644 index 00000000000..76742001ded --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/quantizer.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import numpy as np +import tensorflow as tf + +from tensorflow.python.eager import context +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras import activations +from tensorflow.python.keras import backend +from tensorflow.python.keras import constraints +from tensorflow.python.keras import initializers +from tensorflow.python.keras import regularizers +from tensorflow.python.keras.engine.input_spec import InputSpec +from tensorflow.python.keras.utils import conv_utils +from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.util.tf_export import keras_export + +from tensorflow.keras.layers import Layer +from tensorflow.python.keras.layers.convolutional import Conv +from tensorflow.python.keras.layers.core import Dense + +class FakeQuant(Layer): + def __init__(self, mode='per_tensor', **kwargs): + super(FakeQuant, self).__init__(**kwargs) + self.mode = mode + self.axis = 1 if mode == 'per_channel' else 0 + self.min_value = tf.constant(np.finfo(np.float32).max, dtype=tf.float32) + self.max_value = tf.constant(np.finfo(np.float32).min, dtype=tf.float32) + + def call(self, inputs): + if self.mode == 'per_tensor': + self.min_value = tf.math.reduce_min(inputs) + self.max_value = tf.math.reduce_max(inputs) + else: + self.min_value = tf.math.reduce_min(inputs, axis=self.axis) + self.max_value = tf.math.reduce_max(inputs, axis=self.axis) + return inputs + + @classmethod + def from_config(cls, config): + return cls(**config) + + def get_config(self): + 
return {'mode': self.mode, + 'min_value': self.min_value.numpy(), + 'max_value': self.max_value.numpy(), + 'name': self.name} + +class Quantize(Layer): + def __init__(self, min_range, max_range, T=tf.qint8, mode='SCALED', + round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, axis=None): + super(Quantize, self).__init__() + self.min_range = float(min_range) + self.max_range = float(max_range) + self.T = T + self.mode = mode + self.round_mode = round_mode + self.narrow_range = narrow_range + self.axis = axis + + def call(self, inputs): + outputs, _, _ = tf.quantization.quantize(inputs, self.min_range, + self.max_range, self.T, + mode=self.mode, round_mode=self.round_mode, + narrow_range=self.narrow_range, axis=self.axis) + return outputs + + def get_config(self): + return {'min_range': self.min_range, 'max_range': self.max_range, + 'T': self.T, 'mode': self.mode, 'round_mode': self.round_mode, + 'narrow': self.narrow_range, 'axis': self.axis} + + @classmethod + def from_config(cls, config): + return cls(**config) + +class QConv2D(Conv): + def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', + data_format=None, dilation_rate=(1, 1), groups=1, activation=None, + use_bias=True, kernel_initializer='glorot_uniform', + bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + min_value=-10000, max_value=10000, **kwargs): + super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, groups=groups, + activation=activations.get(activation), + use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + 
kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + input_shape = inputs.shape + + if self._is_causal: # Apply causal padding to inputs for Conv1D. + inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs)) + + # add the Q/DQ here + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + outputs = self._convolution_op(inputs, kernel) + + if self.use_bias: + output_rank = outputs.shape.rank + if self.rank == 1 and self._channels_first: + # nn.bias_add does not accept a 1D input tensor. + bias = array_ops.reshape(self.bias, (1, self.filters, 1)) + outputs += bias + else: + # Handle multiple batch dimensions. + if output_rank is not None and output_rank > 2 + self.rank: + + def _apply_fn(o): + return nn.bias_add(o, self.bias, data_format=self._tf_data_format) + + outputs = conv_utils.squeeze_batch_dims( + outputs, _apply_fn, inner_rank=self.rank + 1) + else: + outputs = nn.bias_add( + outputs, self.bias, data_format=self._tf_data_format) + + if not context.executing_eagerly(): + # Infer the static output shape: + out_shape = self.compute_output_shape(input_shape) + outputs.set_shape(out_shape) + + if self.activation is not None: + return self.activation(outputs) + return outputs + +class QDense(Dense): + def __init__(self, + units, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs): + super(QDense, self).__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + 
kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = math_ops.cast(inputs, dtype=self._compute_dtype_object) + + # add the Q/DQ here + # (TODO) we have not try sparse dense and may have issues + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + rank = inputs.shape.rank + if rank == 2 or rank is None: + # We use embedding_lookup_sparse as a more efficient matmul operation for + # large sparse input tensors. The op will result in a sparse gradient, as + # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense + # gradients. This can lead to sigfinicant speedups, see b/171762937. + if isinstance(inputs, sparse_tensor.SparseTensor): + # We need to fill empty rows, as the op assumes at least one id per row. + inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0) + # We need to do some munging of our input to use the embedding lookup as + # a matrix multiply. We split our input matrix into separate ids and + # weights tensors. The values of the ids tensor should be the column + # indices of our input matrix and the values of the weights tensor + # can continue to the actual matrix weights. + # The column arrangement of ids and weights + # will be summed over and does not matter. See the documentation for + # sparse_ops.sparse_tensor_dense_matmul a more detailed explanation + # of the inputs to both ops. 
+ ids = sparse_tensor.SparseTensor( + indices=inputs.indices, + values=inputs.indices[:, 1], + dense_shape=inputs.dense_shape) + weights = inputs + outputs = embedding_ops.embedding_lookup_sparse_v2( + kernel, ids, weights, combiner='sum') + else: + outputs = gen_math_ops.MatMul(a=inputs, b=kernel) + # Broadcast kernel to inputs. + else: + outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]]) + # Reshape the output back to the original ndim of the input. + if not context.executing_eagerly(): + shape = inputs.shape.as_list() + output_shape = shape[:-1] + [kernel.shape[-1]] + outputs.set_shape(output_shape) + + if self.use_bias: + outputs = nn_ops.bias_add(outputs, self.bias) + + if self.activation is not None: + outputs = self.activation(outputs) + return outputs + + +class DeQuantize(Layer): + def __init__(self, min_range, max_range, mode='SCALED', + narrow_range=False, axis=None): + super(DeQuantize, self).__init__() + self.min_range = min_range + self.max_range = max_range + self.mode = mode + self.narrow_range = narrow_range + self.axis = axis + + def call(self, inputs): + return tf.quantization.dequantize(inputs, float(self.min_range), + float(self.max_range), mode=self.mode, + narrow_range=self.narrow_range, axis=self.axis) + def get_config(self): + return {'min_range': self.min_range, 'max_range': self.max_range, + 'mode': self.mode, 'narrow': self.narrow_range, 'axis': self.axis, + 'dtype': self.dtype} + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/neural_compressor/adaptor/mxnet_utils/util.py b/neural_compressor/adaptor/mxnet_utils/util.py index 2808103c565..4aad408179c 100644 --- a/neural_compressor/adaptor/mxnet_utils/util.py +++ b/neural_compressor/adaptor/mxnet_utils/util.py @@ -24,7 +24,7 @@ from enum import Enum from tempfile import TemporaryDirectory from neural_compressor.utils.utility import LazyImport -from neural_compressor.model.model import MXNetModel as NCModel +from 
neural_compressor.model.mxnet_model import MXNetModel as NCModel mx = LazyImport("mxnet") @@ -126,16 +126,16 @@ def fuse(sym_model, ctx): assert isinstance(sym_model, tuple) and isinstance(sym_model[0], mx.symbol.Symbol) symnet, args, auxs = sym_model - backend = get_backend_name(ctx) - if backend is not None: + framework = get_framework_name(ctx) + if framework is not None: if check_mx_version('2.0.0'): - symnet = symnet.optimize_for(backend) + symnet = symnet.optimize_for(framework) else: - symnet = symnet.get_backend_symbol(backend) + symnet = symnet.get_backend_symbol(framework) return (symnet, args, auxs) -def get_backend_name(ctx): +def get_framework_name(ctx): if 'cpu' in ctx.device_type: if check_mx_version('2.0.0'): return 'ONEDNN_QUANTIZE' diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index 553169e0cf9..a0df0445556 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -27,11 +27,13 @@ from importlib.util import find_spec from neural_compressor.adaptor.adaptor import adaptor_registry, Adaptor from neural_compressor.adaptor.query import QueryBackendCapability +from neural_compressor.adaptor.ox_utils.util import PROVIDERS, ONNXRT_BACKENDS from neural_compressor.utils.utility import LazyImport, dump_elapsed_time, \ GLOBAL_STATE, MODE from neural_compressor.utils.utility import Statistics from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader from neural_compressor.conf.dotdict import deep_get +from neural_compressor.utils.utility import CpuInfo import math import sys @@ -43,7 +45,8 @@ logger = logging.getLogger("neural_compressor") -class ONNXRTAdaptor(Adaptor): +@adaptor_registry +class ONNXRUNTIMEAdaptor(Adaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. 
Args: @@ -57,24 +60,62 @@ def __init__(self, framework_specific_info): self.device = framework_specific_info["device"] self.static = framework_specific_info["approach"] == "post_training_static_quant" self.dynamic = framework_specific_info["approach"] == "post_training_dynamic_quant" - self.backend = framework_specific_info["backend"] + self.backend = PROVIDERS[framework_specific_info["backend"]] + + if self.backend not in ort.get_all_providers(): + logger.warning("{} backend is not supported in current environment, " + "supported backends: {}".format(ONNXRT_BACKENDS[self.backend], + [ONNXRT_BACKENDS[i] for i in ort.get_all_providers()])) + + if (not self.dynamic and "format" in framework_specific_info and \ + framework_specific_info["format"].lower() == 'qdq') or \ + self.backend == 'TensorrtExecutionProvider': + self.query_handler = ONNXRTQuery(local_config_file=os.path.join( + os.path.dirname(__file__), "onnxrt_qdq.yaml")) + self.format = "qdq" + else: + if not self.dynamic: + self.query_handler = ONNXRTQuery(local_config_file=os.path.join( + os.path.dirname(__file__), "onnxrt_qlinear.yaml")) + self.format = "qlinearops" + else: + self.query_handler = ONNXRTQuery(local_config_file=os.path.join( + os.path.dirname(__file__), "onnxrt_integer.yaml")) + self.format = "integerops" + if "format" in framework_specific_info and \ + framework_specific_info["format"].lower() == 'qdq': + logger.warning("Dynamic approach doesn't support QDQ format.") + self.work_space = framework_specific_info["workspace_path"] self.graph_optimization = framework_specific_info["graph_optimization"] self.recipes = deep_get(framework_specific_info, 'recipes', {}) self.reduce_range = framework_specific_info["reduce_range"] if \ - "reduce_range" in framework_specific_info else None + "reduce_range" in framework_specific_info else not CpuInfo().vnni self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) os.makedirs(self.work_space, exist_ok=True) self.pre_optimized_model = None 
self.quantizable_op_types = [] self.query_handler_ext = None + if framework_specific_info["approach"] == "post_training_auto_quant" and \ + self.format != "integerops": + self.query_handler_ext = ONNXRTQuery(local_config_file=os.path.join( + os.path.dirname(__file__), "onnxrt_integer.yaml")) + for precision in self.query_handler.get_precisions(): if precision != 'fp32': - if self.device == 'cpu' and precision == 'fp16': - continue self.quantizable_op_types += \ self.query_handler.get_op_types_by_precision(precision=precision) + if self.backend == 'TensorrtExecutionProvider': + from neural_compressor import options + options.onnxrt.qdq_setting.AddQDQPairToWeight = True + options.onnxrt.qdq_setting.DedicatedQDQPair = True + options.onnxrt.graph_optimization.level = 'DISABLE_ALL' + options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin = \ + ['Conv', 'Gemm', 'Add', 'MatMul'] + self.static = True + self.dynamic = False + self.evaluate_nums = 0 self.fp32_results = [] @@ -109,17 +150,13 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): if model.model.opset_import[0].version < 11: # pragma: no cover logger.warning("Quantize input needs model opset 11 or newer.") from neural_compressor.adaptor.ox_utils.util import QuantizationMode - if self.backend in ["qlinearops", "qoperator"]: - backend = QuantizationMode.QLinearOps - if self.backend == "qlinearops": - logger.warning("onnxrt_qlinearops uses the same model representation format as " - "onnxrt_qoperator. 
Recommended to use onnxrt_qoperator to align " - "with ONNXRUNTIME QuantFormat") - elif self.backend == "qdq": + if self.format == "qlinearops": + format = QuantizationMode.QLinearOps + elif self.format == "qdq": assert ort_version >= ONNXRT170_VERSION, 'QDQ mode needs onnxruntime1.7.0 or newer' - backend = "qdq" + format = "qdq" else: - backend = QuantizationMode.IntegerOps + format = QuantizationMode.IntegerOps self.quantizable_ops = self._query_quantizable_ops(model.model) tmp_model = copy.deepcopy(model) @@ -154,7 +191,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): format(batch_size, 1)) data_loader.batch(1) quantize_params = self._get_quantize_params(tmp_model, data_loader, \ - quantize_config, calib_sampling_size) + quantize_config, calib_sampling_size) else: # pragma: no cover if hasattr(data_loader, 'batch_size') and \ calib_sampling_size % data_loader.batch_size != 0: @@ -165,14 +202,14 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): format(calib_sampling_size, data_loader.batch_size, data_loader.batch_size * iterations)) quantize_params = self._get_quantize_params(tmp_model, data_loader, \ - quantize_config, iterations) + quantize_config, iterations) else: quantize_params = None self.quantize_params = quantize_params from neural_compressor.adaptor.ox_utils.quantizer import Quantizer quantizer = Quantizer(copy.deepcopy(model), quantize_config, - backend, + format, self.static, quantize_params, self.quantizable_op_types, @@ -204,7 +241,8 @@ def _generate_qconfig(self, model, tune_cfg, quantize_params): fwk_info = {} fwk_info['approach'] = "post_training_static_quant" if self.static else \ "post_training_dynamic_quant" - fwk_info['backend'] = self.backend + fwk_info['format'] = self.format + fwk_info['backend'] = ONNXRT_BACKENDS[self.backend] fwk_info['workspace_path'] = self.work_space fwk_info['graph_optimization'] = self.graph_optimization fwk_info['device'] = self.device @@ -232,20 +270,20 @@ def recover(self, 
model, q_config): logger.warning("Quantize input needs model opset 11 or newer.") from neural_compressor.adaptor.ox_utils.util import QuantizationMode - if self.backend in ["qlinearops", "qoperator"]: - backend = QuantizationMode.QLinearOps - elif self.backend == "qdq": + if self.format in ["qlinearops"]: + format = QuantizationMode.QLinearOps + elif self.format == "qdq": assert ort_version >= ONNXRT170_VERSION, 'QDQ mode needs onnxruntime1.7.0 or newer' - backend = self.backend + format = self.format else: - backend = QuantizationMode.IntegerOps + format = QuantizationMode.IntegerOps from neural_compressor.adaptor.ox_utils.quantizer import Quantizer self.quantizable_ops = self._query_quantizable_ops(model.model) quantize_params, tune_cfg = self._parse_qconfig(q_config) quantize_config = self._cfg_to_quantize_config(tune_cfg) quantizer = Quantizer(model.model, quantize_config, - backend, + format, self.static, quantize_params, self.quantizable_op_types, @@ -339,9 +377,10 @@ def _get_quantize_params(self, model, data_loader, quantize_config, iterations): augment = ONNXRTAugment(model, \ data_loader, self.quantizable_op_types, \ black_nodes=black_nodes, white_nodes=white_nodes, \ - iterations=list(range(0, quantize_config['calib_iteration']))) + iterations=list(range(0, quantize_config['calib_iteration'])), + backend=self.backend, reduce_range=self.reduce_range) self.min_max = augment.dump_minmax() - quantize_params = augment.dump_calibration() + quantize_params = augment.dump_calibration(quantize_config) return quantize_params def inspect_tensor(self, model, dataloader, op_list=[], @@ -362,7 +401,8 @@ def inspect_tensor(self, model, dataloader, op_list=[], op_list = [item[0] for item in op_list] augment = ONNXRTAugment(model, dataloader, [], \ iterations=iteration_list, - white_nodes=op_list) + white_nodes=op_list, + backend=self.backend) tensors = augment.dump_tensor(activation=(inspect_type!='weight'), weight=(inspect_type!='activation')) if save_to_disk: @@ 
-460,9 +500,13 @@ def _pre_optimize(self, model, level=1): from onnxruntime_extensions import get_library_path sess_options.register_custom_ops_library(get_library_path()) if not model.large_size: - ort.InferenceSession(model.model.SerializeToString(), sess_options) + ort.InferenceSession(model.model.SerializeToString(), + sess_options, + providers=[self.backend]) elif model.model_path is not None: # pragma: no cover - ort.InferenceSession(model.model_path, sess_options) + ort.InferenceSession(model.model_path, + sess_options, + providers=[self.backend]) else: # pragma: no cover logger.warning('Please use model path instead of onnx model object to quantize') @@ -475,10 +519,44 @@ def _pre_optimize(self, model, level=1): if self.graph_optimization.gemm2matmul else tmp_model model.model = self._rename_node(model.model) model = self._revert_fusedconv(model) + if self.backend == 'TensorrtExecutionProvider': + model = self._revert_conv_add_fusion(model) model = split_shared_bias(model) model.topological_sort() self.pre_optimized_model = copy.deepcopy(model) + def _revert_conv_add_fusion(self, model): + from onnx import numpy_helper + from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg + add_nodes = [] + remove_nodes = [] + for node in model.model.graph.node: + if node.op_type == 'Conv' and len(node.input) == 3: + bias_tensor = model.get_initializer(node.input[2]) + bias_array = numpy_helper.to_array(bias_tensor).reshape((-1, 1, 1)) + model.remove_initializer(bias_tensor) + model.add_initializer(numpy_helper.from_array(bias_array, bias_tensor.name)) + kwargs = {} + activation_params = None + for attr in node.attribute: + kwargs.update(attribute_to_kwarg(attr)) + conv = onnx.helper.make_node( + 'Conv', + node.input[0:2], + [node.name + '_revert'], + node.name, **kwargs) + add = onnx.helper.make_node( + 'Add', + [conv.output[0], node.input[2]], + node.output, + node.name + '_add') + add_nodes.extend([conv, add]) + + model.remove_nodes(remove_nodes) + 
model.add_nodes(add_nodes) + model.update() + return model + def _revert_fusedconv(self, model): from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg from onnx import onnx_pb as onnx_proto @@ -630,28 +708,32 @@ def query_fw_capability(self, model): precisions = query.get_precisions() for precision in precisions: - if precision == 'fp16' and self.device == 'cpu' and os.getenv('FORCE_FP16') != '1': - continue - if precision in query.get_quantization_capability(): - special_config_types = list(query.get_quantization_capability() \ - [precision].keys()) - default_config = query.get_quantization_capability() \ - [precision]['default'] - else: - special_config_types = {} - default_config = {'weight': {'dtype': precision}, - 'activation': {'dtype': precision}} + # get supported optype for target precision optypes = query.get_op_types_by_precision(precision) if \ query.get_op_types_by_precision(precision) != ['*'] else \ optype_wise.keys() + + if self.backend in query.get_quantization_capability(): + configs = query.get_quantization_capability()[self.backend] if \ + precision in query.get_quantization_capability() else \ + {'default': {'weight': {'dtype': precision}, 'activation': {'dtype': precision}}} + else: + continue + + if self.backend == 'TensorrtExecutionProvider' and \ + precision not in query.get_fallback_list(): + optypes.append('Add') + for op in optypes: if op not in quantizable_optype: continue - if op not in special_config_types: - op_capability = copy.deepcopy(default_config) + if op not in configs: + if 'default' in configs: + op_capability = copy.deepcopy(configs['default']) + else: + continue else: - op_capability = copy.deepcopy( - query.get_quantization_capability()[precision][op]) + op_capability = copy.deepcopy(configs[op]) if precision in ['int8', 'uint8']: if self.static: @@ -694,6 +776,14 @@ def query_fw_capability(self, model): all_conv_matmul.append(node) for _, node in enumerate(self.pre_optimized_model.nodes()): + # for TRT 
EP, only insert Q/DQ to inputs of Add nodes followed by ReduceMean + if node.op_type == 'Add' and self.backend == 'TensorrtExecutionProvider': + children = self.pre_optimized_model.get_children(node) + if 'ReduceMean' not in [i.op_type for i in children]: + op_wise.update({(node.name, node.op_type): + [{'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}]}) + continue + if node.op_type in optype_wise: if (exclude_first_quantizable_op and node.name in first_quantizable_node) \ or (exclude_last_quantizable_op and node.name in last_quantizable_node): @@ -745,6 +835,8 @@ def _cfg_to_quantize_config(self, tune_cfg): from onnx import onnx_pb as onnx_proto for _, op in enumerate(self.quantizable_ops): + if (op.name, op.op_type) not in tune_cfg['op']: + continue if tune_cfg['op'][(op.name, op.op_type)]['activation']['dtype'] in \ self.query_handler.get_fallback_list(): quantize_config[op.name] = \ @@ -805,6 +897,8 @@ def evaluate(self, input_graph, dataloader, postprocess=None, location="weights.pb", convert_attribute=False) sess_options = ort.SessionOptions() + if self.backend == 'TensorrtExecutionProvider': + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL if measurer: # https://github.com/microsoft/onnxruntime/issues/7347 cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) @@ -813,9 +907,12 @@ def evaluate(self, input_graph, dataloader, postprocess=None, if sys.version_info < (3,10) and find_spec('onnxruntime_extensions'): # pragma: no cover from onnxruntime_extensions import get_library_path sess_options.register_custom_ops_library(get_library_path()) - session = ort.InferenceSession(self.work_space + 'eval.onnx', sess_options) if \ - input_graph.large_size else \ - ort.InferenceSession(input_graph.model.SerializeToString(), sess_options) + session = ort.InferenceSession(self.work_space + 'eval.onnx', + sess_options, + providers=[self.backend]) if input_graph.large_size else \ + 
ort.InferenceSession(input_graph.model.SerializeToString(), + sess_options, + providers=[self.backend]) results = [] if metrics: for metric in metrics: @@ -900,10 +997,10 @@ def eval_func(dataloader): def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None): from neural_compressor.utils.utility import dump_data_to_local from neural_compressor.adaptor.ox_utils.util import find_by_name - if self.backend in ["qlinearops", "qoperator"]: + if self.format == "qlinearops": supported_optype = ['Conv', 'MatMul', 'Concat', 'Attention', 'FusedConv', 'Add', 'Mul', 'LeakyRelu', 'Sigmoid', 'GlobalAveragePool', 'AveragePool'] - elif self.backend == "qdq": + elif self.format == "qdq": supported_optype = ['Conv', 'MatMul', 'Concat', 'Attention', 'FusedConv', 'LeakyRelu', 'Sigmoid', 'GlobalAveragePool', 'AveragePool'] else: @@ -937,24 +1034,7 @@ def save(self, model, path): @adaptor_registry -class ONNXRT_QLinearOpsAdaptor(ONNXRTAdaptor): - """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. - - Args: - framework_specific_info (dict): framework specific configuration for quantization. - """ - - def __init__(self, framework_specific_info): - self.query_handler = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_qlinear.yaml")) - self.backend = "qlinearops" - super().__init__(framework_specific_info) - if framework_specific_info["approach"] == "post_training_auto_quant": - self.query_handler_ext = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_integer.yaml")) - -@adaptor_registry -class ONNXRT_QOperatorAdaptor(ONNXRTAdaptor): +class ONNXRT_QLinearOpsAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. 
Args: @@ -962,13 +1042,10 @@ class ONNXRT_QOperatorAdaptor(ONNXRTAdaptor): """ def __init__(self, framework_specific_info): - self.query_handler = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_qlinear.yaml")) - self.backend = "qlinearops" super().__init__(framework_specific_info) @adaptor_registry -class ONNXRT_IntegerOpsAdaptor(ONNXRTAdaptor): +class ONNXRT_IntegerOpsAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. Args: @@ -976,13 +1053,10 @@ class ONNXRT_IntegerOpsAdaptor(ONNXRTAdaptor): """ def __init__(self, framework_specific_info): - self.query_handler = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_integer.yaml")) - self.backend = "integerops" super().__init__(framework_specific_info) @adaptor_registry -class ONNXRT_QDQAdaptor(ONNXRTAdaptor): +class ONNXRT_QDQAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. 
Args: @@ -990,13 +1064,7 @@ class ONNXRT_QDQAdaptor(ONNXRTAdaptor): """ def __init__(self, framework_specific_info): - self.query_handler = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_qdq.yaml")) - self.backend = "qdq" super().__init__(framework_specific_info) - if framework_specific_info["approach"] == "post_training_auto_quant": - self.query_handler_ext = ONNXRTQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "onnxrt_integer.yaml")) class ONNXRTQuery(QueryBackendCapability): diff --git a/neural_compressor/adaptor/onnxrt_integer.yaml b/neural_compressor/adaptor/onnxrt_integer.yaml index c7321f4bc25..1c3e6f646c0 100644 --- a/neural_compressor/adaptor/onnxrt_integer.yaml +++ b/neural_compressor/adaptor/onnxrt_integer.yaml @@ -84,6 +84,8 @@ } } } + CPUExecutionProvider: *ref_1_6 + CUDAExecutionProvider: *ref_1_6 graph_optimization: &default_optimization # from onnxruntime graph_optimization_level level: 'ENABLE_EXTENDED' # choices are ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'] @@ -98,7 +100,7 @@ <<: *common_ops capabilities: - int8: { + int8: &ref_1_8 { 'Conv': { 'weight': { 'dtype': ['uint8'], @@ -156,6 +158,8 @@ } } } + CPUExecutionProvider: *ref_1_8 + CUDAExecutionProvider: *ref_1_8 graph_optimization: <<: *default_optimization @@ -170,7 +174,7 @@ <<: *common_ops capabilities: - int8: { + int8: &ref_1_9 { 'Conv': { 'weight': { 'dtype': ['uint8'], @@ -242,6 +246,8 @@ } } } + CPUExecutionProvider: *ref_1_9 + CUDAExecutionProvider: *ref_1_9 graph_optimization: <<: *default_optimization diff --git a/neural_compressor/adaptor/onnxrt_qdq.yaml b/neural_compressor/adaptor/onnxrt_qdq.yaml index 596ecb87e18..33d3ba90ba9 100644 --- a/neural_compressor/adaptor/onnxrt_qdq.yaml +++ b/neural_compressor/adaptor/onnxrt_qdq.yaml @@ -24,39 +24,39 @@ ops: &common_ops int8: ['Conv', 'MatMul', 'Attention', 'Relu', 'Clip', 'LeakyRelu', 'Gather', 'Sigmoid', 'MaxPool', 'EmbedLayerNormalization', - 'FusedConv', 
'GlobalAveragePool', 'Pad', 'Split', 'Add'] + 'FusedConv', 'GlobalAveragePool', 'Pad', 'Split'] fp32: ['*'] # '*' means all op types capabilities: &common_capabilities int8: &ref_1_7 { - 'Conv': &key_1_7_0 { - 'weight': { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax'] - }, - 'activation': { - 'dtype': ['uint8'], - 'scheme': ['asym'], - 'granularity': ['per_tensor'], - 'algorithm': ['minmax'] - } - }, - 'Gather': { - 'weight': { - 'dtype': ['uint8'], - 'scheme': ['asym'], - 'algorithm': ['minmax'], - 'granularity': ['per_channel', 'per_tensor'], - }, - 'activation': { - 'dtype': ['uint8'], - 'scheme': ['asym'], - 'algorithm': ['minmax'], - 'granularity': ['per_tensor'], - } - }, + 'Conv': &key_1_7_0 { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['uint8'], + 'scheme': ['asym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'Gather': { + 'weight': { + 'dtype': ['uint8'], + 'scheme': ['asym'], + 'algorithm': ['minmax'], + 'granularity': ['per_channel', 'per_tensor'], + }, + 'activation': { + 'dtype': ['uint8'], + 'scheme': ['asym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + } + }, 'default': { 'weight': { 'dtype': ['int8'], @@ -70,9 +70,51 @@ 'algorithm': ['minmax'], 'granularity': ['per_tensor'] } - }, - } - + } + } + CPUExecutionProvider: *ref_1_7 + CUDAExecutionProvider: *ref_1_7 + TensorrtExecutionProvider: { + 'Conv': &cap_s8_sym_default { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor', 'per_channel'], + 'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'MatMul': &cap_s8_sym_pertensor_default { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 
'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'Attention': *cap_s8_sym_pertensor_default, + 'LeakyRelu': *cap_s8_sym_pertensor_default, + 'Gather': *cap_s8_sym_default, + 'Sigmoid': *cap_s8_sym_pertensor_default, + 'MaxPool': *cap_s8_sym_pertensor_default, + 'EmbedLayerNormalization': *cap_s8_sym_pertensor_default, + 'GlobalAveragePool': *cap_s8_sym_pertensor_default, + 'Pad': *cap_s8_sym_pertensor_default, + 'Split': *cap_s8_sym_pertensor_default, + 'Add': *cap_s8_sym_pertensor_default, + } + graph_optimization: &default_optimization # from onnxruntime graph_optimization_level level: 'ENABLE_EXTENDED' # choices are ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'] @@ -91,8 +133,31 @@ fp32: ['*'] # '*' means all op types capabilities: - <<: *common_capabilities - + int8: *ref_1_7 + CPUExecutionProvider: *ref_1_7 + CUDAExecutionProvider: *ref_1_7 + TensorrtExecutionProvider: &ref_1_8 { + 'Conv': *cap_s8_sym_default, + 'MatMul': *cap_s8_sym_pertensor_default, + 'Attention': *cap_s8_sym_pertensor_default, + 'LeakyRelu': *cap_s8_sym_pertensor_default, + 'Gather': *cap_s8_sym_default, + 'Sigmoid': *cap_s8_sym_pertensor_default, + 'MaxPool': *cap_s8_sym_pertensor_default, + 'EmbedLayerNormalization': *cap_s8_sym_pertensor_default, + 'GlobalAveragePool': *cap_s8_sym_pertensor_default, + 'Pad': *cap_s8_sym_pertensor_default, + 'Split': *cap_s8_sym_pertensor_default, + 'Squeeze': *cap_s8_sym_pertensor_default, + 'Reshape': *cap_s8_sym_pertensor_default, + 'Concat': *cap_s8_sym_pertensor_default, + 'AveragePool': *cap_s8_sym_pertensor_default, + 'Unsqueeze': *cap_s8_sym_pertensor_default, + 'Transpose': *cap_s8_sym_pertensor_default, + 'Resize': *cap_s8_sym_pertensor_default, + 'Add': *cap_s8_sym_pertensor_default, + } + graph_optimization: <<: *default_optimization @@ -111,7 +176,7 @@ fp32: ['*'] # '*' means all op types capabilities: - int8: { + 
int8: &ref_1_10 { 'Conv': { 'weight': { 'dtype': ['int8'], @@ -169,7 +234,9 @@ } }, } - + CPUExecutionProvider: *ref_1_10 + CUDAExecutionProvider: *ref_1_10 + TensorrtExecutionProvider: *ref_1_7 graph_optimization: <<: *default_optimization @@ -190,7 +257,7 @@ fp32: ['*'] # '*' means all op types capabilities: - int8: { + int8: &ref_1_11 { 'Conv': { 'weight': { 'dtype': ['int8'], @@ -262,7 +329,31 @@ } }, } - + CPUExecutionProvider: *ref_1_11 + CUDAExecutionProvider: *ref_1_11 + TensorrtExecutionProvider: { + 'Conv': *cap_s8_sym_default, + 'MatMul': *cap_s8_sym_default, + 'Attention': *cap_s8_sym_pertensor_default, + 'LeakyRelu': *cap_s8_sym_pertensor_default, + 'Gather': *cap_s8_sym_default, + 'Sigmoid': *cap_s8_sym_pertensor_default, + 'MaxPool': *cap_s8_sym_pertensor_default, + 'EmbedLayerNormalization': *cap_s8_sym_pertensor_default, + 'GlobalAveragePool': *cap_s8_sym_pertensor_default, + 'Pad': *cap_s8_sym_pertensor_default, + 'Split': *cap_s8_sym_pertensor_default, + 'Squeeze': *cap_s8_sym_pertensor_default, + 'Reshape': *cap_s8_sym_pertensor_default, + 'Concat': *cap_s8_sym_pertensor_default, + 'AveragePool': *cap_s8_sym_pertensor_default, + 'Unsqueeze': *cap_s8_sym_pertensor_default, + 'Transpose': *cap_s8_sym_pertensor_default, + 'Resize': *cap_s8_sym_pertensor_default, + 'Gemm': *cap_s8_sym_default, + 'Add': *cap_s8_sym_pertensor_default, + } + graph_optimization: <<: *default_optimization diff --git a/neural_compressor/adaptor/onnxrt_qlinear.yaml b/neural_compressor/adaptor/onnxrt_qlinear.yaml index b95be5d8ea1..403870e59f2 100644 --- a/neural_compressor/adaptor/onnxrt_qlinear.yaml +++ b/neural_compressor/adaptor/onnxrt_qlinear.yaml @@ -100,6 +100,8 @@ } }, } + CPUExecutionProvider: *ref_1_6 + CUDAExecutionProvider: *ref_1_6 graph_optimization: &default_optimization # from onnxruntime graph_optimization_level level: 'ENABLE_EXTENDED' # choices are ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'] @@ -138,7 +140,7 @@ fp32: ['*'] # '*' means 
all op types capabilities: - int8: { + int8: &ref_1_8 { 'FusedConv': { 'weight': { 'dtype': ['int8'], @@ -210,6 +212,8 @@ } }, } + CPUExecutionProvider: *ref_1_8 + CUDAExecutionProvider: *ref_1_8 graph_optimization: <<: *default_optimization @@ -225,7 +229,7 @@ <<: *common_ops capabilities: - int8: { + int8: &ref_1_9 { 'FusedConv': { 'weight': { 'dtype': ['int8'], @@ -311,6 +315,8 @@ } }, } + CPUExecutionProvider: *ref_1_9 + CUDAExecutionProvider: *ref_1_9 graph_optimization: <<: *default_optimization @@ -332,7 +338,7 @@ fp32: ['*'] # '*' means all op types capabilities: - int8: { + int8: &ref_1_11 { 'FusedConv': { 'weight': { 'dtype': ['int8'], @@ -432,6 +438,8 @@ } }, } + CPUExecutionProvider: *ref_1_11 + CUDAExecutionProvider: *ref_1_11 graph_optimization: <<: *default_optimization diff --git a/neural_compressor/adaptor/ox_utils/calibration.py b/neural_compressor/adaptor/ox_utils/calibration.py index aa55ae4a47f..3a2261af688 100644 --- a/neural_compressor/adaptor/ox_utils/calibration.py +++ b/neural_compressor/adaptor/ox_utils/calibration.py @@ -34,7 +34,8 @@ from packaging.version import Version from importlib.util import find_spec from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.adaptor.ox_utils.util import make_dquant_node, is_B_transposed +from neural_compressor.adaptor.ox_utils.util import make_dquant_node, is_B_transposed, \ + _get_qrange_for_qType, calculate_scale_zp logger = logging.getLogger("neural_compressor") ONNX18_VERSION = Version("1.8.0") @@ -48,7 +49,9 @@ def __init__(self, model_wrapper, dump_op_types, black_nodes=[], white_nodes=[], - iterations=[]): + iterations=[], + backend=['CPUExecutionProvider'], + reduce_range=False): ''' :param model: ONNX model to calibrate :param dataloader: user implemented object to read in and preprocess calibration dataset @@ -56,6 +59,8 @@ def __init__(self, model_wrapper, :param black_nodes: operator names that should not be quantized, default = '' :param white_nodes: operator 
names that force to be quantized, default = '' :param iterations: tensor of which iteration will be collected. + :param backend: execution provider for onnxruntime + :reduce_range: use 7 bit or not ''' self.model_wrapper = model_wrapper self.model = model_wrapper.model @@ -68,12 +73,14 @@ def __init__(self, model_wrapper, self.white_nodes = white_nodes self.augmented_model = None self.iterations = iterations + self.backend = backend self.augment_nodes = [] self.dequantized_output = {} self.already_quantized = 'DequantizeLinear' in \ [node.op_type for node in self.model.graph.node] self.dynamically_quantized = False self.ort_version = Version(onnxruntime.__version__) + self.reduce_range = reduce_range def augment_graph(self, activation_only=False, weight_only=False): ''' @@ -205,9 +212,14 @@ def get_intermediate_outputs(self, calib_mode=None): from onnxruntime_extensions import get_library_path so.register_custom_ops_library(get_library_path()) - session = onnxruntime.InferenceSession(self.augmented_model.SerializeToString(), so) if \ - not self.model_wrapper.large_size else \ - onnxruntime.InferenceSession(self.model_wrapper.model_path + '_augment.onnx', so) + session = onnxruntime.InferenceSession( + self.augmented_model.SerializeToString(), + so, + provider=self.backend) if not self.model_wrapper.large_size else \ + onnxruntime.InferenceSession( + self.model_wrapper.model_path + '_augment.onnx', + so, + provider=self.backend) intermediate_outputs = [] len_inputs = len(session.get_inputs()) @@ -366,7 +378,7 @@ def dump_minmax(self, calib_mode='naive'): return self._map_calibration(node_output_names, output_dicts, calib_mode=calib_mode) - def dump_calibration(self, calib_mode='naive'): + def dump_calibration(self, q_config, calib_mode='naive'): ''' Gather calibration params for quantization parameter calib_mode: type 'naive' gives (Min, Max) pairs @@ -376,9 +388,9 @@ def dump_calibration(self, calib_mode='naive'): second element is a maximum of all values; :return: 
dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs } ''' - return self.calculate_quantization_params(self.dump_minmax(calib_mode)) + return self.calculate_quantization_params(q_config, self.dump_minmax(calib_mode)) - def calculate_quantization_params(self, quantization_thresholds): + def calculate_quantization_params(self, q_config, quantization_thresholds): ''' Given quantization thresholds, calculate the quantization params. :param quantization_thresholds: @@ -417,11 +429,19 @@ def calculate_quantization_params(self, quantization_thresholds): if len(children) == 1: child = children[0] parent = None + scheme = 'asym' + qType = 2 # uint8 if tensor_name in output_name_to_nodes: parent = output_name_to_nodes[tensor_name] + if parent and parent.name in q_config and q_config[parent.name] not in ['fp32']: + scheme = q_config[parent.name]['activation']['scheme'] + qType = q_config[parent.name]['activation']['dtype'] + elif self.backend in ['TensorrtExecutionProvider']: + scheme = 'sym' + qType = 3 node_thresholds = quantization_thresholds[tensor_name] node_params = self.calculate_scale_zeropoint(parent, child, node_thresholds[0], - node_thresholds[1]) + node_thresholds[1], scheme, qType, _get_qrange_for_qType(qType, self.reduce_range)) quantization_params[tensor_name] = node_params return quantization_params @@ -478,7 +498,7 @@ def dump_tensor(self, activation=True, weight=False): dumped_tensors_map.update({"activation": map_node_activation}) return dumped_tensors_map - def calculate_scale_zeropoint(self, last_node, next_node, rmin, rmax): + def calculate_scale_zeropoint(self, last_node, next_node, rmin, rmax, scheme, qType, quantize_range): ''' Given the source and destination node of tensor, \ return calculated zero point and scales. 
@@ -524,12 +544,12 @@ def calculate_scale_zeropoint(self, last_node, next_node, rmin, rmax): clip_params = attrs[attrs_names.index('activation_params')].floats rmin = min(rmin, clip_params[0], clip_params[1]) rmax = max(rmax, clip_params[0], clip_params[1]) - - scale = np.float32((rmax - rmin) / 255 if rmin != rmax else 1) - initial_zero_point = (0 - rmin) / scale - zero_point = np.uint8(round(max(0, min(255, initial_zero_point)))) - - zp_and_scale.append(zero_point) - zp_and_scale.append(scale) + + scale, zp = calculate_scale_zp(rmin, rmax, quantize_range, qType, scheme) + if qType == 2: + zp_and_scale.append(np.uint8(zp)) + else: + zp_and_scale.append(np.int8(zp)) + zp_and_scale.append(np.float32(scale)) return zp_and_scale diff --git a/neural_compressor/adaptor/ox_utils/operators/__init__.py b/neural_compressor/adaptor/ox_utils/operators/__init__.py index da48d428ac4..7b17ff45b5b 100644 --- a/neural_compressor/adaptor/ox_utils/operators/__init__.py +++ b/neural_compressor/adaptor/ox_utils/operators/__init__.py @@ -18,7 +18,7 @@ from os.path import dirname, basename, isfile, join import glob -from .ops import OPERATORS +from .ops import OPERATORS, QOPERATORS modules = glob.glob(join(dirname(__file__), "*.py")) @@ -26,4 +26,4 @@ if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["OPERATORS"] \ No newline at end of file +__all__ = ["OPERATORS", "QOPERATORS"] diff --git a/neural_compressor/adaptor/ox_utils/operators/activation.py b/neural_compressor/adaptor/ox_utils/operators/activation.py index 5339f6834ad..cf677e61881 100644 --- a/neural_compressor/adaptor/ox_utils/operators/activation.py +++ b/neural_compressor/adaptor/ox_utils/operators/activation.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, 
qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain @op_registry(op_types="LeakyRelu, Sigmoid") @@ -87,4 +87,40 @@ def quantize(self): self.quantizer.dequantize_tensor(node, node.input[0]) else: self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0]) - self.quantizer.remove_nodes.append(node) \ No newline at end of file + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QLinearLeakyRelu, QLinearSigmoid") +class QActivationOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant'], + node.name + '_in_dequant') + inputs = [node.name + '_in_dequant'] + add_nodes.append(in_dq) + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[3], node.input[4]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + activation_node = onnx.helper.make_node( + node.op_type.split('QLinear')[-1], inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(activation_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/argmax.py b/neural_compressor/adaptor/ox_utils/operators/argmax.py index 9344498698e..65daf5b5523 100644 --- a/neural_compressor/adaptor/ox_utils/operators/argmax.py +++ b/neural_compressor/adaptor/ox_utils/operators/argmax.py @@ -15,9 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - - -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry @op_registry(op_types="ArgMax") class ArgMaxOperator(Operator): @@ -35,5 +33,9 @@ def convert(self, convert_format): origin_name = node.input[0].split('_argmax_node')[0] if origin_name in self.quantizer.quantized_value_map: - node.input[0] = self.quantizer.quantized_value_map[origin_name].q_name - node.name = node.name + '_quant' \ No newline at end of file + node.name = node.name + '_quant' + +@qop_registry(op_types="ArgMax") +class QArgMaxOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) diff --git a/neural_compressor/adaptor/ox_utils/operators/attention.py b/neural_compressor/adaptor/ox_utils/operators/attention.py index 9bd33ae4c26..26030e9284a 100644 --- a/neural_compressor/adaptor/ox_utils/operators/attention.py +++ b/neural_compressor/adaptor/ox_utils/operators/attention.py @@ -17,8 +17,8 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator -from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, find_by_name @op_registry(op_types="Attention") class AttentionOperator(Operator): @@ -74,3 +74,46 @@ def convert(self, convert_format): self.quantizer.new_nodes.append(qattention_node) self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QAttention") +class QAttentionOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inputs = [] + inits = [] + if find_by_name(node.input[3], 
self.initializers) is None: + return False, add_nodes, inits + # input dq + in_dq1 = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[0], node.input[3], node.input[6]], + [node.name + '_in_dequant1'], + node.name + '_in_dequant1') + + in_dq2 = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[1], node.input[4], node.input[7]], + [node.name + '_in_dequant2'], + node.name + '_in_dequant2') + inputs = [node.name + '_in_dequant1', + node.name + '_in_dequant2', + node.input[2], + node.input[5]] + + add_nodes.extend([in_dq1, in_dq2]) + + outputs = node.output + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + kwargs["domain"] = ms_domain + + binary_node = onnx.helper.make_node( + 'Attention', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(binary_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/binary_op.py b/neural_compressor/adaptor/ox_utils/operators/binary_op.py index 3848cd6ee9b..72c92da3dcf 100644 --- a/neural_compressor/adaptor/ox_utils/operators/binary_op.py +++ b/neural_compressor/adaptor/ox_utils/operators/binary_op.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain @op_registry(op_types="Add, Mul") @@ -77,4 +77,47 @@ def convert(self, convert_format): self.quantizer.new_nodes += [qlinear_binary_math_node] self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) - self.quantizer.remove_nodes.append(node) \ No newline at end of file + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QLinearAdd, QLinearMul") +class QBinaryOperator(QOperator): + def __init__(self, onnx_node, 
children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq1 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant1'], + node.name + '_in_dequant1') + + in_dq2 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[3:6], + [node.name + '_in_dequant2'], + node.name + '_in_dequant2') + inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] + + add_nodes.extend([in_dq1, in_dq2]) + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[6], node.input[7]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + binary_node = onnx.helper.make_node( + node.op_type.split('QLinear')[-1], inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(binary_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/concat.py b/neural_compressor/adaptor/ox_utils/operators/concat.py index 763ac8e6541..eb85155421c 100644 --- a/neural_compressor/adaptor/ox_utils/operators/concat.py +++ b/neural_compressor/adaptor/ox_utils/operators/concat.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain @op_registry(op_types="Concat") @@ -96,3 +96,42 @@ def cast(self): # pragma: no cover if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]: return self.quantizer.dtype_cast(self.node, self.dtype) + +@qop_registry(op_types="QLinearConcat") +class QConcatOperator(QOperator): + def 
__init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inputs = [] + inits = [] + # input dq + for i in range(int((len(node.input) - 2) / 3 - 1)): + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + node.input[2 + i*3 : 2 + (i+1)*3], + [node.name + '_in_dequant_' + str(i)], + node.name + '_in_dequant_' + str(i)) + inputs.append(node.name + '_in_dequant_' + str(i)) + add_nodes.append(in_dq) + + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[0], node.input[1]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + concat_node = onnx.helper.make_node( + 'Concat', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(concat_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/conv.py b/neural_compressor/adaptor/ox_utils/operators/conv.py index 90b849bd9e6..7f95d548b2a 100644 --- a/neural_compressor/adaptor/ox_utils/operators/conv.py +++ b/neural_compressor/adaptor/ox_utils/operators/conv.py @@ -19,7 +19,7 @@ import onnx from onnx import onnx_pb as onnx_proto -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg @op_registry(op_types="Conv, FusedConv") @@ -156,6 +156,7 @@ def convert(self, convert_format): if attribute.name == 'activation_params': # pragma: no cover continue kwargs.update(attribute_to_kwarg(attribute)) + qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs, [qlinear_conv_output], node.name, **kwargs) @@ -164,4 +165,71 @@ 
def convert(self, convert_format): self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(node) +@qop_registry(op_types="QLinearConv") +class QConvOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq1 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant1'], + node.name + '_in_dequant1') + + in_dq2 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[3:6], + [node.name + '_in_dequant2'], + node.name + '_in_dequant2') + + add_nodes.extend([in_dq1, in_dq2]) + inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] + if len(node.input) == 9: + import numpy as np + input_scale = onnx.numpy_helper.to_array( + find_by_name(node.input[1], self.initializers)) + weight_scale = onnx.numpy_helper.to_array( + find_by_name(node.input[4], self.initializers)) + bias_scale = input_scale * weight_scale + + # update scale initializer + bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1) + bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, + node.input[8] + '_scale') + inits.extend([bias_scale_initializer]) + + # update zero initializer + bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1) + bias_zp_initializer = onnx.numpy_helper.from_array( + bias_zp_data, node.input[8] + '_zero_point') + inits.extend([bias_zp_initializer]) + in_dq3 = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name], + [node.name + '_in_dequant3'], + node.name + '_in_dequant3') + inputs.append(in_dq3.name) + add_nodes.append(in_dq3) + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[6], node.input[7]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + 
add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + binary_node = onnx.helper.make_node( + node.op_type.split('QLinear')[-1], inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(binary_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py index 00522c178a1..08a6e5a326b 100644 --- a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +++ b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py @@ -16,7 +16,7 @@ # limitations under the License. # -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator @op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze") class Direct8BitOperator(Operator): @@ -83,3 +83,8 @@ def cast(self): if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]: return self.quantizer.dtype_cast(self.node, self.dtype) + +@qop_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze") +class QDirectOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py b/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py index 256298b7142..91310f9e15d 100644 --- a/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +++ b/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import 
attribute_to_kwarg, ms_domain @op_registry(op_types="EmbedLayerNormalization") @@ -69,4 +69,38 @@ def convert(self, convert_format): inputs, node.output, node.name, **kwargs) self.quantizer.new_nodes.append(qembed_layer_norm_node) - self.quantizer.remove_nodes.extend(parents) \ No newline at end of file + self.quantizer.remove_nodes.extend(parents) + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QEmbedLayerNormalization") +class QEmbedLayerNormalizationOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + inputs = [node.input[0], node.input[1]] + # input dq + for i in range(5): + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[2+i], node.input[-10+i], node.input[-5+i]], + [node.name + '_in_dequant_' + str(i)], + node.name + '_in_dequant_' + str(i)) + inputs.append(node.name + '_in_dequant_' + str(i)) + add_nodes.append(in_dq) + if len(node.input) > 17: + inputs.append(node.input[7]) + + outputs = node.output + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + binary_node = onnx.helper.make_node( + 'EmbedLayerNormalization', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(binary_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/gather.py b/neural_compressor/adaptor/ox_utils/operators/gather.py index 93f98823047..7c3c6285b45 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gather.py +++ b/neural_compressor/adaptor/ox_utils/operators/gather.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util 
import attribute_to_kwarg @op_registry(op_types="Gather") @@ -89,4 +89,9 @@ def convert(self, convert_format): for n in self.quantizer.model.get_children(child): self.quantizer.model.replace_node_input(n, child.output[0], gather_new_output) - self.quantizer.remove_nodes.extend([node, parents[0]]) \ No newline at end of file + self.quantizer.remove_nodes.extend([node, parents[0]]) + +@qop_registry(op_types="Gather") +class QGatherOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) diff --git a/neural_compressor/adaptor/ox_utils/operators/gavgpool.py b/neural_compressor/adaptor/ox_utils/operators/gavgpool.py index b4bafcafeae..eec48e6af19 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +++ b/neural_compressor/adaptor/ox_utils/operators/gavgpool.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain @op_registry(op_types="GlobalAveragePool") @@ -58,4 +58,37 @@ def convert(self, convert_format): self.quantizer.new_nodes += [qnode] self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(parent) - self.quantizer.remove_nodes.append(node) \ No newline at end of file + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QLinearGlobalAveragePool") +class QGlobalAveragePoolOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant'], + node.name + '_in_dequant') + inputs = [node.name + '_in_dequant'] + add_nodes.append(in_dq) + # 
output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[3], node.input[4]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + activation_node = onnx.helper.make_node( + 'GlobalAveragePool', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(activation_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/gemm.py b/neural_compressor/adaptor/ox_utils/operators/gemm.py index 65aca2e8a7d..49f8eeaa6c7 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gemm.py +++ b/neural_compressor/adaptor/ox_utils/operators/gemm.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import find_by_name, ms_domain, \ attribute_to_kwarg, is_B_transposed @@ -91,4 +91,73 @@ def convert(self, convert_format): self.quantizer.new_nodes.append(qgemm_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) - self.quantizer.remove_nodes.append(node) \ No newline at end of file + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QGemm") +class QGemmOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + import numpy as np + node = self.node + add_nodes = [] + inits = [] + + input_scale = onnx.numpy_helper.to_array( + find_by_name(node.input[1], self.initializers)) + weight_scale = onnx.numpy_helper.to_array( + find_by_name(node.input[4], self.initializers)) + bias_scale = input_scale * weight_scale + + # input dq + in_dq1 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant1'], + node.name + '_in_dequant1') + + + 
in_dq2 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[3:6], + [node.name + '_in_dequant2'], + node.name + '_in_dequant2') + + # update scale initializer + bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1) + bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, + node.input[6] + '_scale') + inits.extend([bias_scale_initializer]) + + # update zero initializer + bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1) + bias_zp_initializer = onnx.numpy_helper.from_array( + bias_zp_data, node.input[6] + '_zero_point') + inits.extend([bias_zp_initializer]) + in_dq3 = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name], + [node.name + '_in_dequant3']) + + inputs = [in_dq1.name, in_dq2.name, in_dq3.name] + add_nodes.extend([in_dq1, in_dq2, in_dq3]) + + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[6], node.input[7]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + gemm_node = onnx.helper.make_node( + 'Gemm', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(gemm_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/matmul.py b/neural_compressor/adaptor/ox_utils/operators/matmul.py index 988e157e323..fbf6558bb02 100644 --- a/neural_compressor/adaptor/ox_utils/operators/matmul.py +++ b/neural_compressor/adaptor/ox_utils/operators/matmul.py @@ -17,8 +17,8 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator -from neural_compressor.adaptor.ox_utils.util import find_by_name +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry +from 
neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg from onnx import onnx_pb as onnx_proto @op_registry(op_types="MatMul") @@ -122,4 +122,47 @@ def convert(self, convert_format): self.quantizer.new_nodes.append(qlinear_matmul_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) - self.quantizer.remove_nodes.append(node) \ No newline at end of file + self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QLinearMatMul") +class QMatMulOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq1 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant1'], + node.name + '_in_dequant1') + + in_dq2 = onnx.helper.make_node( + 'DequantizeLinear', + node.input[3:6], + [node.name + '_in_dequant2'], + node.name + '_in_dequant2') + inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] + + add_nodes.extend([in_dq1, in_dq2]) + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[6], node.input[7]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + matmul_node = onnx.helper.make_node( + 'MatMul', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(matmul_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/maxpool.py b/neural_compressor/adaptor/ox_utils/operators/maxpool.py index f93befc9a4f..3180a6a49f1 100644 --- a/neural_compressor/adaptor/ox_utils/operators/maxpool.py +++ b/neural_compressor/adaptor/ox_utils/operators/maxpool.py @@ -16,7 +16,7 @@ # limitations under the License. 
# -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry @op_registry(op_types="MaxPool") class MaxPoolOperator(Operator): @@ -67,4 +67,9 @@ def convert(self, convert_format): self.quantizer.model.replace_node_input(n, child.output[0], node.output[0]) - self.quantizer.remove_nodes.append(parent) \ No newline at end of file + self.quantizer.remove_nodes.append(parent) + +@qop_registry(op_types="MaxPool") +class QMaxPoolOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/ops.py b/neural_compressor/adaptor/ox_utils/operators/ops.py index 33d4ecf7c5d..ad6237b2d41 100644 --- a/neural_compressor/adaptor/ox_utils/operators/ops.py +++ b/neural_compressor/adaptor/ox_utils/operators/ops.py @@ -15,8 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from neural_compressor.utils.utility import LazyImport +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg +onnx = LazyImport('onnx') OPERATORS = {} +QOPERATORS= {} def op_registry(op_types): '''The class decorator used to register all Operator subclasses. @@ -34,6 +38,27 @@ def decorator_op(cls): return cls return decorator_op +def qop_registry(op_types): + '''The class decorator used to register all qOperator subclasses. + + Args: + cls (class): The class of register. + ''' + def decorator_op(cls): + assert cls.__name__.endswith( + 'Operator'), "The name of subclass of QOperator should end with \'Operator\' substring." 
+ if cls.__name__[:-len('Operator')] in QOPERATORS: # pragma: no cover + raise ValueError('Cannot have two operators with the same name.') + for single_op_type in [op_type.strip() for op_type in op_types.split(',')]: + if single_op_type.startswith('QLinear') or \ + single_op_type in ['QGemm', 'QAttention', 'QEmbedLayerNormalization', 'ArgMax', + 'Reshape', 'Transpose', 'Squeeze', 'Unsqueeze', 'Gather', + 'MaxPool', 'Pad', 'Resize', 'Split']: + QOPERATORS[single_op_type] = cls + return cls + return decorator_op + + class Operator(object): def __init__(self, onnx_quantizer, onnx_node): self.quantizer = onnx_quantizer @@ -81,4 +106,55 @@ def convert(self, convert_format): return def cast(self): # pragma: no cover - self.quantizer.dtype_cast(self.node, self.dtype) \ No newline at end of file + self.quantizer.dtype_cast(self.node, self.dtype) + +class QOperator(object): + def __init__(self, onnx_node, children, initializers): + self.node = onnx_node + self.children = children + self.initializers = initializers + self.qop_list = ['QGemm', 'QAttention', 'QEmbedLayerNormalization', + 'QLinearLeakyRelu', 'QLinearSigmoid', 'QLinearAdd','QLinearMul', + 'QLinearConcat', 'QLinearConv', 'QLinearGlobalAveragePool', + 'QLinearMatMul', 'QLinearAveragePool'] + + def convert(self): + node = self.node + add_nodes = [] + inputs = [] + inits = [] + if all([child.op_type not in self.qop_list or \ + child.op_type != 'DequantizeLinear' for child in self.children]): + return False, add_nodes, inits + + # input dq + for child in self.children: + if child.op_type == 'DequantizeLinear': + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[0], child.input[1], child.input[2]], + [node.name + '_in_dequant'], + node.name + '_in_dequant') + inputs.append(node.name + '_in_dequant') + add_nodes.append(in_dq) + break + + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', in_dq.input[1], in_dq.input[2]], + node.output, + node.name + '_out_quant') + 
outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + inputs.append(node.input[1:]) + new_node = onnx.helper.make_node( + node.op_type, inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(new_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/pad.py b/neural_compressor/adaptor/ox_utils/operators/pad.py index 0f0acfcbec7..00bb38a3bbd 100644 --- a/neural_compressor/adaptor/ox_utils/operators/pad.py +++ b/neural_compressor/adaptor/ox_utils/operators/pad.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, quantize_nparray @op_registry(op_types="Pad") @@ -93,4 +93,9 @@ def convert(self, convert_format): # Create an entry for output quantized value node.input[0] = parent.input[0] node.output[0] = child.output[0] - self.quantizer.remove_nodes.extend([parent, child]) \ No newline at end of file + self.quantizer.remove_nodes.extend([parent, child]) + +@qop_registry(op_types="Pad") +class QPadOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/pooling.py b/neural_compressor/adaptor/ox_utils/operators/pooling.py index bba746129e6..a794dec7018 100644 --- a/neural_compressor/adaptor/ox_utils/operators/pooling.py +++ b/neural_compressor/adaptor/ox_utils/operators/pooling.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import 
op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain @op_registry(op_types="AveragePool") @@ -80,3 +80,39 @@ def convert(self, convert_format): self.quantizer.new_nodes.append(qnode) self.quantizer.remove_nodes.append(node) + +@qop_registry(op_types="QLinearAveragePool") +class QPoolOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inits = [] + # input dq + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + node.input[:3], + [node.name + '_in_dequant'], + node.name + '_in_dequant') + inputs = [node.name + '_in_dequant'] + add_nodes.append(in_dq) + # output q + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out', node.input[3], node.input[4]], + node.output, + node.name + '_out_quant') + outputs = [node.name + '_out'] + add_nodes.append(out_q) + + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + activation_node = onnx.helper.make_node( + 'AveragePool', inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(activation_node) + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/resize.py b/neural_compressor/adaptor/ox_utils/operators/resize.py index d5f906f8372..7d266c7a5a5 100644 --- a/neural_compressor/adaptor/ox_utils/operators/resize.py +++ b/neural_compressor/adaptor/ox_utils/operators/resize.py @@ -16,7 +16,7 @@ # limitations under the License. 
# -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry @op_registry(op_types="Resize") class ResizeOperator(Operator): @@ -70,3 +70,7 @@ def convert(self, convert_format): child.output[0], node.output[0] + '_quantized') node.output[0] = node.output[0] + '_quantized' +@qop_registry(op_types="Resize") +class QResizeOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/operators/split.py b/neural_compressor/adaptor/ox_utils/operators/split.py index a5ec5532711..d022fd3d4c1 100644 --- a/neural_compressor/adaptor/ox_utils/operators/split.py +++ b/neural_compressor/adaptor/ox_utils/operators/split.py @@ -17,7 +17,7 @@ # import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg @@ -81,3 +81,51 @@ def cast(self): # pragma: no cover if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]: return self.quantizer.dtype_cast(self.node, self.dtype) + +@qop_registry(op_types="Split") +class QSplitOperator(QOperator): + def __init__(self, onnx_node, children, initializers): + super().__init__(onnx_node, children, initializers) + + def convert(self): + node = self.node + add_nodes = [] + inputs = [] + inits = [] + + if all([child.op_type not in self.qop_list or \ + child.op_type != 'DequantizeLinear' for child in self.children]): + return False, add_nodes, inits + + # input dq + for child in self.children: + if child.op_type == 'DequantizeLinear': + in_dq = onnx.helper.make_node( + 'DequantizeLinear', + [node.input[0], child.input[1], 
child.input[2]], + [node.name + '_in_dequant'], + node.name + '_in_dequant') + inputs.append(node.name + '_in_dequant') + add_nodes.append(in_dq) + break + + outputs = [] + for i, out in enumerate(node.output): + out_q = onnx.helper.make_node( + 'QuantizeLinear', + [node.name + '_out_' + str(i), in_dq.input[1], in_dq.input[2]], + [node.output[i]], + node.name + '_out_quant_' + str(i)) + outputs.append([node.name + '_out_quant_' + str(i)]) + add_nodes.append(out_q) + + outputs = node.output + kwargs = {} + for attribute in node.attribute: # pragma: no cover + kwargs.update(attribute_to_kwarg(attribute)) + + gather_node = onnx.helper.make_node( + node.op_type, inputs, + outputs, node.name + '_convert', **kwargs) + add_nodes.append(gather_node) + return True, add_nodes, inits \ No newline at end of file diff --git a/neural_compressor/adaptor/ox_utils/quantizer.py b/neural_compressor/adaptor/ox_utils/quantizer.py index bb2a9a6227d..5129cd95dfe 100644 --- a/neural_compressor/adaptor/ox_utils/quantizer.py +++ b/neural_compressor/adaptor/ox_utils/quantizer.py @@ -37,7 +37,6 @@ from neural_compressor.adaptor.ox_utils.util import __producer__, __version__ from neural_compressor.adaptor.ox_utils.util import quantize_data, dtype_mapping, support_pair, ValueInfo from neural_compressor import options -from neural_compressor.utils.utility import CpuInfo from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.adaptor.ox_utils.operators import OPERATORS @@ -50,8 +49,7 @@ def __init__(self, model, q_config, mode, static, quantization_params, model = onnx.shape_inference.infer_shapes(self.model.model) if \ not self.model.large_size else self.model.model self.config = q_config - self.reduce_range = reduce_range if reduce_range is not None \ - else False if CpuInfo().vnni else True + self.reduce_range = reduce_range self.mode = mode # QuantizationMode.Value self.static = static # use static quantization for inputs. 
self.fuse_dynamic_quant = False @@ -840,12 +838,12 @@ def _get_quantization_params(self, param_name): raise ValueError("Quantization parameters should contain zero point and scale. " "Specified values for output {}: {}".format(param_name, params)) - zero_point_values = [params[0].item()] + zero_point_values = [params[0]] zero_point_shape = [] zero_point_name = param_name + "_zero_point" zero_point_type = onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[params[0].dtype] - scale_values = [params[1].item()] + scale_values = [params[1]] scale_shape = [] scale_name = param_name + "_scale" diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index d5041204efd..b7a55eb6ee9 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -18,12 +18,15 @@ import os import numpy as np -from onnx import helper, numpy_helper -from onnx import onnx_pb as onnx_proto +from neural_compressor.utils.utility import LazyImport from enum import Enum from pathlib import Path import abc +helper = LazyImport('onnx.helper') +numpy_helper = LazyImport('onnx.numpy_helper') +onnx_proto = LazyImport('onnx.onnx_pb') + __producer__ = "onnx.quantize" __version__ = "0.1.0" onnx_domain = "ai.onnx" @@ -58,6 +61,18 @@ 'complex128': 15, } +PROVIDERS = { + 'default': 'CPUExecutionProvider', + 'onnxrt_trt_ep': 'TensorrtExecutionProvider', + 'onnxrt_cuda_ep': 'CUDAExecutionProvider', +} + +ONNXRT_BACKENDS = { + 'CPUExecutionProvider': 'default', + 'TensorrtExecutionProvider': 'onnxrt_trt_ep', + 'CUDAExecutionProvider': 'onnxrt_cuda_ep' +} + def dtype_to_name(dtype_mapping, dtype): return list(dtype_mapping.keys())[list(dtype_mapping.values()).index(dtype)] @@ -175,6 +190,23 @@ def quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point): qType, scheme)) return quantized_data +def calculate_scale_zp(rmin, rmax, quantize_range, qType, scheme): + if scheme == 'sym': + max_range = max(abs(rmin), abs(rmax)) + scale = 
(float(max_range) * 2) / quantize_range if max_range > 0 else 1 + else: + scale = (float(rmax) - rmin) / quantize_range if rmin != rmax else 1 + + if scale == 1 or (scheme == 'sym' and qType == onnx_proto.TensorProto.INT8): + zero_point = 0 + elif qType == onnx_proto.TensorProto.UINT8: + zero_point = round((0 - rmin) / scale) + zero_point = np.uint8(round(max(0, min(255, zero_point)))) + else: + zero_point = round((-64 - rmin) / scale) if quantize_range == 128 \ + else round((-127 - rmin) / scale) + return scale, zero_point + def quantize_data(data, quantize_range, qType, scheme): ''' :parameter data: data to quantize @@ -196,17 +228,7 @@ def quantize_data(data, quantize_range, qType, scheme): rmin = min(min(data), 0) rmax = max(max(data), 0) - if scheme == 'sym' and qType == onnx_proto.TensorProto.INT8: - max_range = max(abs(rmin), abs(rmax)) - scale = (float(max_range) * 2) / quantize_range if max_range > 0 else 1 - zero_point = 0 - elif scheme == 'asym' and qType == onnx_proto.TensorProto.UINT8: - scale = (float(rmax) - rmin) / quantize_range if rmin != rmax else 1 - zero_point = round((0 - rmin) / scale) - else: - raise ValueError("Unexpected combination of data type {} and scheme {}.".format( - qType, scheme)) - + scale, zero_point = calculate_scale_zp(rmin, rmax, quantize_range, qType, scheme) quantized_data = quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point) return rmin, rmax, zero_point, scale, quantized_data diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 0a44fe2f5a3..23e12ab40f6 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -30,7 +30,6 @@ from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader - torch = LazyImport("torch") json = LazyImport("json") hvd = LazyImport("horovod.torch") @@ -372,10 +371,12 @@ def _cfgs_to_fx_cfgs(op_cfgs, observer_type='post_training_static_quant'): if 
version.release >= Version("1.13.0").release: # pragma: no cover from torch.ao.quantization import QConfigMapping fx_op_cfgs = QConfigMapping() - fx_op_cfgs.set_global(model_qconfig) + if observer_type != 'post_training_dynamic_quant': + fx_op_cfgs.set_global(model_qconfig) else: fx_op_cfgs = dict() - fx_op_cfgs[""] = model_qconfig + if observer_type != 'post_training_dynamic_quant': + fx_op_cfgs[""] = model_qconfig op_tuple_cfg_list = [] for key, value in op_cfgs.items(): @@ -393,8 +394,7 @@ def _cfgs_to_fx_cfgs(op_cfgs, observer_type='post_training_static_quant'): if version.release < Version("1.13.0").release: # pragma: no cover fx_op_cfgs["module_name"] = op_tuple_cfg_list - - if version.release >= Version("1.13.0").release: # pragma: no cover + elif observer_type != 'post_training_dynamic_quant': from torch.ao.quantization import get_default_qconfig_mapping for name, q_config in get_default_qconfig_mapping().to_dict()['object_type']: fx_op_cfgs.set_object_type(name, q_config) @@ -865,37 +865,89 @@ def model_calibration(self, def eval_func(self, model, dataloader, postprocess, metrics, measurer, iteration, conf=None): results = [] - for idx, (input, label) in enumerate(dataloader): - if measurer is not None: - measurer.start() - - output = pytorch_forward_wrapper(model, input, device=self.device, conf=conf) - if self.device != "cpu": # pragma: no cover - output = output.to("cpu") - label = label.to("cpu") - if measurer is not None: - measurer.end() - if postprocess is not None: - output, label = postprocess((output, label)) - if metrics: - for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): - metric.update(output, label) - - # If distributed dataloader, gather all outputs to update metric - if getattr(dataloader, 'distributed', False) or \ - isinstance(dataloader.sampler, \ - torch.utils.data.distributed.DistributedSampler): - hvd.init() + try: + for idx, (input, label) in 
enumerate(dataloader): + if measurer is not None: + measurer.start() + + output = pytorch_forward_wrapper(model, input, device=self.device, conf=conf) + if self.device != "cpu": # pragma: no cover + output = output.to("cpu") + label = label.to("cpu") + if measurer is not None: + measurer.end() + if postprocess is not None: + output, label = postprocess((output, label)) + if metrics: for metric in metrics: - metric.hvd = hvd + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(output, label) + + # If distributed dataloader, gather all outputs to update metric + if getattr(dataloader, 'distributed', False) or \ + isinstance(dataloader.sampler, \ + torch.utils.data.distributed.DistributedSampler): + hvd.init() + for metric in metrics: + metric.hvd = hvd + + if self.fp32_preds_as_label: + self.fp32_results.append(output) if self.is_baseline else \ + results.append(output) + if idx + 1 == iteration: + break + except Exception as e: + logger.warning("The dataloader didn't include label, will try input without label!") + for idx, input in enumerate(dataloader): + if (isinstance(input, dict) or isinstance(input, UserDict)): + if not self.benchmark: + assert "label" in input, \ + "The dataloader must include label to measure the metric!" + label = input["label"].to("cpu") + elif not self.benchmark: + assert False, "The dataloader must include label to measure the metric!" 
+ + if measurer is not None: + measurer.start() + + output = pytorch_forward_wrapper(model, input, device=self.device, conf=conf) + + if measurer is not None: + measurer.end() + + if self.device != "cpu" and not self.benchmark: # pragma: no cover + if isinstance(output, dict) or isinstance(input, UserDict): + for key in output: + output[key] = output[key].to("cpu") + elif isinstance(output, list) or isinstance(output, tuple): + for tensor in output: + tensor = tensor.to("cpu") + else: + output = output.to("cpu") - if self.fp32_preds_as_label: - self.fp32_results.append(output) if self.is_baseline else \ - results.append(output) - if idx + 1 == iteration: - break + if postprocess is not None and not self.benchmark: + output, label = postprocess((output, label)) + + if metrics and not self.benchmark: + for metric in metrics: + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(output, label) + + # If distributed dataloader, gather all outputs to update metric + if getattr(dataloader, 'distributed', False) or \ + isinstance(dataloader.sampler, \ + torch.utils.data.distributed.DistributedSampler): + hvd.init() + for metric in metrics: + metric.hvd = hvd + + if self.fp32_preds_as_label: + self.fp32_results.append(output) if self.is_baseline else \ + results.append(output) + if idx + 1 == iteration: + break return results def model_eval(self, @@ -1029,7 +1081,7 @@ def _get_quantizable_ops(self, model): # get bf16 capability - if (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \ + if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \ (self.version.release >= Version("1.11.0").release): self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") bf16_ops = [] @@ -1093,6 +1145,34 @@ def is_fused_module(self, module): return True else: return False + + def calculate_hessian_trace(self, + fp32_model, + dataloader, + q_model, + criterion, + enable_act = False + ): + 
"""Calculate hessian trace. + + Args: + fp32_model: The original fp32 model. + criterion: The loss function for calculate the hessian trace. # loss = criterion(output, target) + dataloader: The dataloader for calculate the gradient. + q_model: The INT8 AMAP model. + enable_act: Enabling quantization error or not. + + Return: + hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. + """ + from .torch_utils.hawq_metric import hawq_top + op_to_traces=hawq_top(fp32_model=fp32_model, + dataloader=dataloader, + q_model=q_model, + criterion=criterion, + enable_act=enable_act) + return op_to_traces + pass unify_op_type_mapping = { @@ -1308,19 +1388,34 @@ def _pre_hook_for_qat(self, dataloader=None): qscheme=torch.per_tensor_affine, reduce_range=REDUCE_RANGE), weight=torch.quantization.default_weight_fake_quant) + self.non_quant_dict = self.get_non_quant_modules(self.model.kwargs) + quantizable_ops = [] + self._get_quantizable_ops_recursively(self.model._model, '', quantizable_ops) + self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") + bf16_ops = [] + if self.version.release >= Version("1.11.0").release and self.use_bf16 and \ + (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + self._get_bf16_ops_recursively(self.model._model, '', bf16_ops) + bf16_ops_list = [(op) for op in bf16_ops if op not in quantizable_ops] self.model.model.training = True torch.quantization.prepare_qat(self.model._model, inplace=True) - def _post_hook_for_qat(self): - torch.quantization.convert(self.model._model, inplace=True) # This is a flag for reloading self.model.q_config = { 'is_oneshot': True, 'framework': 'pytorch', 'reduce_range': REDUCE_RANGE, - 'approach': 'quant_aware_training' + 'approach': 'quant_aware_training', + 'bf16_ops_list': bf16_ops_list, } + def _post_hook_for_qat(self): + torch.quantization.convert(self.model._model, inplace=True) + if self.model.q_config is not None and 
len(self.model.q_config['bf16_ops_list']) > 0 and \ + self.version.release >= Version("1.11.0").release and self.use_bf16 and \ + (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + self.model._model = torch_utils.bf16_convert.Convert(self.model._model, self.model.q_config) + def _pre_hook_for_hvd(self, dataloader=None): # TODO: lazy init here hvd.init() @@ -2220,7 +2315,8 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): self.model_calibration(q_model, dataloader, iterations, None, tune_cfg.get('calib_sampling_size', 1)) q_model.save_qconf_summary(qconf_summary=self.ipex_config_path) - if self.use_bf16: + if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \ + (self.version.release >= Version("1.11.0").release): with torch.no_grad(): with torch.cpu.amp.autocast(): q_model = ipex.quantization.convert(q_model) @@ -2487,7 +2583,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): if isinstance(self.q_dataloader, BaseDataLoader): self.q_dataloader.batch(batch_size) logger.info('Recovery `calibration.dataloader.batchsize` {} according \ - to config.yaml'.format(batch_size)) + to config.yaml' .format(batch_size)) del init_model with open(self.ipex_config_path, 'r') as f: self.cfgs = json.load(f) @@ -2722,6 +2818,8 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): # q_func can be created by neural_compressor internal or passed by user. It's critical to # distinguish how q_func is passed since neural_compressor built-in functions accept # neural_compressor model and user defined func should accept framework model. + # For export API + hook_list = torch_utils.util._set_input_scale_hook(q_model._model, op_cfgs) q_model._model = q_func( q_model if getattr(q_func, 'builtin', None) else q_model._model) assert q_model._model is not None, "Please return a trained model in train function!" 
@@ -2729,7 +2827,7 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): else: if self.sub_module_list is None: tmp_model = q_model._model - if self.version > Version("1.12.1"): # pragma: no cover + if self.version.release >= Version("1.13.0").release: # pragma: no cover # pylint: disable=E1123 q_model._model = prepare_fx( q_model._model, @@ -2750,6 +2848,8 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): prefix='', example_inputs=example_inputs) if self.approach in ['post_training_static_quant', 'post_training_auto_quant']: + # For export API + hook_list = torch_utils.util._set_input_scale_hook(q_model._model, op_cfgs) iterations = tune_cfg.get('calib_iteration', 1) if q_func is not None: q_func(q_model._model) @@ -2758,8 +2858,13 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): dataloader, iterations, calib_sampling_size=tune_cfg.get('calib_sampling_size', 1)) + + if self.approach != 'post_training_dynamic_quant': + # For export API + scale_info = torch_utils.util._get_input_scale(q_model._model, hook_list) + if self.sub_module_list is None: - if self.version > Version("1.12.1"): # pragma: no cover + if self.version.release >= Version("1.13.0").release: # pragma: no cover # pylint: disable=E1123 q_model._model = convert_fx(q_model._model, convert_custom_config=self.convert_custom_config_dict) @@ -2773,13 +2878,14 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): q_model._model, prefix='') if len(self.tune_cfg['bf16_ops_list']) > 0 and \ - self.version.release >= Version("1.11.0").release and \ + self.version.release >= Version("1.11.0").release and self.use_bf16 and \ (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) q_model.q_config = copy.deepcopy(self.tune_cfg) if self.approach != 'post_training_dynamic_quant': self._get_scale_zeropoint(q_model._model, q_model.q_config) + q_model.q_config['scale_info'] 
= scale_info self._dump_model_op_stats(q_model._model, q_model.q_config, self.approach) torch_utils.util.get_embedding_contiguous(q_model._model) @@ -2843,6 +2949,12 @@ def _pre_hook_for_qat(self, dataloader=None): quantizable_ops = [] tmp_model = self.fuse_fx_model(self.model, is_qat=True) self._get_quantizable_ops_recursively(tmp_model, '', quantizable_ops) + self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") + bf16_ops = [] + if self.version.release >= Version("1.11.0").release and self.use_bf16 and \ + (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + self._get_bf16_ops_recursively(tmp_model, '', bf16_ops) + bf16_ops_list = [(op) for op in bf16_ops if op not in quantizable_ops] quantized_ops = OrderedDict() for op in quantizable_ops: if op[1] in [ @@ -2851,10 +2963,11 @@ def _pre_hook_for_qat(self, dataloader=None): quantized_ops[op[0]] = torch.quantization.default_dynamic_qconfig else: quantized_ops[op[0]] = q_cfgs - # build for fetching scale and zeropoint + # build op_config_dict to save module scale and zeropoint op_config_dict = {} for op in quantizable_ops: op_config_dict[op] = {'weight': {'dtype': 'int8'}, 'activation': {'dtype': 'uint8'}} + if self.version.release < Version("1.11.0").release: quantized_ops["default_qconfig"] = None else: @@ -2901,12 +3014,19 @@ def _pre_hook_for_qat(self, dataloader=None): 'framework': 'pytorch_fx', 'reduce_range': REDUCE_RANGE, 'quantizable_ops': quantizable_ops, + 'bf16_ops_list': bf16_ops_list, 'op': op_config_dict, 'sub_module_list': self.sub_module_list, 'approach': 'quant_aware_training' } + # For export API + global hook_list + hook_list = torch_utils.util._set_input_scale_hook(self.model._model, quantized_ops) def _post_hook_for_qat(self): + # For export API + scale_info = torch_utils.util._get_input_scale(self.model._model, hook_list) + self.model.q_config['scale_info'] = scale_info from torch.quantization.quantize_fx import convert_fx if self.sub_module_list is None: if 
self.version > Version("1.12.1"): # pragma: no cover @@ -2926,6 +3046,10 @@ def _post_hook_for_qat(self): if self.approach != 'post_training_dynamic_quant': self._get_scale_zeropoint(self.model._model, self.model.q_config) + if len(self.model.q_config['bf16_ops_list']) > 0 and \ + self.version.release >= Version("1.11.0").release and self.use_bf16 and \ + (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + self.model._model = torch_utils.bf16_convert.Convert(self.model._model, self.model.q_config) self._dump_model_op_stats(self.model._model, self.model.q_config, self.approach) torch_utils.util.get_embedding_contiguous(self.model._model) @@ -3102,7 +3226,7 @@ def _dump_model_op_stats(self, model, tune_cfg, approach): res = dict() self._get_sub_module_op_stats(model, tune_cfg, approach, res) - if (self.version.release >= Version("1.11.0").release) and \ + if self.use_bf16 and (self.version.release >= Version("1.11.0").release) and \ (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover bf16_ops_list = tune_cfg['bf16_ops_list'] if len(bf16_ops_list) > 0: @@ -3137,7 +3261,6 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): Returns: None """ - module_dict = dict(model.named_modules()) for op_name, child in model.named_modules(): if self.is_fused_module(child): @@ -3380,7 +3503,7 @@ def fuse_fx_model(self, model, is_qat): try: tracer = QuantizationTracer(skipped_module_names, skipped_module_classes) graph_module = GraphModule(tmp_model, tracer.trace(tmp_model)) - if self.version > Version("1.12.1"): # pragma: no cover + if self.version.release >= Version("1.13.0").release: # pragma: no cover # pylint: disable=E1124, E1123 fused_model = _fuse_fx(graph_module, is_qat, @@ -3462,6 +3585,28 @@ def _check_dynamic_control(module): logger.info('Module has no forward function') return False + def get_output_op_names(self, *args, **kwargs): + return None + + def calculate_op_sensitivity(self, model, dataloader, 
tune_cfg, output_op_names, + confidence_batches, fallback=True, requantize_cfgs=None): + """This is a helper function for `query_fw_capability`, + and it will get all quantizable ops from model. + + Args: + model (object): INC model containing fp32 model + dataloader (string): dataloader contains real data. + tune_cfg (dict): dictionary of tune configure for each op. + fallback (bool): switch method in fallback stage and re-quantize stage + + Returns: + ops_lst (list): sorted op list by sensitivity + """ + from .torch_utils.util import get_fallback_order + ordered_ops = get_fallback_order(self, model.model, dataloader, tune_cfg, + confidence_batches, fallback, requantize_cfgs) + return ordered_ops + class PyTorchQuery(QueryBackendCapability): def __init__(self, local_config_file=None): diff --git a/neural_compressor/adaptor/tensorflow.py b/neural_compressor/adaptor/tensorflow.py index f9f229e96b2..64335c3f374 100644 --- a/neural_compressor/adaptor/tensorflow.py +++ b/neural_compressor/adaptor/tensorflow.py @@ -64,8 +64,11 @@ def __init__(self, framework_specific_info): self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) self.performance_only = deep_get(self.framework_specific_info, 'performance_only', False) self.use_bf16 = deep_get(self.framework_specific_info, 'use_bf16', False) + self.backend = self.framework_specific_info['backend'] + self.format = self.framework_specific_info['format'] os.makedirs(self.work_dir, exist_ok=True) + self.model = None self.pre_optimized_model = None self.pre_optimizer_handle = None @@ -76,12 +79,12 @@ def __init__(self, framework_specific_info): cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[:-len('Adaptor')].lower()) self.query_handler = TensorflowQuery(local_config_file=os.path.join( os.path.dirname(__file__), cfg_yaml_name), performance_only=self.performance_only) - self.itex_mode = cfg_yaml_name == 'tensorflow_itex.yaml' + self.itex_mode = self.backend == 'itex' or cfg_yaml_name == 
'tensorflow_itex.yaml' from pkg_resources import parse_version import tensorflow as tf - self.new_api = True if parse_version(tf.version.VERSION) == parse_version('2.11.0202242') else False - self.qdq_enabled = cfg_yaml_name == 'tensorflow_itex.yaml' or self.new_api + self.new_api = tf.version.VERSION in ('2.11.0202242', '2.11.0202250') + self.qdq_enabled = self.itex_mode or self.format == 'QDQ' or self.new_api self.op_wise_sequences = self.query_handler.get_eightbit_patterns(self.qdq_enabled) self.optimization = self.query_handler.get_grappler_optimization_cfg() @@ -92,6 +95,8 @@ def __init__(self, framework_specific_info): self.optype_statistics = None + self._last_dequantize_ops = None + def log_histogram(self, writer, tag, values, step=0, bins=1000): import tensorflow as tf # Convert to a numpy array @@ -131,7 +136,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, postprocess, **kwargs): # check model is savedmodel or not import tensorflow as tf - from neural_compressor.model.model import get_model_type + from neural_compressor.model.tensorflow_model import get_model_type tf.random.set_seed(1) self.model_type = get_model_type(model._model) optimizer = optimizer_tuple[0](**optimizer_tuple[1]) @@ -1210,7 +1215,7 @@ def inspect_tensor(self, model, dataloader=None, op_list=[], iteration_list=[], ] } """ - from neural_compressor.model.model import TensorflowBaseModel + from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.utils.utility import load_data_from_pkl, dump_data_to_local from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from .tf_utils.util import int8_node_name_reverse @@ -1366,6 +1371,12 @@ def get_optype_wise_ability(self): res[op[1]] = {'activation': {'dtype': ['bf16']}, 'weight': {'dtype': ['bf16']}} return res + def _pre_hook_for_qat(self, dataloader=None): + self.model.model = self.qat_convert(self.model.model) + + def _post_hook_for_qat(self): + pass + def 
_pre_eval_hook(self, model): return model @@ -1376,6 +1387,8 @@ def _post_eval_hook(self, model, **kwargs): def save(self, model, path): pass + # this function is used to convert keras QAT model to pb in old QAT implementation, + # and it's not used in refactored QAT def convert(self, model, source, destination): '''The function is used to convert a source model format to another. @@ -1422,6 +1435,31 @@ def convert(self, model, source, destination): return converter.convert() + def qat_convert(self, model, quantize_recipe=None): + """ + Convert a fp32 'tf.keras' model to be a int8 one with quantization aware training implementation. + + Args: + model (tf.keras.Model): The model to be quantized, expected to be a Keras Functional or Sequential model. + quantize_recipe (dict): A dict that decide whether given layers should be quantized. + + Returns: + converted_model (tf.keras.Model): Quantized model with fake quant nodes inserted. + """ + import tensorflow as tf + assert isinstance(model, tf.keras.Model), ("The model to be converted is expected to be " + "a `tf.keras.Model` instance. You should not pass an instance of type: {input}.".format( + input=model.__class__.__name__)) + + assert ( + model.__class__.__name__ in ['Functional', 'Sequential'] + ), "Only `Functional` or `Sequential` keras model is supported for QAT." + + from .tf_utils.quantize_graph.qat.quantize_helper import init_quantize_config, qat_clone_function + config = init_quantize_config(model, quantize_recipe) + q_model = tf.keras.models.clone_model(model, input_tensors=None, clone_function=qat_clone_function) + return q_model + @dump_elapsed_time("Pass recover model") def recover_tuned_model(self, model, q_config): """Execute the recover process on the specified model. 
@@ -1451,8 +1489,162 @@ def recover_tuned_model(self, model, q_config): def diagnosis_helper(self, fp32_model, quan_model, tune_cfg, save_path): from .tf_utils.util import tf_diagnosis_helper return tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path) + + def get_output_op_names(self, qmodel): + from .tf_utils.graph_util import GraphAnalyzer + + graph_def = GraphAnalyzer().parse_graph(qmodel.graph_def) + output_op_names = set() + + for output_opname in qmodel.output_node_names: + op_count = 0 + stack = [output_opname] + while stack: + opname = stack.pop() + while True: + op_count += 1 + if opname not in graph_def: + break + op = graph_def[opname] + if op.node.op == 'Dequantize': + output_op_names.add(opname) + break + next_opnames = op.node.input + if not next_opnames: + break + elif len(next_opnames) > 1: + stack += next_opnames[1:] + opname = next_opnames[0] + output_op_names = list(output_op_names) + logger.debug(f"output op names: {output_op_names}") + return output_op_names + + def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, + confidence_batches, fallback=True, requantize_cfgs=None): + """Compute the op sensitivity. + + The sensitivity metric is the mse between the output of the last quantized op of + the quantized model and the output of its corresponding op in the fp32 model. + + 1. Backup the tune cfg + 2. Fallback each int8 op and compute its mse if use fallback (with 'fallback == True'), + or re-quantize each fp32 op(fallen back in the previous stage) and compute its MSE if not. + 3. Sorted op name list according to its MSE + + Args: + fp32_model: The fp32 model. + dataloader: the dataloader with full dataset. + tune_cfg: tuning config + fallback: denote fallback stage or re-quantize stage + requantize_cfgs: the dict of tuning configs for all re-quantizable ops + + Returns: + A list of op names, sorted by its MSE sensitivity. 
+ """ + from copy import deepcopy + + fp32_op_cfg = {'activation': {'dtype': 'fp32', 'quant_mode': 'fp32'}, + 'weight': {'dtype': 'fp32'}} + + if fallback: + ops_list = [op for op, config in tune_cfg['op'].items() + if config['activation']['quant_mode'] in ('static', 'dynamic')] + replace_cfgs = {op : fp32_op_cfg for op in tune_cfg['op']} + else: + ops_list = [op for op, config in tune_cfg['op'].items() + if config['activation']['quant_mode'] == 'fp32' and op in requantize_cfgs] + replace_cfgs = requantize_cfgs + + # Step2. compute mse + mse_result = self._get_mse_order( + model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, + output_op_names, confidence_batches) + + # Step3. sort + mse_order = [op for op, _ in sorted(mse_result.items(), key=lambda i: i[1])] + logger.debug("Dump MSE order:") + for op in mse_order: + logger.debug(f"{op}: {mse_result[op]}") + return mse_order + + def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, + output_op_names, confidence_batches): + op_cfg = tune_cfg['op'] + mse_result = {} + partial_dataloader = self._partial_dataloader(dataloader, confidence_batches) + + fp32_output = self._inference_model_on_batches( + fp32_model, tune_cfg, partial_dataloader, output_op_names) + + for op in ops_lst: + # backup and set replace tuning config + backup_cfg = op_cfg[op] + op_cfg[op] = replace_cfgs[op] + + # quantize and inference the model + q_model = self.quantize(tune_cfg, fp32_model, partial_dataloader) + q_output = self._inference_model_on_batches( + q_model, tune_cfg, partial_dataloader, output_op_names) + + mse_result[op] = self._calculate_mse(fp32_output, q_output) + + # recover tune_cfg + op_cfg[op] = backup_cfg + + return mse_result + + def _partial_dataset_of(self, dataloader, confidence_batches): + from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x + if 
isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDataset_v2_x): + assert(isinstance(confidence_batches, int)) + ds = copy.deepcopy(dataloader.dataset) + ds.dataset = ds.dataset[:confidence_batches] + return ds + else: + return dataloader.dataset.take(confidence_batches) + + def _partial_dataloader(self, dataloader, confidence_batches): + return type(dataloader)( + dataset=self._partial_dataset_of(dataloader, confidence_batches), + batch_size=dataloader.batch_size, + last_batch=dataloader.last_batch, + collate_fn=dataloader.collate_fn, + sampler=dataloader.sampler, + batch_sampler=dataloader.batch_sampler, + num_workers=dataloader.num_workers, + pin_memory=dataloader.pin_memory, + shuffle=dataloader.shuffle, + distributed=dataloader.distributed) + + def _calculate_mse(self, fp32_output, q_output): + result = [] + for i, j in zip(fp32_output, q_output): + result.append(np.square(i - j).mean()) + return np.array(result).mean() + + def _inference_model_on_batches(self, model, tune_cfg, dataloader, + output_op_names): + from .tf_utils.util import generate_feed_dict + + input_tensors = model.input_tensor + output_tensors = [] + for op in output_op_names: + for tensor in model.graph.get_operation_by_name(op).outputs: + output_tensors.append(tensor) + + predictions = [] + for index, (inputs, _) in enumerate(dataloader): + feed_dict = generate_feed_dict(input_tensors, inputs) + + pred = model.sess.run(output_tensors, feed_dict) + for item in pred: + predictions.append(item) + + return predictions + @adaptor_registry class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): def __init__(self, framework_specific_info): diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml index 62524f544db..cbe91e7d016 100644 --- a/neural_compressor/adaptor/tensorflow.yaml +++ b/neural_compressor/adaptor/tensorflow.yaml @@ -16,7 +16,7 @@ --- - version: - name: ['2.11.0202242'] + name: ['2.11.0202242', '2.11.0202250'] 
precisions: names: int8, uint8, bf16, fp32 @@ -35,7 +35,7 @@ "Erf", "FusedBatchNormV2", "FusedBatchNormGradV2", "FusedBatchNormV3", "FusedBatchNormGradV3", "LeakyRelu", "LeakyReluGrad", "Mean", "Mul", "Sub", "Elu", "EluGrad", "FloorDiv", "_FusedBatchNormEx", "Log", "Log1p", "LogSoftmax", "Prod", "RealDiv", "Reciprocal", "Rsqrt", "Selu", "SeluGrad", "Sigmoid", "SigmoidGrad", "Softmax", "Softplus", "SoftplusGrad", "Softsign", - "SoftsignGrad", "Sqrt", "SquaredDifference", "Tanh", "TanhGrad", #infer_list + "SoftsignGrad", "Sqrt", "Square", "SquaredDifference", "Sum", "Tanh", "TanhGrad", "SparseSegmentSqrtN", # infer_list "Abs", "ArgMax","ArgMin","BatchToSpace","BatchToSpaceND","BroadcastTo","Ceil","CheckNumerics","ClipByValue","Concat","ConcatV2", "DepthToSpace","DynamicPartition","DynamicStitch","EnsureShape","Enter","Equal","Exit","ExpandDims","Fill","Floor","Gather", "GatherNd","GatherV2","Greater","GreaterEqual","Identity","IsFinite","IsInf","IsNan","Less","LessEqual","Max","Maximum","MaxPool", @@ -273,6 +273,10 @@ 'Dequantize + Conv2D + BiasAdd + LeakyRelu + Add + QuantizeV2', 'Dequantize + Conv2D + LeakyRelu + AddV2 + QuantizeV2', 'Dequantize + Conv2D + LeakyRelu + Add + QuantizeV2', + 'Dequantize + Conv2D + BiasAdd + Relu + AddV2 + QuantizeV2', + 'Dequantize + Conv2D + BiasAdd + Relu + Add + QuantizeV2', + 'Dequantize + Conv2D + Relu + AddV2 + QuantizeV2', + 'Dequantize + Conv2D + Relu + Add + QuantizeV2', 'Dequantize + Conv2D + Add + QuantizeV2', 'Dequantize + Conv2D + AddV2 + QuantizeV2', 'Dequantize + Conv2D + AddV2 + Add + QuantizeV2', diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 1b834b5ae46..1f359b41034 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -34,8 +34,9 @@ from .transform_graph.insert_logging import InsertLogging from .transform_graph.rerange_quantized_concat import 
RerangeQuantizedConcat from .transform_graph.bias_correction import BiasCorrection -from .util import iterator_sess_run,version1_gt_version2,version1_eq_version2 +from .util import generate_feed_dict, iterator_sess_run,version1_gt_version2,version1_eq_version2 from .util import version1_gte_version2,version1_lte_version2,version1_lt_version2 +from .util import TF_SPR_BASE_VERSIONS from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from .quantize_graph_common import QuantizeGraphHelper from .quantize_graph.qdq.optimize_qdq import OptimizeQDQGraph @@ -160,6 +161,10 @@ def _inference(self, model): Args: model(TensorflowBaseModel): input TensorflowBaseModel """ + # ITEX optimization has broken INC calibration process. + # INC needs turn off ITEX optimization pass in calibration stage. + # TODO ITEX will provide API to replace setting environment variable. + os.environ["ITEX_REMAPPER"] = "0" sess = model.sess iter_op = model.iter_op input_tensor = model.input_tensor @@ -220,24 +225,25 @@ def check_shape(tensor, data): return True disorder_tensors = [] - disorder_inputs = [] + disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): - feed_dict.update({sort_tensor: sort_input}) + feed_dict.update({sort_tensor: sort_input}) else: disorder_tensors.append(sort_tensor) disorder_inputs.append(sort_input) for i, dis_tensor in enumerate(disorder_tensors): - for j, dis_input in enumerate(disorder_inputs): - if check_shape(dis_tensor, dis_input): - feed_dict.update({dis_tensor: dis_input}) - break + for j, dis_input in enumerate(disorder_inputs): + if check_shape(dis_tensor, dis_input): + feed_dict.update({dis_tensor: dis_input}) + break _ = sess.run(output_tensor, feed_dict) if iter_op==[] \ else iterator_sess_run(sess, iter_op, \ feed_dict, output_tensor, self.calib_iteration) if idx + 1 == self.calib_iteration: break + os.environ["ITEX_REMAPPER"] = "1" def 
_check_tf_version(self): is_supported_version = False @@ -263,7 +269,7 @@ def _check_tf_version(self): if version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): is_supported_version = True - if version1_eq_version2(tf.version.VERSION, '2.11.0202242'): + if tf.version.VERSION in TF_SPR_BASE_VERSIONS: is_supported_version = True is_sprbase_version = True @@ -346,7 +352,8 @@ def convert(self): model = self.bf16_convert() if self.new_api: - model.graph_def = FuseConvRedundantDequantizeTransformer(model.graph_def).do_transformation() + if self.performance_only: + model.graph_def = FuseConvRedundantDequantizeTransformer(model.graph_def).do_transformation() model.graph_def = FuseMatMulRedundantDequantizeTransformer(model.graph_def).do_transformation() post_cse_graph_def = PostCseOptimizer(model.graph_def).do_transformation() post_hostconst_graph_def = PostHostConstConverter(post_cse_graph_def).do_transformation() diff --git a/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py b/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py index f7006b2edbf..a795d5b19b4 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py @@ -44,7 +44,8 @@ from .graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer from .graph_rewriter.int8.meta_op_optimizer import MetaInfoChangingMemOpOptimizer from .graph_rewriter.int8.rnn_convert import QuantizedRNNConverter -from .util import version1_gte_version2,version1_gt_version2,version1_eq_version2, version1_lt_version2 +from .util import version1_gte_version2,version1_gt_version2,version1_eq_version2,version1_lt_version2 +from .util import TF_SPR_BASE_VERSIONS TF_SUPPORTED_MAX_VERSION = '2.11.0' TF_SUPPORTED_MIN_VERSION = '1.14.0' @@ -118,8 +119,8 @@ def _check_tf_version(self): if version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): is_supported_version = True - - if 
version1_eq_version2(tf.version.VERSION, '2.11.0202242'): + + if tf.version.VERSION in TF_SPR_BASE_VERSIONS: is_supported_version = True is_sprbase_version = True diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py index 1141674c276..99d529fec97 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py @@ -20,7 +20,7 @@ from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from tensorflow.python.framework import dtypes -from neural_compressor.adaptor.tf_utils.util import version1_eq_version2 +from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS class FuseGeluOptimizer(GraphRewriterBase): # pragma: no cover @@ -29,7 +29,7 @@ class FuseGeluOptimizer(GraphRewriterBase): # pragma: no cover def do_transformation(self): if not (tf.version.VERSION in ('1.15.0-up2','1.15.0-up3') or \ - version1_eq_version2(tf.version.VERSION, '2.11.0202242')): + tf.version.VERSION in TF_SPR_BASE_VERSIONS): return self.model cur_graph = GraphAnalyzer() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py index 042c89769d9..e5f1da798ca 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py @@ -45,6 +45,7 @@ def do_transformation(self): target_nodes = cur_graph.query_fusion_pattern_nodes( [["Pad"], ["Conv2D", "Conv3D", "DepthwiseConv2dNative"], ('BiasAdd', 'Add', 'AddV2')]) + padding_tensor_dict = {} for node_combination in target_nodes: conv_name = node_combination[1] @@ -70,21 +71,26 @@ def do_transformation(self): 
continue padding_tensor = None - pad_node = graph_info[node_combination[0]].node - if graph_info[pad_node.input[1]].node.op != 'Const': - input_node = graph_info[pad_node.input[1]].node - if input_node.op == 'DataFormatVecPermute': - parent_input_node = graph_info[input_node.input[0]].node - if parent_input_node.op == 'Const': - padding_tensor = tensor_util.MakeNdarray( \ - parent_input_node.attr["value"].tensor).flatten() + pad_node = None + if node_combination[0] not in padding_tensor_dict: + pad_node = graph_info[node_combination[0]].node + if graph_info[pad_node.input[1]].node.op != 'Const': + input_node = graph_info[pad_node.input[1]].node + if input_node.op == 'DataFormatVecPermute': + parent_input_node = graph_info[input_node.input[0]].node + if parent_input_node.op == 'Const': + padding_tensor = tensor_util.MakeNdarray( \ + parent_input_node.attr["value"].tensor).flatten() + else: + continue else: continue else: - continue + padding_tensor = tensor_util.MakeNdarray( + graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + padding_tensor_dict[node_combination[0]] = padding_tensor else: - padding_tensor = tensor_util.MakeNdarray( - graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + padding_tensor = padding_tensor_dict[node_combination[0]] if self.itex_qdq_mode: enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or \ @@ -95,12 +101,13 @@ def do_transformation(self): if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover continue - if graph_info[pad_node.input[1]].node.op != 'Const': - cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) - cur_graph.remove_node_with_single_input_output(pad_node.name) - else: - cur_graph.remove_node_with_single_input_output(pad_node.name) - cur_graph.remove_node(pad_node.input[1]) + if pad_node: + if graph_info[pad_node.input[1]].node.op != 'Const': + cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) + 
cur_graph.remove_node_with_single_input_output(pad_node.name) + else: + cur_graph.remove_node_with_single_input_output(pad_node.name) + cur_graph.remove_node(pad_node.input[1]) conv_node = graph_info[node_combination[1]].node if self.itex_qdq_mode: if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py index 8b63b17ff31..2866a40ec04 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py @@ -46,6 +46,7 @@ def do_transformation(self): target_nodes = cur_graph.query_fusion_pattern_nodes( [["Pad"], ["Conv2D", "DepthwiseConv2dNative"], ('BiasAdd', 'Add', 'AddV2')]) + padding_tensor_dict = {} for node_combination in target_nodes: conv_name = node_combination[1] @@ -71,21 +72,26 @@ def do_transformation(self): continue padding_tensor = None - pad_node = graph_info[node_combination[0]].node - if graph_info[pad_node.input[1]].node.op != 'Const': - input_node = graph_info[pad_node.input[1]].node - if input_node.op == 'DataFormatVecPermute': - parent_input_node = graph_info[input_node.input[0]].node - if parent_input_node.op == 'Const': - padding_tensor = tensor_util.MakeNdarray( \ - parent_input_node.attr["value"].tensor).flatten() + pad_node = None + if node_combination[0] not in padding_tensor_dict: + pad_node = graph_info[node_combination[0]].node + if graph_info[pad_node.input[1]].node.op != 'Const': + input_node = graph_info[pad_node.input[1]].node + if input_node.op == 'DataFormatVecPermute': + parent_input_node = graph_info[input_node.input[0]].node + if parent_input_node.op == 'Const': + padding_tensor = tensor_util.MakeNdarray( \ + parent_input_node.attr["value"].tensor).flatten() + else: + continue else: continue else: - continue + 
padding_tensor = tensor_util.MakeNdarray( + graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + padding_tensor_dict[node_combination[0]] = padding_tensor else: - padding_tensor = tensor_util.MakeNdarray( - graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + padding_tensor = padding_tensor_dict[node_combination[0]] if self.itex_qdq_mode: enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or \ @@ -95,12 +101,14 @@ def do_transformation(self): if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover continue - if graph_info[pad_node.input[1]].node.op != 'Const': - cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) - cur_graph.remove_node_with_single_input_output(pad_node.name) - else: - cur_graph.remove_node_with_single_input_output(pad_node.name) - cur_graph.remove_node(pad_node.input[1]) + + if pad_node: + if graph_info[pad_node.input[1]].node.op != 'Const': + cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) + cur_graph.remove_node_with_single_input_output(pad_node.name) + else: + cur_graph.remove_node_with_single_input_output(pad_node.name) + cur_graph.remove_node(pad_node.input[1]) conv_node = graph_info[node_combination[1]].node # Helper.set_attr_int_list(conv_node, "padding_list", padding_tensor) # only when padding attr is explicit, the explicit_paddings is not empty diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py index 4b59de5ecb7..d7c2e33ca83 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py @@ -146,16 +146,16 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def = ConvertPlaceholderToConst(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = RemoveTrainingNodesOptimizer( - 
self._tmp_graph_def, protected_nodes=input_output_names).do_transformation() - self._tmp_graph_def = SwitchOptimizer(self._tmp_graph_def).do_transformation() + self._tmp_graph_def = GrapplerOptimizer( + self._tmp_graph_def, input_output_names, self.optimization).do_transformation() + self._tmp_graph_def = StripUnusedNodesOptimizer(self._tmp_graph_def, input_node_names, output_node_names).do_transformation() - self._tmp_graph_def = GrapplerOptimizer( - self._tmp_graph_def, input_output_names, self.optimization).do_transformation() + self._tmp_graph_def = RemoveTrainingNodesOptimizer( + self._tmp_graph_def, protected_nodes=input_output_names).do_transformation() self._tmp_graph_def = SplitSharedInputOptimizer(self._tmp_graph_def).do_transformation() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py index 04159144253..a8b793729f4 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py @@ -33,6 +33,14 @@ class FuseConvRedundantDequantizeTransformer(GraphRewriterBase): "_FusedQuantizedDeconv3D" ], ['Dequantize']] + fuse_sum_op_types_str = ( + str([b'BiasAdd', b'Sum', b'Requantize']), + str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']), + str([b'BiasAdd', b'Sum', b'LeakyRelu', b'Requantize']), + str([b'BiasAdd', b'Relu', b'Sum', b'Requantize']), + str([b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize']) + ) + def __init__(self, model, device='cpu'): super().__init__(model) self.device = device @@ -52,7 +60,7 @@ def do_transformation(self): dtypes.float32.as_datatype_enum: dtypes.float32, dtypes.qint32.as_datatype_enum: dtypes.qint32, dtypes.bfloat16.as_datatype_enum: dtypes.bfloat16 - } + } target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(self.fuse_patterns) for i in target_nodes: 
@@ -64,9 +72,9 @@ def do_transformation(self): if len(self.graph_info[quantized_node_name].outputs) > 3: continue - # QuantizedConv only supports {"Dequantize"} and {"BiasAdd", "Dequantize"} - if str(quantized_node.attr['fused_ops'].list.s) != str([b"BiasAdd", b"Requantize"]) and \ - str(quantized_node.attr['fused_ops'].list.s) != str([b"Requantize"]): + # QuantizedConv doesn't support {"BiasAdd", "Sum", "Activation", "Dequantize"}, + # {"BiasAdd", "Activation", "Sum", "Dequantize"} and {"BiasAdd", "Sum", "Dequantize"} + if str(quantized_node.attr['fused_ops'].list.s) in self.fuse_sum_op_types_str: continue new_node = node_def_pb2.NodeDef() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py index 02c55233af0..d2c7703184e 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py @@ -593,8 +593,6 @@ def do_transformation(self): self.fused_ops = [b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize'] elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Sum']): self.fused_ops = [b'BiasAdd', b'Relu', b'Sum', b'Requantize'] - elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'LeakyRelu', b'Sum']): - self.fused_ops = [b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize'] #Current fusion requires summand has same dtype as output if output is qint8 Helper.set_attr_dtype(new_node, "Tsummand", \ dtype_map_dict[requantize_node.attr['out_type'].type]) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py index d13060cd5c0..19c624e2860 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +++ 
b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py @@ -50,6 +50,15 @@ def do_transformation(self): dequantize_node_name = i[1] dequantize_node = self.graph_info[dequantize_node_name].node + if len(self.graph_info[quantized_node_name].outputs) > 3: + need_drop = False + for output in self.graph_info[quantized_node_name].outputs: + if self.graph_info[output].node.op != 'Dequantize': + need_drop = True + break + if need_drop: + continue + # ignore shared output case for license-plate-recognition-barrier-0007 model if len(self.graph_info[dequantize_node_name].outputs) == 2 and \ self.graph_info[self.graph_info[dequantize_node_name].outputs[0]].node.op == 'Reshape' and \ diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py index 62269322bc5..14020b7cc12 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py @@ -182,7 +182,7 @@ def _check_op_list(self, node_type): "MaxPool", "MaxPool3D", "FusedBatchNormV3", "Requantize", "RequantizePerChannel", "AvgPool", "Pad", "CropAndResize", "Dequantize", "Mean", "MatMul", "BatchMatMul", "BatchMatMulV2", "FakeQuantWithMinMaxVars", "_MklFusedInstanceNorm", - "Conv2DBackpropInput", "Conv3DBackpropInputV2") + "Conv2DBackpropInput", "Conv3DBackpropInputV2", "Sigmoid", "BiasAdd") return any([node_type.find(i) != -1 for i in op_list]) def _find_relu_node(self, node): @@ -198,7 +198,7 @@ def _find_relu_node(self, node): or len(self.node_name_mapping \ [Helper.node_name_from_input(node.input[0])].output) > 1): return True - elif 'T' in node.attr and node.attr['T'].type in (dtypes.quint8, dtypes.uint8): + elif 'T' in node.attr and dtypes.DType(node.attr['T'].type) in (dtypes.quint8, dtypes.uint8): return True elif (node.op.find("QuantizedConv") != -1 or 
node.op.find("QuantizedDepthwiseConv") != -1 or @@ -568,28 +568,43 @@ def _insert_qdq_pattern_for_weight_node(self, self.g_weight.add_node(reshape_3to4_node, dequant_node.name, [computational_node.name]) computational_node.input[1] = reshape_3to4_node.name else: - if weight_node.name in self.g.parent_frame_details and self.g.parent_frame_details[weight_node.name]: + if computational_node.name in self.g.parent_frame_details and \ + self.g.parent_frame_details[computational_node.name]: + weight_enter_node = Helper.create_node('Enter', \ + weight_node.name + '_enter', [weight_node.name]) + Helper.set_attr_string(weight_enter_node, 'frame_name', + self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) + Helper.set_attr_dtype(weight_enter_node, 'T', dtypes.float32) + Helper.set_attr_bool(weight_enter_node, 'is_constant', True) + Helper.set_attr_int(weight_enter_node, 'parallel_iterations', \ + self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) + min_enter_node = Helper.create_node('Enter', min_name + '_enter', [min_name]) Helper.set_attr_string(min_enter_node, 'frame_name', - self.g.parent_frame_details[weight_node.name].attr['frame_name'].s) + self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) Helper.set_attr_dtype(min_enter_node, 'T', dtypes.float32) Helper.set_attr_bool(min_enter_node, 'is_constant', True) Helper.set_attr_int(min_enter_node, 'parallel_iterations', \ - self.g.parent_frame_details[weight_node.name].attr['parallel_iterations'].i) + self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) max_enter_node = Helper.create_node('Enter', max_name + '_enter', [max_name]) Helper.set_attr_string(max_enter_node, 'frame_name', - self.g.parent_frame_details[weight_node.name].attr['frame_name'].s) + self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) Helper.set_attr_dtype(max_enter_node, 'T', dtypes.float32) 
Helper.set_attr_bool(max_enter_node, 'is_constant', True) Helper.set_attr_int(max_enter_node, 'parallel_iterations',\ - self.g.parent_frame_details[weight_node.name].attr['parallel_iterations'].i) + self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) self.g_weight.add_node(quant_node, weight_name, []) self.g_weight.add_node(min_node, None, [min_enter_node.name]) self.g_weight.add_node(max_node, None, [max_enter_node.name]) self.g_weight.add_node(min_enter_node, min_node.name, [quant_node.name]) self.g_weight.add_node(max_enter_node, max_node.name, [quant_node.name]) + self.g_weight.add_node(weight_enter_node, weight_node.name, [quant_node.name]) + quant_node.input[0] = weight_enter_node.name + quant_node.input[1] = min_enter_node.name + quant_node.input[2] = max_enter_node.name + self.g_weight.add_node(quant_node, weight_enter_node.name, []) self.g_weight.add_node(dequant_node, quant_node.name, [computational_node.name]) computational_node.input[1] = dequant_node.name else: diff --git a/neural_compressor/adaptor/tf_utils/graph_util.py b/neural_compressor/adaptor/tf_utils/graph_util.py index 77903d4b62c..d810f1d87a1 100644 --- a/neural_compressor/adaptor/tf_utils/graph_util.py +++ b/neural_compressor/adaptor/tf_utils/graph_util.py @@ -918,11 +918,13 @@ def gen_per_iter(data): if i.startswith(first_line): iterations += 1 - step = len(valid_data) / iterations + step = int(len(valid_data) / iterations) final_res = [] for i in range(iterations): final_res.extend(gen_per_iter(valid_data[int(i*step): int(step*( i+ 1))])) + if i + 1 == iterations and int(step*( i+ 1)) < len(valid_data): + final_res.extend(gen_per_iter(valid_data[int(step*( i+ 1)): len(valid_data)])) return final_res diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py new file mode 100644 index 00000000000..369707c0ef6 --- /dev/null +++ 
b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py new file mode 100644 index 00000000000..ffa016a1888 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import abc +import six +import tensorflow as tf + +@six.add_metaclass(abc.ABCMeta) +class FakeQuantizeBase(object): + """ABC interface class for applying fake quantization by insert qdq.""" + + @abc.abstractmethod + def __call__(self, inputs, range, training, **kwargs): + """Apply quantization to the input tensor. + This is the main logic of the 'FakeQuantize' which implements the core logic + to quantize the tensor. It is invoked during the `call` stage of the layer, + and allows modifying the tensors used in graph construction. + + Args: + inputs (tf.Tensor): Input tensor to be quantized. + range (dict): The min-max range of input tensor. + training (bool): Whether the graph is currently training. + **kwargs: Additional variables which may be passed to the FakeQuantize class. + + Returns: + output (tf.Tensor): The tensor to be quantized. + """ + raise NotImplementedError + + @abc.abstractmethod + def get_config(self): + """Returns the config used to serialize the 'FakeQuantize'.""" + raise NotImplementedError('FakeQuantize should implement get_config().') + + @classmethod + def from_config(cls, config): + """Instantiates a 'FakeQuantize' from its config. + + Args: + config (dict): A dict containing required information. + + Returns: + output (FakeQuantize): A 'FakeQuantize' instance. + """ + return cls(**config) + +class FakeQuantize(FakeQuantizeBase): + """The class that applies fake quantization.""" + + def __init__( + self, + per_channel=False, + num_bits=8, + channel_axis=-1, + symmetric=True, + narrow_range=True + ): + """Initialize a FakeQuantize class. + + Args: + per_channel (bool): Whether to apply per_channel quantization. The last dimension is + used as the channel. + num_bits (int): Number of bits for quantization + symmetric (bool): If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. 
+ narrow_range (bool): In case of 8 bits, narrow_range nudges the quantized range + to be [-127, 127] instead of [-128, 127]. This ensures symmetric range + has 0 as the centre. + """ + self.num_bits = num_bits + self.per_channel = per_channel + self.symmetric = symmetric + self.narrow_range = narrow_range + self.channel_axis = channel_axis + self.name_prefix = 'FakeQuantize' + + def __call__(self, inputs, ranges, training, **kwargs): + """Applying fake quantization by insert qdq. + The quantized tensor is calculated based on range of the last batch of values. + + Args: + inputs (tf.Tensor): Input tensor to be quantized. + range (dict): The min-max range of input tensor. + training (bool): Whether the graph is currently training. + **kwargs: Additional variables which may be passed to the FakeQuantize class. + + Returns: + output (tf.Tensor): The tensor to be quantized. + """ + with tf.name_scope(self.name_prefix): + input_shape = inputs.get_shape() + input_dim = len(input_shape) + if self.channel_axis == -1: + self.channel_axis += input_dim + + if not training: + return self._insert_qdq(inputs, ranges["min_var"], ranges["max_var"]) + + if self.per_channel: + if input_dim == 2: + reduce_dims = [0] + elif input_dim == 4: + reduce_dims = [i for i in range(input_dim) if i != self.channel_axis] + + if self.per_channel: + if input_dim >= 2: + batch_min = tf.math.reduce_min( + inputs, axis=reduce_dims, name="BatchMin" + ) + else: + batch_min = inputs + else: + batch_min = tf.math.reduce_min(inputs, name="BatchMin") + + if self.per_channel: + if input_dim >= 2: + batch_max = tf.math.reduce_max( + inputs, axis=reduce_dims, name="BatchMax" + ) + else: + batch_max = inputs + else: + batch_max = tf.math.reduce_max(inputs, name="BatchMax") + + if self.symmetric: + if self.narrow_range: + min_max_ratio = -1 + else: + min_max_ratio = -((1 << self.num_bits) - 2) / (1 << self.num_bits) + + range_min = tf.math.minimum(batch_min, batch_max / min_max_ratio) + range_max = 
tf.math.maximum(batch_max, batch_min * min_max_ratio) + else: + range_min = tf.math.minimum(batch_min, 0.0) + range_max = tf.math.maximum(batch_max, 0.0) + + assign_min = ranges["min_var"].assign(range_min, name="AssignMinLast") + assign_max = ranges["max_var"].assign(range_max, name="AssignMaxLast") + + return self._insert_qdq(inputs, assign_min, assign_max) + + def _insert_qdq(self, inputs, min_var, max_var): + """Adds a fake quantization operation. + Depending on value of self.per_channel, this operation may do global quantization + or per channel quantization. min_var and max_var should have corresponding + shapes: [1] when per_channel == False and [d] when per_channel == True. + + Args: + inputs (tf.Tensor): A tensor containing values to be quantized. + min_var (tf.Variable): A variable containing quantization range lower end(s). + max_var (tf.Variable): A variable containing quantization range upper end(s). + + Returns: + outputs (tf.Tensor): A tensor containing quantized values. + """ + if self.per_channel: + + return tf.quantization.quantize_and_dequantize_v2( + inputs, + min_var, + max_var, + num_bits=self.num_bits, + narrow_range=self.narrow_range, + axis=self.channel_axis, + range_given=True, + ) + else: + assert min_var.get_shape() == [] + assert max_var.get_shape() == [] + + return tf.quantization.quantize_and_dequantize_v2( + inputs, + min_var, + max_var, + num_bits=self.num_bits, + narrow_range=self.narrow_range, + range_given=True, + ) + + def get_config(self): + """Returns the config used to serialize the 'FakeQuantize'. + + Returns: + config (dict): A dict containing required information. + """ + return { + 'num_bits': self.num_bits, + 'per_channel': self.per_channel, + 'symmetric': self.symmetric, + 'narrow_range': self.narrow_range + } + + def __eq__(self, other): + """Check if this instance is equal to another instance. + + Args: + other (FakeQuantize): Another instance to be checked. 
+ + Returns: + is_equal (bool): If the two instances are equal. + """ + if not isinstance(other, FakeQuantize): + return False + + return (self.num_bits == other.num_bits and + self.per_channel == other.per_channel and + self.symmetric == other.symmetric and + self.narrow_range == other.narrow_range) + + def __ne__(self, other): + """Check if this instance is not equal to another instance. + + Args: + other (FakeQuantize): Another instance to be checked. + + Returns: + not_equal (bool): If the two instances are not equal. + """ + return not self.__eq__(other) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py new file mode 100644 index 00000000000..278f79c28ac --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +global_config = {} +logger = logging.getLogger("neural_compressor") + +class QuantizeConfig(): + """Class for building custom quantize config. + There should be only one QuantizeConfig instance for global setting. + """ + + def __new__(cls): + """Created a QuantizeConfig instance and add it to the global_config dict. + + Returns: + instance (QuantizeConfig) : The created QuantizeConfig instance. 
+ """ + instance = super().__new__(cls) + global_config['quantize_config'] = instance + return instance + + def __init__(self): + """Initialize QuantizeConfig instance.""" + self.quantize_recipe = {} + self.model_name = None + + def add_quantize_recipe(self, quantize_recipe): + """Add custom recipe for quantization to the QuantizeConfig instance. + + Args: + quantize_recipe (dict): A dict that decide whether given layers should be quantized. + A typical quantize_recipe will be a dict of layer_name and + dict as key-value pairs. In each value dict, there should be + a {'quantize': bool} key-value pair and a {'index': list} pair. + The latter one is used to decide which inputs should be quantized + in some layers with multiple inputs. + For example: + {'conv5_block3_3_conv': {'quantize': Flase} + 'conv5_block3_3_add' : {'quantize': True, 'index': [1, 3]} + } + """ + self.quantize_recipe.update(quantize_recipe) + + def query_layer(self, layer_name): + """Query if a specific layer is in the quantize_recipe dict. + + Args: + layer_name (string): The input layer name. + Returns: + layer_recipe (dict): The quantize recipe for this input layer. + """ + if layer_name in self.quantize_recipe: + return self.quantize_recipe[layer_name] + return {} + + def remove_layer(self, layer_name): + """Remove a specific layer from the quantize_recipe dict. + + Args: + layer_name (string): The name of layer to be removed. + """ + if layer_name in self.quantize_recipe: + del self.quantize_recipe[layer_name] + + def remove_layers(self, layer_names): + """Remove a batch of layers from the quantize_recipe dict. + + Args: + layers_names (List): The names of layers to be removed. + """ + for layer_name in layer_names: + self.remove_layer(layer_name) + + def get_quantize_recipe(self): + """Get the current recipe dict for quantization. + + Returns: + quantize_recipe (dict): A dict that decide whether given layers should be quantized. 
+ """ + return self.quantize_recipe + + def is_empty(self): + """Check if the recipe of quantization is an empty dict. + + Returns: + is_empty (bool): True if no custom recipe is updated to this class. + """ + if self.quantize_recipe: + return False + return True + + def clear_quantize_recipe(self): + """Clear recipe of quantization to be an empty dict.""" + self.quantize_recipe.clear() + +layer_wise_config = { + 'quantize_layers': {'Conv2D', 'Dense', 'DepthwiseConv2D', 'MaxPooling2D', + 'AveragePooling2D', 'GlobalAveragePooling2D'}, + 'possible_quantize_layers': {'Multiply', 'Concatenate', 'Add', 'BatchNormalization'}, + 'weighted_layers': {'Conv2D', 'Dense', 'DepthwiseConv2D'}, + 'multiple_inputs_layers': {'Multiply', 'Concatenate', 'Add'} +} + diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py new file mode 100644 index 00000000000..26faf2ada1e --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .quantize_wrapper import QuantizeWrapper +from .quantize_layers.optimize_layer import config_quantizable_layers +from .quantize_config import layer_wise_config, global_config, QuantizeConfig + +def init_quantize_config(model, quantize_recipe=None): + """Initialize quantization config at the beginning of QAT process. + + Args: + model_name (string): Special pre-optimized model name. + quantize_recipe (dict): A dict that decide whether given layers should be quantized. + + Returns: + config (QuantizeConfig): QuantizeConfig instance used to decide whether a specific layer + should be quantized. + """ + assert 'quantize_config' not in global_config, ("quantize_config has been unexpectedly" + "created. Please check your QAT workflow") + + config = QuantizeConfig() + config_quantizable_layers(model) + + if quantize_recipe: + config.add_quantize_recipe(quantize_recipe) + + return config + +def _is_quantizable_layer(layer): + """Query if the input layer should be quantized. + + Args: + layer (tf.keras.layers.Layer): input Keras layer + + Returns: + capability (bool): whether the input layer is capable of quantization. + """ + quantizable = True + layer_class = layer.__class__.__name__ + + quantize_config = global_config['quantize_config'] + specific_layer_config = quantize_config.query_layer(layer) + if specific_layer_config: + # the layer is set to be unquantizable by QuantizeConfig + if not specific_layer_config['quantize']: + return False + else: + if layer_class in layer_wise_config['quantize_layers'] or \ + layer_class in layer_wise_config['possible_quantize_layers']: + return True + + if layer_class not in layer_wise_config['quantize_layers']: + quantizable = False + + return quantizable + +def qat_clone_function(layer): + """Wrap or leave given layer based on quantize config object parameters. + + Args: + layer (tf.keras.layers.Layer): input Keras layer + + Returns: + wrapped_layer (QuantizeWrapper): layer wrapped by QuantizeWrapper class. 
+ """ + wrapped_layer= layer + if _is_quantizable_layer(layer): + wrapped_layer = QuantizeWrapper(layer) + + return wrapped_layer \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py new file mode 100644 index 00000000000..369707c0ef6 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py new file mode 100644 index 00000000000..5d5a87083a6 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .quantize_layer_add import QuantizeLayerAdd +from .quantize_layer_bn import QuantizeLayerBatchNormalization + +def config_quantizable_layers(model): + quantize_layer_mapping = { + 'Add': QuantizeLayerAdd, + 'BatchNormalization': QuantizeLayerBatchNormalization + } + + for layer_class, quantize_layer in quantize_layer_mapping.items(): + quantize_layer_mapping[layer_class] = quantize_layer() + + for layer in model.layers: + if layer.__class__.__name__ in quantize_layer_mapping: + quantize_layer_mapping[layer.__class__.__name__](layer) \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py new file mode 100644 index 00000000000..f2e413d8b16 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from .quantize_layer_base import QuantizeLayerBase + +logger = logging.getLogger("neural_compressor") + +class QuantizeLayerAdd(QuantizeLayerBase): # pragma: no cover + """The class for quantization of Add.""" + + def __init__(self): + """Initialize QuantizeLayerAdd class.""" + self.quantize_patterns = [ + ['Conv', 'BatchNorm', 'Add'], + ['Conv', 'BatchNorm', 'Activation', 'Add'], + ['Conv', 'BatchNorm', 'Activation', 'Dropout', 'Add'] + ] + + super().__init__() + + def _quantizable_add(self): + """Check if the input layer meets criteria of quantization. + + Args: + layer (tf.keras.layers.Layer): The input layer. + + Returns: + quantizable (bool): If this layer should be quantized. + """ + input_layer = self._find_input_layers(self.layer) + if len(input_layer) == 1: + logger.warning("The layer 'Add' should have more than one input. " + "You input a model with layer {} which has only one input".format(self.layer.name)) + return False + + return True + + def __call__(self, layer): + """The main logic of QuantizeLayerAdd. + Neural Compressor will enumerate all layers of the input model to check + if there are any layer meeting the criteria. The choosen ones will be marked + as quantizable by QuantizeConfig. + + Args: + layer (tf.keras.layers.Layer): The keras layer to be estimated. 
+ """ + self.layer = layer + if self._quantizable_add(): + input_layers = self._find_input_layers(self.layer) + fused_conv_index = None + for i, input_layer in enumerate(input_layers): + # Check that the input is a Conv pattern + if 'Conv' in input_layer.__class__.__name__ or self._find_patterns(input_layer): + if hasattr(input_layer, 'outbound_nodes') and \ + len(getattr(input_layer, 'outbound_nodes')) == 1: + fused_conv_index = i + break + + input_indexes = [i for i in range(0, len(input_layers))] + if fused_conv_index: + del input_indexes[fused_conv_index] + + self.quantize_config.add_quantize_recipe({self.layer.name: {'quantize': True, + 'index': input_indexes}}) \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py new file mode 100644 index 00000000000..e57970703c7 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ..quantize_config import global_config + +class QuantizeLayerBase(): # pragma: no cover + + def __init__(self): + """Initialize QuantizeLayerBase class.""" + self.quantize_patterns = [] + assert 'quantize_config' in global_config, \ + "QuantizeConfig is not correctly created." + self.quantize_config = global_config['quantize_config'] + + def _find_input_layers(self, layer): + """Find all inputs of a specific layer. + + Args: + layer (tf.keras.layers.Layer): The target keras layer that this method + is to find its input layers. + + Returns: + input_layers (list): List of input layers found by this method. + """ + input_layers = [] + if isinstance(layer.input, list): + for input_tensor in layer.input: + input_layer = input_tensor._keras_history.layer + input_layers.append(input_layer) + else: + input_layer = layer.input._keras_history.layer + input_layers.append(input_layer) + return input_layers + + def _find_patterns(self, layer): + """ Checks if the input layer can satisfy the patterns. + + Args: + layer (tf.keras.layers.Layer): The input keras layer that this method + is to find patterns. + + Returns: + valid_patterns (bool): If the input layer can satisfy any pattern. + """ + if not self.quantize_patterns: + return False + + for quantize_pattern in self.quantize_patterns: + index = len(quantize_pattern) - 2 + previous_layer = layer + while(index >= 0): + previous_layer = self._find_input_layers(previous_layer) + if quantize_pattern[index] not in previous_layer.__class__.__name__: + break + index -= 1 + if index == -1: + return True + + return False + + def __call__(self, layer): + """The main logic of QuantizeLayerBase. + Neural Compressor will enumerate all layers of the input model to check + if there are any layer meeting the criteria. The choosen ones will be marked + as quantizable by QuantizeConfig. + + Args: + layer (tf.keras.layers.Layer): The keras layer to be estimated. 
+ """ + raise NotImplementedError() diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py new file mode 100644 index 00000000000..840e91addb5 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .quantize_layer_base import QuantizeLayerBase + +class QuantizeLayerBatchNormalization(QuantizeLayerBase): # pragma: no cover + """The class for quantization of BatchNormalization.""" + + def __init__(self): + """Initialize QuantizeLayerBatchNormalization class.""" + super().__init__() + + def _quantizable_bn(self): + """Check if the input layer meets criteria of quantization. + + Args: + layer (tf.keras.layers.Layer): The input layer. + + Returns: + quantizable (bool): If this layer should be quantized. + """ + input_layer = self._find_input_layers(self.layer) + assert len(input_layer) == 1, "BatchNormalization only has one input." + input_layer_class = input_layer.__class__.__name__ + if 'Conv' not in input_layer_class: + return True + + return False + + def __call__(self, layer): + """The main logic of QuantizeLayerBatchNormalization. 
+ Neural Compressor will enumerate all layers of the input model to check + if there are any layer meeting the criteria. The choosen ones will be marked + as quantizable by QuantizeConfig. + + Args: + layer (tf.keras.layers.Layer): The keras layer to be estimated. + """ + self.layer = layer + if self._quantizable_bn(): + self.quantize_config.add_quantize_recipe({self.layer.name: {'quantize': True}}) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py new file mode 100644 index 00000000000..d05f47e9300 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf +from abc import abstractmethod +from .fake_quantize import FakeQuantize +from tensorflow.python.util import tf_inspect +from .quantize_config import layer_wise_config, global_config + +class QuantizeWrapperBase(tf.keras.layers.Wrapper): + """Base class for quantize wrapper""" + + def __init__(self, layer, **kwargs): + """Create a quantize wrapper for a keras layer. + This wrapper provides options to quantize inputs and weights of the layer. + + Args: + layer (tf.keras.layers.Layer): The keras layer to be wrapped. + **kwargs: Additional keyword arguments to be passed. 
+ """ + assert layer is not None, "'layer' should not be None." + + assert isinstance(layer, tf.keras.layers.Layer) or isinstance(layer, + tf.keras.Model),("'layer' can only be a 'tf.keras.layers.Layer' instance." + " You passed an instance of type: {input}.".format(input=layer.__class__.__name__)) + + if "name" not in kwargs: + kwargs["name"] = self._make_layer_name(layer) + + super(QuantizeWrapperBase, self).__init__(layer, **kwargs) + + self.index = None + self._layer_class = layer.__class__.__name__ + self._track_trackable(layer, name="layer") + + @staticmethod + def _make_layer_name(layer): + """Modify the layer name to be quantized layer.""" + return "{}_{}".format("quant", layer.name) + + @staticmethod + def _weight_name(name): + """Extracts the weight name from the full TensorFlow variable name. + For example, returns 'kernel' for 'dense_2/kernel:0'. + + Args: + name (string): TensorFlow variable name. + + Returns: + weight_name (string): Extracted weight name. + """ + return name.split(":")[0].split("/")[-1] + + def build(self, input_shape): + """Creates the variables of the layer. + + Args: + input_shape (tf.TensorShape or list): shapes of input tensors + """ + super(QuantizeWrapperBase, self).build(input_shape) + + self.optimizer_step = self.add_weight( + "optimizer_step", + initializer=tf.keras.initializers.Constant(-1), + dtype=tf.dtypes.int32, + trainable=False, + ) + + def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + This method will cause the layer's state to be built, if that has not + happened before. This requires that the layer will later be used with + inputs that match the input shape provided here. + + Args: + input_shape (tuple of integers or tf.TensorShape): input shape of the layer. + + Returns: + output_shape(tf.TensorShape) : output shape of the layer. 
+ """ + return self.layer.compute_output_shape(self.layer.input_shape) + + def _init_min_max_variables(self, name, shape): + """Initialize the minimum and maximum values of variables to the wrapped layer. + + Args: + name (string): Name prefix of the variables. + shape (tf.TensorShape): shape of variables to be added. + + Returns: + min_variable (tf.Variable) : The initialized minimum value of given variables. + min_variable (tf.Variable) : The initialized maximum value of given variables. + """ + min_variable = self.layer.add_weight( + name + "_min", + shape = (shape), + trainable = False, + initializer = tf.keras.initializers.Constant(-6.0), + ) + max_variable = self.layer.add_weight( + name + "_max", + shape = (shape), + trainable = False, + initializer = tf.keras.initializers.Constant(6.0), + ) + + return min_variable, max_variable + + def query_input_index(self): + """Query QuantizeConfig to check if there is any designated input index for this layer.""" + quantize_config = global_config['quantize_config'] + custom_layer_config = quantize_config.query_layer(self.layer) + if custom_layer_config and 'index' in custom_layer_config: + self.index = custom_layer_config['index'] + + @abstractmethod + def call(self, inputs, training=None): + """This is where the quantize wrapper's logic lives. + + Args: + inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. + + Returns: + outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. + """ + raise NotImplementedError + + def get_config(self): + """Get the config of the quantize wrapper. + + Returns: + config (dict): dict of wrapper config. + """ + base_config = super(QuantizeWrapperBase, self).get_config() + config = {"quantize_config": None} + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + """Creates a quantize wrapper instance from its config. + + Args: + config (dict): A Python dictionary, typically the output of get_config. 
+ + Returns: + output_obj: (QuantizeWrapperBase): A quantize wrapper instance. + """ + config = config.copy() + quantize_config = tf.keras.utils.deserialize_keras_object( + config.pop("quantize_config"), module_objects=globals(), custom_objects=None + ) + + layer = tf.keras.layers.deserialize(config.pop("layer")) + + return cls(layer=layer, quantize_config=quantize_config, **config) + + @property + def trainable(self): + """Get trainable attribute for the layer and its sublayers.""" + return self.layer.trainable + + @trainable.setter + def trainable(self, value): + """Set trainable attribute for the layer and its sublayers. + + Args: + value (Boolean): The desired state for the layer's trainable attribute. + """ + self.layer.trainable = value + + @property + def trainable_weights(self): + """List of all trainable weights tracked by this layer. + Trainable weights are updated via gradient descent during training. + + Returns: + trainable_weights (list): A list of trainable variables. + """ + return self.layer.trainable_weights + self._trainable_weights + + @property + def non_trainable_weights(self): + """List of all non-trainable weights tracked by this layer. + Non-trainable weights are *not* updated during training. They are + expected to be updated manually in `call()`. + + Returns: + non_trainable_weights (list): A list of non-trainable variables. + """ + return self.layer.non_trainable_weights + self._non_trainable_weights + + @property + def updates(self): + """update layer """ + return self.layer.updates + self._updates + + @property + def losses(self): + """List of losses added using the `add_loss()` API. + Variable regularization tensors are created when this property is + accessed, so it is eager safe: accessing `losses` under a + `tf.GradientTape` will propagate gradients back to the corresponding + variables. + + Returns: + losses (list): A list of tensors. 
+ """ + return self.layer.losses + self._losses + +class QuantizeWrapper(QuantizeWrapperBase): + """General QuantizeWrapper for quantizable layers. Weights and inputs will be quantized + according to the layer type and quantize config. + """ + + def __init__(self, layer, **kwargs): + """Create a quantize wrapper for a keras layer. + This wrapper provides options to quantize inputs and weights of the layer. + + Args: + layer (tf.keras.layers.Layer): The keras layer to be wrapped. + **kwargs: Additional keyword arguments to be passed. + """ + super().__init__(layer, **kwargs) + + self.kernel = 'kernel' + self.kernel_weights = None + self.channel_axis = kwargs.get("axis", -1) + if self._layer_class == 'DepthwiseConv2D': + self.kernel = 'depthwise_kernel' + self.channel_axis = 2 + if self._layer_class in layer_wise_config['multiple_inputs_layers']: + self.query_input_index() + + def build(self, input_shape): + """Creates the variables of the layer. + + Args: + input_shape (tf.TensorShape or list): shapes of input tensors + """ + super().build(input_shape) + + if self._layer_class in layer_wise_config['weighted_layers']: + self.kernel_weights = getattr(self.layer, self.kernel) + + weight_min, weight_max = self._init_min_max_variables( + name = self.kernel_weights.name.split(":")[0], + shape = self.kernel_weights.shape[self.channel_axis] + ) + + self.weight_range = {"min_var": weight_min, "max_var": weight_max} + self._trainable_weights.append(self.kernel_weights) + + num_input = 1 + if not isinstance(input_shape, tf.TensorShape): + num_input = len(input_shape) + if not self.index: + self.index = [i for i in range(num_input)] + + if num_input == 1: + inputs_min, inputs_max = self._init_min_max_variables( + name = self.layer.name + "_input{}".format(0), + shape = None + ) + self.inputs_range = {"min_var": inputs_min, "max_var": inputs_max} + else: + self.inputs_range = [] + for i in range(num_input): + self.inputs_range.append({}) + if i in self.index: + inputs_min, 
inputs_max = self._init_min_max_variables( + name = self.layer.name + "_input{}".format(i), + shape = None + ) + self.inputs_range[i] = {"min_var": inputs_min, "max_var": inputs_max} + + def call(self, inputs, training=None): + """This is where the quantize wrapper's logic lives. + + Args: + inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. + + Returns: + outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. + """ + if training is None: + training = tf.keras.backend.learning_phase() + + # Quantize all weights, and replace them in the underlying layer. + if self._layer_class in layer_wise_config['weighted_layers']: + weight_quantizer = FakeQuantize( + per_channel = True, + channel_axis = self.channel_axis, + ) + quantized_weight = weight_quantizer(self.kernel_weights, self.weight_range, training) + setattr(self.layer, self.kernel, quantized_weight) + + quantized_inputs = inputs + inputs_quantizer = FakeQuantize( + per_channel = False, + channel_axis = self.channel_axis, + ) + + if not isinstance(quantized_inputs, tf.Tensor): + for i in range(len(quantized_inputs)): + if i in self.index: + quantized_inputs[i] = inputs_quantizer(inputs[i], self.inputs_range[i], training) + else: + quantized_inputs = inputs_quantizer(inputs, self.inputs_range, training) + + args = tf_inspect.getfullargspec(self.layer.call).args + if "training" in args: + outputs = self.layer.call(quantized_inputs, training=training) + else: + outputs = self.layer.call(quantized_inputs) + + return outputs \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py index 3db96745ed6..0b1b712a627 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py @@ -58,12 +58,16 @@ def __init__(self, **kwargs): 'DequantizeConv2DSigmoidQuantizeV2': 
self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DBiasAddLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddLeakyReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DBiasAddReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DBiasAddReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddAddV2LeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddV2LeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DLeakyReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DAddReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DBiasAddAddRelu6MulMulQuantizeV2': self.apply_conv_biasadd_hardswish_fusion, @@ -1194,7 +1198,9 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): # Dequantize + Conv2D + BiasAdd + AddV2 + Relu6 + QuantizeV2 # Dequantize + Conv2D + BiasAdd + Add + Relu + QuantizeV2 # Dequantize + Conv2D + BiasAdd + LeakyRelu + AddV2 + QuantizeV2 + # Dequantize + Conv2D + BiasAdd + Relu + AddV2(Add) + QuantizeV2 # Dequantize + Conv2D + LeakyRelu + AddV2 + QuantizeV2 + # Dequantize + Conv2D + Relu + AddV2(Add) + QuantizeV2 # Dequantize + Conv2D + Add + Add + Relu + QuantizeV2 # Dequantize + Conv2D + BiasAdd + Add + Relu + QuantizeV2 
skip_node_name = match_node_name[2:] @@ -1236,8 +1242,8 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) forth_node = self.node_name_mapping[match_node_name[4]].node - if forth_node.op != 'LeakyRelu': - if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node): + if forth_node.op not in ('LeakyRelu', 'Relu'): + if third_node.op not in ('LeakyRelu', 'Relu') and not self._find_relu_node(matched_node.node): return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1 @@ -1251,7 +1257,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): sum_node_name = self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if (deq_node.op != 'LeakyRelu' and deq_node.op != 'Dequantize') or \ + if (deq_node.op != 'LeakyRelu' and deq_node.op != 'Dequantize' and deq_node.op != 'BiasAdd') or \ deq_node.op.find("Quantize") != -1: return self.apply_newly_conv_biasadd_fusion(match_node_name[:3]+[match_node_name[-1]]) @@ -1350,7 +1356,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): self.add_output_graph_node(quantized_conv_node) - if is_leakyrelu_add_fusion or is_leakyrelu: + if is_leakyrelu_add_fusion or is_leakyrelu or is_relu_add_fusion: quantize_down_name = self._add_quantize_down_nodes( node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py index 1b95f743fc5..40183e427d2 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +++ 
b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py @@ -963,12 +963,6 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): self.exclude_matmul_nodes.append(cur_node.name) continue - for i in self.node_name_mapping: - if weight_node.input and not weight_node.input[0].startswith('^') \ - and weight_node.name in self.node_name_mapping[i].output: - self.exclude_matmul_nodes.append(cur_node.name) - continue - for sub_rule in patterns: if sub_rule[0] != "Dequantize": self.exclude_matmul_nodes.append(cur_node.name) diff --git a/neural_compressor/adaptor/tf_utils/util.py b/neural_compressor/adaptor/tf_utils/util.py index f95ea4f2d80..750900c4ab8 100644 --- a/neural_compressor/adaptor/tf_utils/util.py +++ b/neural_compressor/adaptor/tf_utils/util.py @@ -16,6 +16,7 @@ # limitations under the License. # +from collections import OrderedDict, UserDict import os import numpy as np from google.protobuf import text_format @@ -29,6 +30,8 @@ from .graph_util import GraphRewriterHelper from pkg_resources import parse_version +TF_SPR_BASE_VERSIONS = ('2.11.0202242', '2.11.0202250') + def version1_lt_version2(version1, version2): return parse_version(version1) < parse_version(version2) @@ -493,3 +496,62 @@ def _parse_config(q_config, cfg, op_list): if op_name_and_type[0] in op_list: updated_cfg['op'][op_name_and_type] = cfg['op'][op_name_and_type] return dequan_min_max, updated_cfg + +def generate_feed_dict(input_tensor, inputs): + if len(input_tensor) == 1: + feed_dict = {} + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ + or isinstance(inputs, UserDict): + for name in inputs: + for tensor in input_tensor: + pos = tensor.name.rfind(":") + t_name = tensor.name if pos < 0 else tensor.name[:pos] + if name == t_name: + feed_dict[tensor] = inputs[name] + break + else: + feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] + else: + assert len(input_tensor) == len(inputs), \ + 'inputs len must equal with input_tensor' + 
feed_dict = {} + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ + or isinstance(inputs, UserDict): + for name in inputs: + for tensor in input_tensor: + pos = tensor.name.rfind(":") + t_name = tensor.name if pos < 0 else tensor.name[:pos] + if name in [tensor.name, t_name]: + feed_dict[tensor] = inputs[name] + break + else: + # sometimes the input_tensor is not the same order with inputs + # we should check and pair them + def check_shape(tensor, data): + # scalar or 1 dim default True + if tensor.shape == None or \ + len(tensor.shape.dims) == 1 or \ + not hasattr(data, 'shape'): + return True + tensor_shape = tuple(tensor.shape) + data_shape = tuple(data.shape) + for tensor_dim, data_dim in zip(tensor_shape, data_shape): + if tensor_dim is not None and tensor_dim != data_dim: + return False + return True + + disorder_tensors = [] + disorder_inputs = [] + for idx, sort_tensor in enumerate(input_tensor): + sort_input = inputs[idx] + if check_shape(sort_tensor, sort_input): + feed_dict.update({sort_tensor: sort_input}) + else: + disorder_tensors.append(sort_tensor) + disorder_inputs.append(sort_input) + for i, dis_tensor in enumerate(disorder_tensors): + for j, dis_input in enumerate(disorder_inputs): + if check_shape(dis_tensor, dis_input): + feed_dict.update({dis_tensor: dis_input}) + break + return feed_dict \ No newline at end of file diff --git a/neural_compressor/adaptor/torch_utils/bf16_convert.py b/neural_compressor/adaptor/torch_utils/bf16_convert.py index 93e1c311ae2..5c66929c3b9 100644 --- a/neural_compressor/adaptor/torch_utils/bf16_convert.py +++ b/neural_compressor/adaptor/torch_utils/bf16_convert.py @@ -36,7 +36,7 @@ def Convert(model, tune_cfg): fx_sub_module_list = tune_cfg['fx_sub_module_list'] \ if 'fx_sub_module_list' in tune_cfg.keys() else [] mixed_precision_model = bf16_wrapper_model(model, bf16_ops_list) - if len(fx_sub_module_list) > 0: + if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: 
mixed_precision_model = bf16_symbolic_trace(mixed_precision_model, fx_sub_module_list) return mixed_precision_model diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py new file mode 100644 index 00000000000..f68a1234164 --- /dev/null +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -0,0 +1,582 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ...utils.utility import LazyImport +torch = LazyImport("torch") + +import copy +import numpy as np +from collections import OrderedDict +import torch.nn +from torch.quantization.quantize_fx import fuse_fx +import torch.nn.intrinsic.quantized as nniq +from torch.fx import symbolic_trace, graph_module +import torch.nn as nn +import logging +logger = logging.getLogger(__name__) +from typing import Dict, List, Optional, Any, Union, Callable, Set +# Define Collector based on hook, which is used to record the intermediate result +class Node_collector: + def __init__(self, m): + self.handle = m.register_forward_hook(self.hook_fn_act) + def hook_fn_act(self, m, inp, outp): + self.out_features = outp.clone() + self.in_features = inp + self.m = m + def remove(self): + self.handle.remove() +class HessianTrace: + """ + please refer to + Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 
+ 2020 IEEE international conference on big data (Big data). IEEE, 2020. + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." + Advances in neural information processing systems 33 (2020): 18518-18529. + https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py + """ + + def __init__(self, model, dataloader,q_model,criterion=None): + self.unfused_model = model.model + self.q_model=q_model + tmp_model=model.model + if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length + logger.info("This is aready fused model") + self.model=model.model + else: + logger.info("fusing model") + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused + self.dataloader = dataloader + self.max_iter = 500 + self.tolerance = 1e-5 + self.eps = 1e-6 + self.index = 0 + self.device = self.get_device(self.model) + self.criterion = criterion + if self.criterion == None: + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config + self.criterion = self.criterion.to(self.device) + self.weight_to_op, self.op_list = self.get_fused_mapping() + self.get_params() + + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def mapping_module_to_op(self, name): + # length = len("_model.") + # if len(name) < length: + # return name + # else: + return name + def mse_metric_gap(self,fp32_tensor, dequantize_tensor): + """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor + Args: + fp32_tensor (tensor): The FP32 tensor. + dequantize_tensor (tensor): The INT8 dequantize tensor. 
+ """ + fp32_max = np.max(fp32_tensor) + fp32_min = np.min(fp32_tensor) + dequantize_max = np.max(dequantize_tensor) + dequantize_min = np.min(dequantize_tensor) + fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / \ + (dequantize_max - dequantize_min) + diff_tensor = fp32_tensor - dequantize_tensor + euclidean_dist = np.sum(diff_tensor ** 2) + return euclidean_dist / fp32_tensor.size + def get_fused_mapping(self): + model = self.model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + + weight_to_op[op_name + "." + name + ".weight"] = self.mapping_module_to_op(op_name) + break + else: + name = op_name + ".weight" + if name in weights_info and name not in weight_to_op.keys(): + weight_to_op[op_name + ".weight"] = op_name + op_list = [] + for key in weight_to_op.keys(): + op_list.append(weight_to_op[key]) + return weight_to_op, op_list + + def get_device(self, model: torch.nn.Module): + for n, p in model.named_parameters(): + return p.data.device + + def _get_act_grad_hook(self, name): + def act_grad_hook(model, grad_input, grad_output): + ##print(name, grad_input[0].shape, grad_output[0].shape) + if type(model) == torch.nn.Linear: ##TODO very tricky + self.layer_acts_grads[name] = grad_input[1] + else: + self.layer_acts_grads[name] = grad_input[0] + + return act_grad_hook + + def _get_enable_act_grad_hook(self, name): + def enable_act_grad_hook(model, inputs, outputs): + input = inputs[0] + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_act_grad_hook + + # def _get_disable_input_grad_hook(self, name): + # def disable_input_grad_hook(model, inputs, outputs): + # try: + # input = inputs[0] ##TODO check whether 
this is right + # except: + # input = inputs + # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables + # if input.requires_grad is True: + # input.requires_grad = False + # + # + # return disable_input_grad_hook + + def _unregister_hook(self): + for handel in self.hook_handles: + handel.remove() + + def register_act_grad_hooks(self, model): + for name, module in model.named_modules(): + if self.mapping_module_to_op(name) in self.op_list: + hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) + self.hook_handles.append(hook_handle) + hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) + self.hook_handles.append(hook_handle) + + def reset_act_gradient_and_hooks(self): + # tmp_input = torch.zeros(self._input_shape, device=self.device) + # for name, module in self.model.named_modules(): + # if name in self.op_list: + # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) + # self.hook_handles.append(hook_handle) + # self.model(tmp_input) + self._unregister_hook() + + def get_params(self): + weight_names = [n for n, p in self.model.named_parameters() if + p.requires_grad and "bias" not in n] ##remove bias + params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + self.weight_names = weight_names + self.params = params + + def forward_backward(self, model, data, create_graph=False, return_w_grad=True): + model.zero_grad() + input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation + target = data[1].to(self.device) + input.requires_grad = True + output = model(input) + loss = self.criterion(output, target) + torch.autograd.backward(loss, create_graph=create_graph) + ##loss.backward(create_graph=create_graph) + if return_w_grad: + gradients = [] + for n, p in self.model.named_parameters(): + if p.grad != None and n in self.weight_names: + gradient = p.grad + 
gradients.append(gradient + 0.0) ## add 0 to create a copy + model.zero_grad() + return gradients + else: + model.zero_grad() + + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters + + def sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def get_vtHv_weight(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] + cnt += batch_size + gradients = self.forward_backward(self.model, data, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + if cnt >= num_samples: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better + return v_t_H_v + + # def get_vtHv_act(self, params, num_samples): + # v = self.sample_rademacher(params) + # H_v = [0] * len(v) + # cnt = 0 + # for step, data in enumerate(self.dataloader): + # if cnt >= num_samples: + # break + # for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + # input = data[0][i:i + 1] + # target = data[1][i:i + 1] + + # self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + # layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + # layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + # hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, + # only_inputs=True, retain_graph=False) + # cnt += 1 + # if cnt >= num_samples: + # break + + def get_weight_traces(self, num_samples): + import tqdm + layer_traces_per_iter = [] + prev_avg_model_trace = 0 + for 
iter in tqdm.tqdm(range(self.max_iter)): + layer_traces = self.get_vtHv_weight(self.params, num_samples) + layer_traces_per_iter.append(layer_traces) + layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) + model_trace = torch.sum(layer_traces_estimate) + diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + # if iter == 20: ##TODO for debugging + # break + prev_avg_model_trace = model_trace + weight_name_to_traces = {} + layer_traces = layer_traces_estimate + for weight_name, trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weight_name] = float(trace)# tensor->float + op_name_to_trace = {} + for weight_name in self.weight_names: + op_name = self.weight_to_op[weight_name] + op_name_to_trace[op_name] = weight_name_to_traces[weight_name] + return op_name_to_trace + def get_act_traces(self, num_samples): + unfused_training = self.unfused_model.training + self.unfused_model.eval() + self.hook_handles = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_act_grad_hooks(self.unfused_model) + cnt = 0 + act_traces_per_sample = [] + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + bs = data[0].shape[0] + act_traces_sum = 0 + act_traces_per_iter = [] + prev_avg_model_trace = 0 + act_traces_sums = None + for i in range(bs): ##force the bs to be one + input = data[0][i:i + 1] + target = data[1][i:i + 1] + self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) + acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + if act_traces_sums == None: + act_traces_sums = [0] * len(acts) + acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts + vt_H_v_sum_per_act = [0] * len(acts) + + prev_model_act_trace = 0 + for iter in range(self.max_iter): + v = self.sample_rademacher(acts) + 
H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) + vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + + vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + enumerate(vt_H_v_sum_per_act)] + vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + + diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + prev_model_act_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + # if iter == 50: ##TODO for debug + # break + + prev_model_act_trace = current_model_act_trace + act_traces_per_sample.append(vt_H_v_mean_per_act) + cnt += 1 + if cnt >= num_samples: + break + + if unfused_training: + self.unfused_model.train() + self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) + act_traces = torch.mean(act_traces_stack, dim=0) + res_dict = {} + for index, key in enumerate(self.layer_acts.keys()): + res_dict[key] = act_traces[index] + + self.layer_acts = [] + self.layer_acts_grads = [] + return res_dict + def insert_hook(self, model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + # print("target_model:",target_module) + if layer == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def insert_hook_quantize(self,model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + 
length = len("_model.") + new_key = layer[length:] + # print("target_model:",target_module) + if new_key == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def get_act_gap(self,fp32_model,q_model): + """ + Estimates each activation gap between quantized model and float model + """ + self.handle_acts=[] + fp32_model.eval() + # temp_model = fuse_fx(fp32_model.model) + temp_model=fp32_model + # target_module_list = [nn.ReLU] # Insert hook for FP32 model + target_module_list = self.op_list + temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) + # intern_outputs={} + for input, target in self.dataloader: + temp_model(input) + break + + fp32_act_out={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + # print ("No.", i, " ", intern_output.out_features.shape) + # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) + # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) + # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) + fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + target_module_list = self.op_list + q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) + for input, target in self.dataloader: #only one sample + q_model(input) + break + qnt_act_out={} + intern_outputs={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + act_gap={} + mse_gap={} + for fp_i,int_i in 
zip(fp32_act_out,qnt_act_out): + activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] + mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) + act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size + return act_gap,mse_gap + def get_avg_traces(self, enable_act=True, num_samples=32): + """ + Estimates average hessian trace for each parameter + """ + assert num_samples > 0 + traces = {} + weight_traces = self.get_weight_traces(num_samples) + traces['weight'] = weight_traces + act_trace={} + if enable_act: + act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) + act_traces = self.get_act_traces(num_samples) + for i,j in zip(act_traces,mse_gap): + #currently use mse to analysis + act_trace[i]=float(act_traces[i])+float(mse_gap[j])# Tensor->float + traces['activation'] = act_traces + return traces + + +##copy from torch.quantization._numeric_suite +def _find_match( + str_list: Union[Dict[str, Any], List[str]], key_str: str, + postfix: str, +) -> Optional[str]: + split_str = key_str.split(".") + if split_str[-1] == postfix: + match_string = "".join(key_str.split(".")[0:-1]) + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + + # For matching "fc.weight" and "fc._packed_params._packed_params" + if postfix == "_packed_params": + match_string = "".join(key_str.split(".")[0:-2]) + if len(match_string) == 0: + return None + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + return None + else: + return None + + +##copy form torch.quantization._numeric_suite +def compare_weights( + float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] +) -> Dict[str, Dict[str, torch.Tensor]]: + r"""Compare the weights of the float module with its corresponding 
quantized + module. Return a dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights. This dict can be used to compare and compute the quantization + error of the weights of float and quantized models. + + Example usage:: + + wt_compare_dict = compare_weights( + float_model.state_dict(), qmodel.state_dict()) + for key in wt_compare_dict: + print( + key, + compute_error( + wt_compare_dict[key]['float'], + wt_compare_dict[key]['quantized'].dequantize() + ) + ) + + Args: + float_dict: state dict of the float model + quantized_dict: state dict of the quantized model + + Return: + weight_dict: dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights + """ + + weight_dict: Dict[str, Dict] = {} + for key in quantized_dict: + match_key = _find_match(float_dict, key, "weight") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key] + continue + + # For matching "fc.weight" and "fc._packed_params._packed_params" + match_key = _find_match(float_dict, key, "_packed_params") + if match_key is not None: + weight_dict[match_key] = {} + weight_dict[match_key]["float"] = float_dict[match_key] + weight_dict[match_key]["quantized"] = quantized_dict[key][0] + ##TODO:should consider more models in further work + + # For LSTM + split_str = key.split(".") + if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": + layer = split_str[-2] + module_name = ".".join(split_str[:-3]) + float_weight_ih_key = module_name + ".weight_ih_l" + layer + float_weight_hh_key = module_name + ".weight_hh_l" + layer + if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[float_weight_ih_key] + 
weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] + ) + weight_dict[key]["float"] = float_dict[float_weight_hh_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] + ) + + return weight_dict +def hawq_top(fp32_model,q_model,dataloader,criterion,enable_act): + orig_eval=True + if fp32_model.training: + orig_eval=False + fp32_model.eval() + ht=HessianTrace(fp32_model,dataloader=dataloader,q_model=q_model) + q_model_state_dict={} + for key in q_model.state_dict().keys(): + length=len("_model.") + new_key=key[length:] + q_model_state_dict[new_key]=q_model.state_dict()[key] + weight_quant_loss=compare_weights(ht.model.state_dict(),q_model_state_dict) + pertur_lst={} + for key in weight_quant_loss: + op_float_tensor=weight_quant_loss[key]['float'] + op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) + pertur_lst[key]=diff_l2 + traces=ht.get_avg_traces(enable_act) + op_to_traces=traces['weight'] + if enable_act: + act_to_traces=traces['activation'] + for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): + #Formula:Omig=Trace*L2+act_trace + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] + else: + for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 + if orig_eval==False: + fp32_model.train() + return op_to_traces + + \ No newline at end of file diff --git a/neural_compressor/adaptor/torch_utils/util.py b/neural_compressor/adaptor/torch_utils/util.py index ad5067f9034..71b15c02d36 100644 --- a/neural_compressor/adaptor/torch_utils/util.py +++ b/neural_compressor/adaptor/torch_utils/util.py @@ -14,13 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - import copy import re import numpy as np from collections import UserDict +from ...utils import logger from ...utils.utility import LazyImport, CpuInfo +tqdm = LazyImport("tqdm") torch = LazyImport("torch") def get_embedding_contiguous(model): @@ -44,6 +45,127 @@ def contiguous_hook(module, input): child.register_forward_pre_hook(contiguous_hook) +def is_fused_module(module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + + Args: + module (object): input module + + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + +def _set_input_scale_hook(model, op_cfgs): + """Insert hooks to observer input scale and zeropoint. + + Args: + model (object): input model + op_cfgs (dict): dictionary of quantization configure for each op + + Returns: + hook_list (list): input observer hooks + """ + def input_scale_hook(module, input): + module.input_observer = module.qconfig.activation() + module.input_observer(input[0]) + return input + + def output_scale_hook(module, input, output): + module.output_observer = module.qconfig.activation() + module.output_observer(output) + return output + + def ConvReLU2d_scale_hook(module, input): + module.input_observer = module.qconfig.activation() + module.input_observer(input[0]) + output = module._conv_forward(input[0], module.weight_fake_quant(module.weight), module.bias) + module.output_observer = module.qconfig.activation() + module.output_observer(output) + return input + + def LinearReLU_scale_hook(module, input): + import torch.nn.functional as F + module.input_observer = module.qconfig.activation() + module.input_observer(input[0]) + output = F.linear(input[0], module.weight_fake_quant(module.weight), module.bias) + module.output_observer = module.qconfig.activation() + 
module.output_observer(output) + return input + + hook_list = [] + for name, module in model.named_modules(): + if 'Conv' in str(module.__class__.__name__) or \ + 'Linear' in str(module.__class__.__name__): + if not hasattr(module, 'qconfig') or not module.qconfig: + continue + from torch.nn.intrinsic.qat import ConvBn2d, ConvReLU2d, ConvBnReLU2d, LinearReLU + if type(module) in [ConvBn2d, ConvBnReLU2d]: + handle_in = module.register_forward_pre_hook(input_scale_hook) + # module[0] == torch.nn.BatchNorm2d + module[0].qconfig = module.qconfig + handle_out = module[0].register_forward_hook(output_scale_hook) + hook_list.extend([handle_in, handle_out]) + elif type(module) in [ConvReLU2d]: + handle_in_out = module.register_forward_pre_hook(ConvReLU2d_scale_hook) + hook_list.extend([handle_in_out]) + elif type(module) in [LinearReLU]: + handle_in_out = module.register_forward_pre_hook(LinearReLU_scale_hook) + hook_list.extend([handle_in_out]) + else: + if is_fused_module(module): + continue + handle_in = module.register_forward_pre_hook(input_scale_hook) + handle_out = module.register_forward_hook(output_scale_hook) + hook_list.extend([handle_in, handle_out]) + return hook_list + + +def _get_input_scale(model, hook_list): + """Fetch input scale and zeropoint from observer. 
+ + Args: + model (object): input model + hook_list (list): input observer hooks + + Returns: + input_scale_info (dict): input scale and zero_point of each modules + """ + scale_info = {} + for name, module in model.named_modules(): + from torch.nn.intrinsic.qat import ConvBn2d, ConvBnReLU2d + if type(module) in [ConvBn2d, ConvBnReLU2d]: + if hasattr(module, "input_observer") and hasattr(module[0], "output_observer"): + scale_in, zero_point_in = module.input_observer.calculate_qparams() + scale_out, zero_point_out = module[0].output_observer.calculate_qparams() + scale_info[name] = { + 'input_scale': float(scale_in), + 'input_zeropoint': int(zero_point_in), + 'output_scale': float(scale_out), + 'output_zeropoint': int(zero_point_out) + } + del module.input_observer, module[0].output_observer + elif hasattr(module, "input_observer") and hasattr(module, "output_observer"): + scale_in, zero_point_in = module.input_observer.calculate_qparams() + scale_out, zero_point_out = module.output_observer.calculate_qparams() + scale_info[name] = { + 'input_scale': float(scale_in), + 'input_zeropoint': int(zero_point_in), + 'output_scale': float(scale_out), + 'output_zeropoint': int(zero_point_out) + } + del module.input_observer, module.output_observer + for h in hook_list: + h.remove() + return scale_info + + def collate_torch_preds(results): batch = results[0] if isinstance(batch, list): @@ -371,3 +493,260 @@ def unwrap_proxy(a): torch.nn.Sequential.forward = orig_nn_sequential_forward # type: ignore[assignment] new_module.__class__ = CopyDispatchModule return new_module + +def fetch_module(model, op_name): + module = model + name_list = op_name.split('.') + for name in name_list: + if hasattr(module, name): + module = getattr(module, name) + else: + module = module + return module + +def set_module(model, op_name, new_module): + module = model + name_list = op_name.split('.') + for name in name_list[:-1]: + if hasattr(module, name): + module = getattr(module, name) + else: + 
module = module + setattr(module, name_list[-1], new_module) + return module + +def simple_inference(model, input): + with torch.no_grad(): + if type(input) is dict: + output = model(**input) + elif type(input) is tuple or type(input) is list: + try: + output = model(*input) + except: + output = model(input) + else: + output = model(input) + return output + +def get_example_input(dataloader, i=1): + iter = 0 + try: + for example_inp, label in dataloader: + if iter == i: + break + else: + iter += 1 + except: + for example_inp in dataloader: + if iter == i: + break + else: + iter += 1 + return example_inp + + +def get_fallback_order(adaptor, fp32_model, dataloader, tune_cfg, + confidence_batches, fallback=False, requantize_cfgs=None): + fp32_model.eval() + order_dict = {} + for i in range(0, confidence_batches): + example_input = get_example_input(dataloader, i) + if fallback: + ordered_ops = get_mse_order_per_fp32(adaptor, fp32_model, example_input, tune_cfg) + for i, name in enumerate(ordered_ops): + order_dict[name] = order_dict.get(name, 0) + len(order_dict) - i + ordered_ops = sorted(order_dict, key=lambda k: order_dict[k], reverse=True) + else: + ordered_ops = get_mse_order_per_int8(adaptor, fp32_model, example_input, tune_cfg) + for i, name in enumerate(ordered_ops): + order_dict[name] = order_dict.get(name, 0) + len(order_dict) - i + return ordered_ops + +op_cfg_mapping = {} +def get_mse_order_per_fp32(adaptor, model, example_inp, tune_cfg): + """a helper method to check the mse influence to last module after QDQ(quant/dequant). + Args: + model(torch.fx.GraphModule/torch.nn.Module): A torch model. + dataloader(torch.utils.data.DataLoader): The calibration dataloader. + tune_cfg (dict): dictionary of quantization configuration. + Returns: + fallback_order (dict/list): The fallback order for strategy. 
+ """ + + inner_output = None + def output_hook(self, input, output): + nonlocal inner_output + inner_output = output + return output + + op_type_dict = {} + for k, v in tune_cfg['op'].keys(): + op_type_dict[k] = v + + from ..pytorch import _cfg_to_qconfig, _cfgs_to_fx_cfgs, PyTorch_FXAdaptor + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) + # insert hook to get output tesnor from last module + last_module_name = list(op_cfgs.keys())[-1] + module = fetch_module(model, last_module_name) # get last module + module.register_forward_hook(output_hook) + # record fp32 model output tensor at first + output_fp32 = simple_inference(model, example_inp) + inner_output_fp32 = inner_output + + fx_op_cfgs = {} + fallback_order = {} + logger.info('Evaluate the sensitivity for each int8 operation') + for op_name, qconfig in tqdm(op_cfgs.items()): + global op_cfg_mapping + if op_name not in op_cfg_mapping: + op_cfg_mapping[op_name] = qconfig + tmp_model = copy.deepcopy(model) + if not qconfig: + continue + op_cfgs[op_name] = None + fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) + op_cfgs[op_name] = qconfig + from torch.quantization.quantize_fx import prepare_fx,convert_fx + # do quantization + if adaptor.sub_module_list is None: + tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + else: + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ + tmp_model, prefix='') + simple_inference(tmp_model, example_inp) + if adaptor.sub_module_list is None: + tmp_model = convert_fx(tmp_model) + else: + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ + tmp_model, prefix='') + + # insert hook to get output tesnor from last module + module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module + module.register_forward_hook(output_hook) + output_qdq = simple_inference(tmp_model, example_inp) + inner_output_int8 = inner_output.dequantize() if \ + inner_output.dtype == torch.quint8 else inner_output + mse_val = 
(inner_output_fp32 - inner_output_int8).pow(2).sum() + fallback_order[(op_name, op_type_dict[op_name])] = mse_val + + ordered_ops = sorted(fallback_order.keys(), key=lambda key: fallback_order[key], \ + reverse=False) + min_mse, max_mse = fallback_order[ordered_ops[0]], fallback_order[ordered_ops[-1]] + + if min_mse < 0.8 * max_mse: + return ordered_ops + + + double_check_list = [] + for op_name in ordered_ops: + if min_mse <= fallback_order[op_name] <= (max_mse - min_mse) * 0.1 + min_mse: + double_check_list.append(op_name) + + check_num = min(len(ordered_ops)//10, 5) + double_check_list = ordered_ops[:check_num] + worst_op_name = ordered_ops[-1] + op_cfgs[worst_op_name[0]] = None # fallback worst module first + new_fallback_order = {} + + logger.info('Evaluate the sensitivity gradient for selected operations') + for op_name, op_type in tqdm(double_check_list): + tmp_model = copy.deepcopy(model) + qconfig = op_cfgs[op_name] + op_cfgs[op_name] = None + fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) + op_cfgs[op_name] = qconfig + from torch.quantization.quantize_fx import prepare_fx,convert_fx + # do quantization + if adaptor.sub_module_list is None: + tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + else: + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ + tmp_model, prefix='') + simple_inference(tmp_model, example_inp) + if adaptor.sub_module_list is None: + tmp_model = convert_fx(tmp_model) + else: + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ + tmp_model, prefix='') + + # insert hook to get output tesnor from last module + module = fetch_module(tmp_model, last_module_name) # get last module + module.register_forward_hook(output_hook) + output_qdq = simple_inference(tmp_model, example_inp) + inner_output_int8 = inner_output.dequantize() if \ + inner_output.dtype == torch.quint8 else inner_output + mse_val = (inner_output_fp32 - inner_output_int8).pow(2).sum() + new_fallback_order[(op_name, 
op_type_dict[op_name])] = mse_val + + ordered_ops = sorted(new_fallback_order.keys(), key=lambda key: new_fallback_order[key], \ + reverse=False) + + return ordered_ops + +def get_mse_order_per_int8(adaptor, fp32_model, example_input, tune_cfg): + inner_output = None + def output_hook(self, input, output): + nonlocal inner_output + inner_output = output + return output + + op_type_dict = {} + for k, v in tune_cfg['op'].keys(): + op_type_dict[k] = v + + example_inp = example_input + + from ..pytorch import _cfg_to_qconfig + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) + module = fetch_module(fp32_model, list(op_cfgs.keys())[-1]) # get last module + # insert hook to get output tesnor from last module + module.register_forward_hook(output_hook) + # record fp32 model output tensor at first + output_fp32 = simple_inference(fp32_model, example_inp) + inner_output_fp32 = inner_output + + quant_list = [] + for k, v in tune_cfg['op'].items(): + if k[1] in ['LayerNorm', 'Dropout', 'InstanceNorm3d']: + continue + if v['weight']['dtype'] == 'fp32': + quant_list.append(k) + fallback_order = {} + logger.info('Evaluate the sensitivity for each fp32 operation') + for op_name, op_type in tqdm(quant_list): + if op_name in op_cfg_mapping: + tmp_model = copy.deepcopy(fp32_model) + from ..pytorch import _cfg_to_qconfig, _cfgs_to_fx_cfgs, PyTorch_FXAdaptor + op_cfgs[op_name] = op_cfg_mapping[op_name] + fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) + from torch.quantization.quantize_fx import prepare_fx,convert_fx + # do quantization + if adaptor.sub_module_list is None: + tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + else: + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ + tmp_model, prefix='') + simple_inference(tmp_model, example_inp) + if adaptor.sub_module_list is None: + tmp_model = convert_fx(tmp_model) + else: + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ + tmp_model, prefix='') + + + # record int8 
model output tensor + module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module + module.register_forward_hook(output_hook) + output_qdq = simple_inference(tmp_model, example_inp) + inner_output_int8 = inner_output + if inner_output_fp32.dtype == torch.quint8: + inner_output_fp32 = inner_output_fp32.dequantize() + if inner_output_int8.dtype == torch.quint8: + inner_output_int8 = inner_output_int8.dequantize() + + mse_val = (inner_output_fp32 - inner_output_int8).pow(2).sum() + fallback_order[(op_name, op_type_dict[op_name])] = mse_val + # re-insert fp32 module into model + ordered_ops = sorted(fallback_order.keys(), key=lambda key: fallback_order[key], \ + reverse=False) + return ordered_ops diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 87d425a846b..98696e39cbf 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -69,9 +69,18 @@ def postprocess(self, name, postprocess_cls, **kwargs): self.exp_benchmarker.postprocess = nc_postprocess -def fit( - model, config=None, b_dataloader=None, b_func=None -): +def fit(model, config=None, b_dataloader=None, b_func=None): + """Benchmark the model performance with the configure. + + Args: + model (object): The model to be benchmarked. + config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + tuning objective and preferred calibration & quantization + tuning space etc. + b_dataloader: The dataloader for frameworks. + b_func: customized benchmark function. if user passes the dataloader, + than b_func is not needed. 
+ """ if isinstance(config, BenchmarkConfig): config = Config(benchmark=config) benchmarker = ExpBenchmark(config) @@ -82,6 +91,3 @@ def fit( benchmarker.b_dataloader = b_dataloader benchmarker() return benchmarker.results - - -benchmark = fit diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index aef8f695291..127eb5b6158 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -20,7 +20,7 @@ from ..adaptor import FRAMEWORKS from ..strategy import STRATEGIES from ..objective import OBJECTIVES -from ..pruners import PRUNERS +from ..pruner.pruner_legacy import PRUNERS from ..utils import logger from ..version import __version__ import re @@ -217,7 +217,7 @@ def percent_to_float(data): lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), Optional('dtype'): And( list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16'] for i in s)), Optional('algorithm'): And( list, lambda s: all(i in ['minmax'] for i in s)), @@ -235,7 +235,7 @@ def percent_to_float(data): lambda s: all(i in ['asym', 'sym'] for i in s)), Optional('dtype'): And( list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16'] for i in s)), # compute_dtypeis only for PyTorch framework Optional('compute_dtype', default=['uint8']): And( list, @@ -259,13 +259,13 @@ def percent_to_float(data): Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['fp32', 'bf16'] for i in s)), }, Optional('activation', default=None): { Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['fp32', 'bf16'] for i in s)), } } }) @@ -275,20 +275,20 @@ def percent_to_float(data): Optional('precisions', 
default={'precisions': ['fp32']}): And( Or(str, list), Use(input_to_list), - lambda s: all(i in [ 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in [ 'fp32', 'bf16'] for i in s)), Optional('op_wise', default={'weight': {}, 'activation': {}}): { Optional('weight', default=None): { Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['fp32', 'bf16'] for i in s)), }, Optional('activation', default=None): { Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['fp32', 'bf16'] for i in s)), } } }) @@ -730,6 +730,7 @@ def percent_to_float(data): 'framework': And(str, lambda s: s in list(FRAMEWORKS.keys()) + ['NA']), Optional('inputs', default=[]): And(Or(str, list), Use(input_to_list)), Optional('outputs', default=[]): And(Or(str, list), Use(input_to_list)), + }, Optional('version', default=float(__version__.split('.')[0])): And( Or(float, @@ -746,6 +747,7 @@ def percent_to_float(data): 'pre_post_process_quantization': True}, 'model_wise': {'weight': {'bit': [7.0]}, 'activation': {}}, + 'optimization_level': 1, }): { Optional('approach', default='post_training_static_quant'): And( str, @@ -797,7 +799,7 @@ def percent_to_float(data): Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16'] for i in s)), Optional('algorithm', default=None): And( Or(str, list), Use(input_to_list), @@ -820,7 +822,7 @@ def percent_to_float(data): Optional('dtype', default=None): And( Or(str, list), Use(input_to_list), - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16'] for i in s)), # compute_dtypeis only for PyTorch framework Optional('compute_dtype', 
default=['uint8']): And( Or(str, list), @@ -839,15 +841,17 @@ def percent_to_float(data): Optional('op_wise', default=None): { str: ops_schema }, + Optional('optimization_level', default=1): And(int, lambda level: level in [0, 1]), }, - Optional('use_bf16', default=False): bool, + Optional('use_bf16', default=True): bool, + Optional('optimization_level', default=1): And(int, lambda level: level in [0, 1]), Optional('graph_optimization'): graph_optimization_schema, Optional('mixed_precision'): mixed_precision_schema, Optional('model_conversion'): model_conversion_schema, Optional('tuning', default={ - 'strategy': {'name': 'basic'}, + 'strategy': {'name': 'basic'}, 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, 'objective': 'performance', 'exit_policy': {'timeout': 0, 'max_trials': 100, 'performance_only': False}, @@ -856,11 +860,14 @@ def percent_to_float(data): 'diagnosis': False, }): { Optional('strategy', default={'name': 'basic'}): { - 'name': And(str, lambda s: s in STRATEGIES), Optional('sigopt_api_token'): str, + 'name': And(str, lambda s: s in STRATEGIES), + Optional('sigopt_api_token'): str, Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, Optional('accuracy_weight', default=1.0): float, - Optional('latency_weight', default=1.0): float + Optional('latency_weight', default=1.0): float, + Optional('confidence_batches', default=2): int, + Optional('hawq_v2_loss', default=None): object, } , Hook('accuracy_criterion', handler=_valid_accuracy_field): object, Optional('accuracy_criterion', default={'relative': 0.01}): { @@ -1084,6 +1091,7 @@ def percent_to_float(data): Optional("num_evals", default=100000): int, Optional("results_csv_path", default=None): str, Optional("dataset_path", default=None): str, + Optional("supernet_ckpt_path", default=None): str, Optional("batch_size", default=64): int, }, }, @@ -1111,6 +1119,7 @@ def percent_to_float(data): 'activation': {}}, }): dict, 
Optional('use_bf16', default=False): bool, + Optional('optimization_level', default=1): int, Optional('tuning', default={ 'strategy': {'name': 'basic'}, 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, @@ -1328,7 +1337,8 @@ def map_pyconfig_to_cfg(self, pythonic_config): 'device': pythonic_config.quantization.device, 'model.inputs': pythonic_config.quantization.inputs, 'model.outputs': pythonic_config.quantization.outputs, - 'model.framework': pythonic_config.quantization.backend, + 'model.backend': pythonic_config.quantization.backend, + 'model.quant_format': pythonic_config.quantization.quant_format, 'quantization.approach': pythonic_config.quantization.approach, 'quantization.calibration.sampling_size': pythonic_config.quantization.calibration_sampling_size, @@ -1346,8 +1356,18 @@ def map_pyconfig_to_cfg(self, pythonic_config): 'tuning.exit_policy.max_trials': pythonic_config.quantization.max_trials, 'tuning.exit_policy.performance_only': pythonic_config.quantization.performance_only, 'use_bf16': pythonic_config.quantization.use_bf16, + 'quantization.optimization_level': pythonic_config.quantization.optimization_level, 'reduce_range': pythonic_config.quantization.reduce_range }) + if pythonic_config.quantization.strategy_kwargs: + st_kwargs = pythonic_config.quantization.strategy_kwargs + for st_key in ['sigopt_api_token', 'sigopt_project_id', 'sigopt_experiment_name', \ + 'accuracy_weight', 'latency_weight', 'hawq_v2_loss']: + + if st_key in st_kwargs: + st_val = st_kwargs[st_key] + mapping.update({'tuning.strategy.' 
+ st_key: st_val}) + if pythonic_config.distillation is not None: mapping.update({ 'distillation.train.criterion': pythonic_config.distillation.criterion, @@ -1371,7 +1391,12 @@ def map_pyconfig_to_cfg(self, pythonic_config): 'tuning.tensorboard': pythonic_config.options.tensorboard, }) if pythonic_config.benchmark is not None: + if pythonic_config.benchmark.inputs != []: + mapping.update({'model.inputs': pythonic_config.benchmark.inputs}) + if pythonic_config.benchmark.outputs != []: + mapping.update({'model.outputs': pythonic_config.benchmark.outputs}) mapping.update({ + 'model.backend': pythonic_config.benchmark.backend, 'evaluation.performance.warmup': pythonic_config.benchmark.warmup, 'evaluation.performance.iteration': pythonic_config.benchmark.iteration, 'evaluation.performance.configs.cores_per_instance': diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 49eaa1738e7..2124579b406 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -27,23 +27,42 @@ class QuantizationConfig(_BaseQuantizationConfig): def __init__(self, inputs=[], outputs=[], - backend='NA', + backend='default', device='cpu', approach='post_training_static_quant', calibration_sampling_size=[100], op_type_list=None, op_name_list=None, strategy='basic', + strategy_kwargs=None, objective='performance', timeout=0, max_trials=100, performance_only=False, reduce_range=None, - use_bf16=False, + use_bf16=True, + optimization_level=1, accuracy_criterion=accuracy_criterion): - super().__init__(inputs, outputs, backend, device, calibration_sampling_size, op_type_list, - op_name_list, strategy, objective, timeout, max_trials, performance_only, - reduce_range, use_bf16, accuracy_criterion) + excluded_precisions = ["bf16"] if not use_bf16 else [] + super().__init__( + inputs=inputs, + outputs=outputs, + backend=backend, + device=device, + calibration_sampling_size=calibration_sampling_size, + 
op_type_list=op_type_list, + op_name_list=op_name_list, + strategy=strategy, + strategy_kwargs=strategy_kwargs, + objective=objective, + timeout=timeout, + max_trials=max_trials, + performance_only=performance_only, + reduce_range=reduce_range, + excluded_precisions=excluded_precisions, + accuracy_criterion=accuracy_criterion, + optimization_level=optimization_level + ) self._approach = approach @property @@ -144,7 +163,7 @@ def precisions(self): def precisions(self, precisions): if not isinstance(precisions, list): precisions = [precisions] - if check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']): + if check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16']): self._precisions = precisions class ONNX(MXNet): @@ -166,6 +185,10 @@ class TensorFlow(MXNet): def __init__(self, precisions=None): super().__init__(precisions) +class Keras(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + class PyTorch(MXNet): def __init__(self, precisions=None): super().__init__(precisions) @@ -224,6 +247,7 @@ def search(self, search): nas = NASConfig() onnxruntime_config = ONNX() tensorflow_config = TensorFlow() +keras_config = Keras() pytorch_config = PyTorch() mxnet_config = MXNet() @@ -239,7 +263,8 @@ def __init__(self, onnxruntime=onnxruntime_config, tensorflow=tensorflow_config, pytorch=pytorch_config, - mxnet=mxnet_config): + mxnet=mxnet_config, + keras=keras_config): self._quantization = quantization self._benchmark = benchmark self._options = options @@ -250,6 +275,7 @@ def __init__(self, self._tensorflow = tensorflow self._pytorch = pytorch self._mxnet = mxnet + self._keras = keras @property def distillation(self): @@ -263,6 +289,10 @@ def nas(self): def tensorflow(self): return self._tensorflow + @property + def keras(self): + return self._keras + @property def pytorch(self): return self._pytorch diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 
371ba422963..51225f95206 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -17,7 +17,6 @@ import datetime import logging -from typing import List from schema import Schema, And, Optional from .conf.dotdict import DotDict from .conf.config import Pruner @@ -44,7 +43,7 @@ lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), Optional('dtype'): And( list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16'] for i in s)), Optional('algorithm'): And( list, lambda s: all(i in ['minmax'] for i in s))}, @@ -57,7 +56,7 @@ lambda s: all(i in ['asym', 'sym'] for i in s)), Optional('dtype'): And( list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16', 'None'] for i in s)), + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'None'] for i in s)), Optional('algorithm'): And( list, lambda s: all(i in ['minmax', 'kl', 'placeholder'] for i in s))}}) @@ -65,24 +64,20 @@ def check_value(name, src, supported_type, supported_value=[]): if isinstance(src, list) and any([not isinstance(i, supported_type) for i in src]): - logger.warning("Type of {} items should be {} but not {}, " \ - "use its default value.".format(name, str(supported_type), [type(i) for i in src])) - return False + assert False, ("Type of {} items should be {} but not {}".format( + name, str(supported_type), [type(i) for i in src])) elif not isinstance(src, list) and not isinstance(src, supported_type): - logger.warning("Type of {} should be {} but not {}, " \ - "use its default value.".format(name, str(supported_type), type(src))) - return False + assert False, ("Type of {} should be {} but not {}".format( + name, str(supported_type), type(src))) if len(supported_value) > 0: if isinstance(src, str) and src not in supported_value: - logger.warning("{} is not in supported {}: {}. 
Skip setting it and" \ - " use default value.".format(src, name, str(supported_value))) - return False + assert False, ("{} is not in supported {}: {}. Skip setting it.".format( + src, name, str(supported_value))) elif isinstance(src, list) and all([isinstance(i, str) for i in src]) and \ any([i not in supported_value for i in src]): - logger.warning("{} is not in supported {}: {}. Skip setting it and" \ - " use default value.".format(src, name, str(supported_value))) - return False + assert False, ("{} is not in supported {}: {}. Skip setting it.".format( + src, name, str(supported_value))) return True @@ -90,10 +85,10 @@ def check_value(name, src, supported_type, supported_value=[]): class Options: def __init__(self, random_seed=1978, workspace=default_workspace, resume_from=None, tensorboard=False): - self._random_seed = random_seed - self._workspace = workspace - self._resume_from = resume_from - self._tensorboard = tensorboard + self.random_seed = random_seed + self.workspace = workspace + self.resume_from = resume_from + self.tensorboard = tensorboard @property def random_seed(self): @@ -119,7 +114,7 @@ def resume_from(self): @resume_from.setter def resume_from(self, resume_from): - if check_value('resume_from', resume_from, str): + if resume_from is None or check_value('resume_from', resume_from, str): self._resume_from = resume_from @property @@ -136,14 +131,53 @@ def tensorboard(self, tensorboard): class BenchmarkConfig: - def __init__(self, warmup=5, iteration=-1, cores_per_instance=None, num_of_instance=None, - inter_num_of_threads=None, intra_num_of_threads=None): - self._warmup = warmup - self._iteration = iteration - self._cores_per_instance = cores_per_instance - self._num_of_instance = num_of_instance - self._inter_num_of_threads = inter_num_of_threads - self._intra_num_of_threads = intra_num_of_threads + def __init__(self, + inputs=[], + outputs=[], + backend='default', + warmup=5, + iteration=-1, + cores_per_instance=None, + num_of_instance=None, 
+ inter_num_of_threads=None, + intra_num_of_threads=None): + self.inputs = inputs + self.outputs = outputs + self.backend = backend + self.warmup = warmup + self.iteration = iteration + self.cores_per_instance = cores_per_instance + self.num_of_instance = num_of_instance + self.inter_num_of_threads = inter_num_of_threads + self.intra_num_of_threads = intra_num_of_threads + + @property + def backend(self): + return self._backend + + @backend.setter + def backend(self, backend): + if check_value('backend', backend, str, [ + 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): + self._backend = backend + + @property + def outputs(self): + return self._outputs + + @outputs.setter + def outputs(self, outputs): + if check_value('outputs', outputs, str): + self._outputs = outputs + + @property + def inputs(self): + return self._inputs + + @inputs.setter + def inputs(self, inputs): + if check_value('inputs', inputs, str): + self._inputs = inputs @property def warmup(self): @@ -169,7 +203,8 @@ def cores_per_instance(self): @cores_per_instance.setter def cores_per_instance(self, cores_per_instance): - if check_value('cores_per_instance', cores_per_instance, int): + if cores_per_instance is None or check_value('cores_per_instance', cores_per_instance, + int): self._cores_per_instance = cores_per_instance @property @@ -178,7 +213,7 @@ def num_of_instance(self): @num_of_instance.setter def num_of_instance(self, num_of_instance): - if check_value('num_of_instance', num_of_instance, int): + if num_of_instance is None or check_value('num_of_instance', num_of_instance, int): self._num_of_instance = num_of_instance @property @@ -187,7 +222,8 @@ def inter_num_of_threads(self): @inter_num_of_threads.setter def inter_num_of_threads(self, inter_num_of_threads): - if check_value('inter_num_of_threads', inter_num_of_threads, int): + if inter_num_of_threads is None or check_value('inter_num_of_threads', + inter_num_of_threads, int): self._inter_num_of_threads = 
inter_num_of_threads @property @@ -196,41 +232,16 @@ def intra_num_of_threads(self): @intra_num_of_threads.setter def intra_num_of_threads(self, intra_num_of_threads): - if check_value('intra_num_of_threads', intra_num_of_threads, int): + if intra_num_of_threads is None or check_value('intra_num_of_threads', + intra_num_of_threads, int): self._intra_num_of_threads = intra_num_of_threads -class AccuracyLoss: - def __init__(self, loss=0.01): - self._loss = loss - - @property - def relative(self): - return self._loss - - @relative.setter - def relative(self, relative): - if check_value('relative tolerable loss', relative, float): - self._loss = relative - - @property - def absolute(self): - return self._loss - - @absolute.setter - def absolute(self, absolute): - if check_value('absolute tolerable loss', absolute, float): - self._loss = absolute - - -tolerable_loss = AccuracyLoss() - - class AccuracyCriterion: - def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=tolerable_loss): - self._higher_is_better = higher_is_better - self._criterion = criterion - self._tolerable_loss = tolerable_loss + def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=0.01): + self.higher_is_better = higher_is_better + self.criterion = criterion + self.tolerable_loss = tolerable_loss @property def higher_is_better(self): @@ -243,29 +254,47 @@ def higher_is_better(self, higher_is_better): @property def relative(self): - if self._criterion != 'relative': + if self.criterion != 'relative': return None - return self._tolerable_loss.relative + return self.tolerable_loss @relative.setter def relative(self, relative): - self._criterion = 'relative' - self._tolerable_loss.relative = relative + self.criterion = 'relative' + self.tolerable_loss = relative @property def absolute(self): - if self._criterion != 'absolute': + if self.criterion != 'absolute': return None - return self._tolerable_loss.absolute + return self.tolerable_loss @absolute.setter 
def absolute(self, absolute): - self._criterion = 'absolute' - self._tolerable_loss.absolute = absolute + self.criterion = 'absolute' + self.tolerable_loss = absolute - def __str__(self): + @property + def criterion(self): return self._criterion + @criterion.setter + def criterion(self, criterion): + if check_value('criterion', criterion, str, ['relative', 'absolute']): + self._criterion = criterion + + @property + def tolerable_loss(self): + return self._tolerable_loss + + @tolerable_loss.setter + def tolerable_loss(self, tolerable_loss): + if check_value('tolerable_loss', tolerable_loss, float): + self._tolerable_loss = tolerable_loss + + def __str__(self): + return self.criterion + accuracy_criterion = AccuracyCriterion() @@ -274,50 +303,68 @@ class _BaseQuantizationConfig: def __init__(self, inputs=[], outputs=[], - backend="NA", + backend="default", + quant_format="default", device="cpu", calibration_sampling_size=[100], op_type_list=None, op_name_list=None, strategy="basic", + strategy_kwargs=None, objective="performance", timeout=0, max_trials=100, performance_only=False, reduce_range=None, - extra_precisions=[], + excluded_precisions=[], + optimization_level=1, accuracy_criterion=accuracy_criterion): - self._inputs = inputs - self._outputs = outputs - self._backend = backend - self._device = device - self._op_type_list = op_type_list - self._op_name_list = op_name_list - self._strategy = strategy - self._objective = objective - self._timeout = timeout - self._max_trials = max_trials - self._performance_only = performance_only - self._reduce_range = reduce_range - self._extra_precisions = extra_precisions \ - if isinstance(extra_precisions, List) else [extra_precisions] - self.use_bf16 = "bf16" in self._extra_precisions - self._accuracy_criterion = accuracy_criterion - self._calibration_sampling_size = calibration_sampling_size + self.inputs = inputs + self.outputs = outputs + self.backend = backend + self.quant_format = quant_format + self.device = device + 
self.op_type_list = op_type_list + self.op_name_list = op_name_list + self.strategy = strategy + self.strategy_kwargs = strategy_kwargs + self.objective = objective + self.timeout = timeout + self.max_trials = max_trials + self.performance_only = performance_only + self.reduce_range = reduce_range + self.excluded_precisions = excluded_precisions + self.use_bf16 = "bf16" not in self.excluded_precisions + self.accuracy_criterion = accuracy_criterion + self.calibration_sampling_size = calibration_sampling_size + self.optimization_level = optimization_level @property def accuracy_criterion(self): return self._accuracy_criterion + @accuracy_criterion.setter + def accuracy_criterion(self, accuracy_criterion): + if check_value("accuracy_criterion", accuracy_criterion, AccuracyCriterion): + self._accuracy_criterion = accuracy_criterion + @property - def extra_precisions(self): - return self._extra_precisions + def excluded_precisions(self): + return self._excluded_precisions - @extra_precisions.setter - def extra_precisions(self, extra_precisions): - if check_value('extra_precisions', extra_precisions, List): - self._extra_precisions = extra_precisions - self._use_bf16 = "bf16" in extra_precisions + @excluded_precisions.setter + def excluded_precisions(self, excluded_precisions): + if check_value("excluded_precisions", excluded_precisions, str, ["bf16"]): + self._excluded_precisions = excluded_precisions + self._use_bf16 = "bf16" not in excluded_precisions + + @property + def optimization_level(self): + return self._optimization_level + + @optimization_level.setter + def optimization_level(self, optimization_level): + self._optimization_level = optimization_level @property def reduce_range(self): @@ -325,7 +372,7 @@ def reduce_range(self): @reduce_range.setter def reduce_range(self, reduce_range): - if check_value('reduce_range', reduce_range, bool): + if reduce_range is None or check_value('reduce_range', reduce_range, bool): self._reduce_range = reduce_range @property @@ 
-372,22 +419,32 @@ def strategy(self): @strategy.setter def strategy(self, strategy): if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): self._strategy = strategy + @property + def strategy_kwargs(self): + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + self._strategy_kwargs = strategy_kwargs + @property def op_name_list(self): return self._op_name_list @op_name_list.setter def op_name_list(self, op_name_list): - if not isinstance(op_name_list, dict): - logger.warning("Type of op_name_list should be dict but not {}, " \ - "use its default value.".format(type(op_name_list))) - else: + if op_name_list is None: + self._op_name_list = op_name_list + elif isinstance(op_name_list, dict): for k, v in op_name_list.items(): ops_schema.validate(v) self._op_name_list = op_name_list + else: + assert False, ("Type of op_name_list should be dict but not {}, ".format( + type(op_name_list))) @property def op_type_list(self): @@ -395,13 +452,15 @@ def op_type_list(self): @op_type_list.setter def op_type_list(self, op_type_list): - if not isinstance(op_type_list, dict): - logger.warning("Type of op_type_list should be dict but not {}, " \ - "use its default value.".format(type(op_type_list))) - else: + if op_type_list is None: + self._op_type_list = op_type_list + elif isinstance(op_type_list, dict): for k, v in op_type_list.items(): ops_schema.validate(v) self._op_type_list = op_type_list + else: + assert False, ("Type of op_type_list should be dict but not {}".format( + type(op_type_list))) @property def calibration_sampling_size(self): @@ -410,6 +469,8 @@ def calibration_sampling_size(self): @calibration_sampling_size.setter def calibration_sampling_size(self, sampling_size): if check_value('calibration_sampling_size', sampling_size, int): + if isinstance(sampling_size, int): + 
sampling_size =[sampling_size] self._calibration_sampling_size = sampling_size @property @@ -421,6 +482,16 @@ def device(self, device): if check_value('device', device, str, ['cpu', 'gpu']): self._device = device + @property + def quant_format(self): + return self._quant_format + + @quant_format.setter + def quant_format(self, quant_format): + if check_value('quant_format', quant_format, str, + ['default', 'QDQ', 'QOperator']): + self._quant_format = quant_format + @property def backend(self): return self._backend @@ -428,9 +499,7 @@ def backend(self): @backend.setter def backend(self, backend): if check_value('backend', backend, str, [ - 'tensorflow', 'tensorflow_itex', 'pytorch', 'pytorch_ipex', 'pytorch_fx', - 'onnxrt_qlinearops', 'onnxrt_integerops', 'onnxrt_qdq', 'onnxrt_qoperator', 'mxnet' - ]): + 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): self._backend = backend @property @@ -453,11 +522,12 @@ def inputs(self, inputs): class TuningCriterion: - def __init__(self, strategy="basic", timeout=0, max_trials=100, objective="performance"): - self._strategy = strategy - self._timeout = timeout - self._max_trials = max_trials - self._objective = objective + def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): + self.strategy = strategy + self.timeout = timeout + self.max_trials = max_trials + self.objective = objective + self.strategy_kwargs = strategy_kwargs @property def max_trials(self): @@ -494,41 +564,54 @@ def strategy(self): @strategy.setter def strategy(self, strategy): if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): self._strategy = strategy + @property + def strategy_kwargs(self): + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + self._strategy_kwargs = strategy_kwargs 
tuning_criterion = TuningCriterion() class PostTrainingQuantConfig(_BaseQuantizationConfig): def __init__(self, - device='cpu', - backend="NA", + device="cpu", + backend="default", + quant_format="default", inputs=[], outputs=[], - approach='auto', + approach="static", calibration_sampling_size=[100], op_type_list=None, op_name_list=None, reduce_range=None, - extra_precisions = [], + excluded_precisions=[], + optimization_level=1, tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, ): + self.tuning_criterion = tuning_criterion super().__init__(inputs=inputs, outputs=outputs, device=device, backend=backend, + quant_format=quant_format, calibration_sampling_size=calibration_sampling_size, op_type_list=op_type_list, op_name_list=op_name_list, strategy=tuning_criterion.strategy, + strategy_kwargs=tuning_criterion.strategy_kwargs, objective=tuning_criterion.objective, timeout=tuning_criterion.timeout, max_trials=tuning_criterion.max_trials, reduce_range=reduce_range, - extra_precisions=extra_precisions, + excluded_precisions=excluded_precisions, + optimization_level=optimization_level, accuracy_criterion=accuracy_criterion) self.approach = approach @@ -541,20 +624,36 @@ def approach(self, approach): if check_value("approach", approach, str, ["static", "dynamic", "auto"]): self._approach = QUANTMAPPING[approach] + @property + def tuning_criterion(self): + return self._tuning_criterion + + @tuning_criterion.setter + def tuning_criterion(self, tuning_criterion): + if check_value("tuning_criterion", tuning_criterion, TuningCriterion): + self._tuning_criterion = tuning_criterion + class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): def __init__(self, device="cpu", - backend="NA", + backend="default", inputs=[], outputs=[], op_type_list=None, op_name_list=None, reduce_range=None, - extra_precisions=[]): - super().__init__(inputs=inputs, outputs=outputs, device=device, backend=backend, - op_type_list=op_type_list, op_name_list=op_name_list, - 
reduce_range=reduce_range, extra_precisions=extra_precisions) + excluded_precisions=[], + optimization_level=1): + super().__init__(inputs=inputs, + outputs=outputs, + device=device, + backend=backend, + op_type_list=op_type_list, + op_name_list=op_name_list, + reduce_range=reduce_range, + excluded_precisions=excluded_precisions, + optimization_level=optimization_level) self._approach = 'quant_aware_training' @property @@ -572,7 +671,7 @@ def __init__(self, pruners=pruners, initial_sparsity=0.0, target_sparsity=0.97, update_frequency_on_step=1, not_to_prune_names=[], prune_domain="global", names=[], exclude_names=[], prune_layer_type=[], sparsity_decay_type="exp", pattern="tile_pattern_1x1"): - self._weight_compression = DotDict({ + self.weight_compression = DotDict({ 'initial_sparsity': initial_sparsity, 'target_sparsity': target_sparsity, 'max_sparsity_ratio_per_layer': max_sparsity_ratio_per_layer, @@ -659,14 +758,14 @@ class DistillationConfig: """ def __init__(self, - teacher_model, + teacher_model=None, criterion=criterion, optimizer={'SGD': { 'learning_rate': 0.0001 }}): - self._criterion = criterion.config - self._optimizer = optimizer - self._teacher_model = teacher_model + self.criterion = criterion.config + self.optimizer = optimizer + self.teacher_model = teacher_model @property def criterion(self): @@ -696,19 +795,19 @@ def teacher_model(self, teacher_model): class MixedPrecisionConfig(PostTrainingQuantConfig): def __init__(self, device="cpu", - backend="NA", + backend="default", inputs=[], outputs=[], tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, - extra_precisions=["bf16"]): + excluded_precisions=[]): super().__init__(inputs=inputs, outputs=outputs, device=device, backend=backend, tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, - extra_precisions=extra_precisions, + excluded_precisions=excluded_precisions, ) @@ -723,13 +822,13 @@ def __init__( output_names=None, dynamic_axes=None, ): - self._dtype 
= dtype - self._opset_version = opset_version - self._quant_format = quant_format - self._example_inputs = example_inputs - self._input_names = input_names - self._output_names = output_names - self._dynamic_axes = dynamic_axes + self.dtype = dtype + self.opset_version = opset_version + self.quant_format = quant_format + self.example_inputs = example_inputs + self.input_names = input_names + self.output_names = output_names + self.dynamic_axes = dynamic_axes @property def dtype(self): @@ -787,18 +886,22 @@ def dynamic_axes(self): def dynamic_axes(self, dynamic_axes): self._dynamic_axes = dynamic_axes +class ONNXQlinear2QDQConfig: + def __init__(self): + pass class Torch2ONNXConfig(ExportConfig): - def __init__( - self, - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=None, - input_names=None, - output_names=None, - dynamic_axes=None, - **kwargs, + def __init__( + self, + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + recipe='QDQ_OP_FP32_BIAS', + **kwargs, ): super().__init__( dtype=dtype, @@ -809,20 +912,21 @@ def __init__( output_names=output_names, dynamic_axes=dynamic_axes, ) + self.recipe = recipe self.kwargs = kwargs class TF2ONNXConfig(ExportConfig): - def __init__( - self, - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=None, - input_names=None, - output_names=None, - dynamic_axes=None, - **kwargs, + def __init__( + self, + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, ): super().__init__( dtype=dtype, @@ -834,19 +938,3 @@ def __init__( dynamic_axes=dynamic_axes, ) self.kwargs = kwargs - - -def set_random_seed(seed: int): - options.random_seed - - -def set_workspace(workspace: str): - options.workspace = workspace - - -def set_resume_from(resume_from: str): - options.resume_from = resume_from - - -def 
set_tensorboard(tensorboard: bool): - options.tensorboard = tensorboard diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py index 19b3ae1ed3e..8a1e7a34164 100644 --- a/neural_compressor/contrib/strategy/sigopt.py +++ b/neural_compressor/contrib/strategy/sigopt.py @@ -20,8 +20,8 @@ from neural_compressor.utils.utility import LazyImport from neural_compressor.strategy.strategy import strategy_registry, TuneStrategy from collections import OrderedDict -from neural_compressor.strategy.st_utils.tuning_sampler import OpWiseTuningSampler -from neural_compressor.strategy.st_utils.tuning_structs import OpTuningConfig +from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler +from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig sigopt = LazyImport('sigopt') @@ -88,7 +88,13 @@ def __init__(self, model, conf, q_dataloader, q_func=None, try: import sigopt except ImportError: - ImportError(f"Please install sigopt for using {strategy_name} strategy.") + try: + import subprocess + import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) + import sigopt # pylint: disable=import-error + except: + assert False, "Unable to import sigopt from the local environment." else: pass # SigOpt init @@ -215,7 +221,7 @@ def create_exp(self, acc_target): calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options # step1. 
collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 39362f1749b..730a9f9fef0 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -24,8 +24,8 @@ from neural_compressor.utils.utility import LazyImport from neural_compressor.strategy.strategy import strategy_registry, TuneStrategy from collections import OrderedDict -from neural_compressor.strategy.st_utils.tuning_sampler import OpWiseTuningSampler -from neural_compressor.strategy.st_utils.tuning_structs import OpTuningConfig +from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler +from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig hyperopt = LazyImport('hyperopt') @@ -91,7 +91,13 @@ def __init__(self, model, conf, q_dataloader, q_func=None, try: import hyperopt except ImportError: - raise ImportError(f"Please install hyperopt for using {strategy_name} strategy.") + try: + import subprocess + import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "hyperopt"]) + import hyperopt # pylint: disable=import-error + except: + assert False, "Unable to import hyperopt from the local environment." else: pass self.hpopt_search_space = None @@ -190,7 +196,7 @@ def traverse(self): calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options # step1. 
collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) diff --git a/neural_compressor/data/__init__.py b/neural_compressor/data/__init__.py index 2883a446e7b..6729875aa67 100644 --- a/neural_compressor/data/__init__.py +++ b/neural_compressor/data/__init__.py @@ -14,26 +14,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# ============================================================================== +"""Built-in dataloaders, datasets, transforms, filters for multiple framework backends.""" -from .dataloaders import DataLoader import neural_compressor.data.datasets import neural_compressor.data.transforms -from ..experimental.data.datasets import DATASETS, Dataset, IterableDataset, dataset_registry -from ..experimental.data.transforms import TRANSFORMS, BaseTransform, transform_registry -from ..experimental.data.dataloaders import DATALOADERS -from ..experimental.data.filters import FILTERS, Filter, filter_registry +from .datasets import Datasets, Dataset, IterableDataset, dataset_registry +from .dataloaders import DATALOADERS, DataLoader +from .transforms import TRANSFORMS, BaseTransform, transform_registry, Postprocess + +from .filters import FILTERS, Filter, filter_registry __all__ = [ "DataLoader", "DATALOADERS", - "DATASETS", + "Datasets", "Dataset", "IterableDataset", "dataset_registry", "TRANSFORMS", "BaseTransform", "transform_registry", + "Postprocess", "FILTERS", "Filter", "filter_registry",] diff --git a/neural_compressor/data/dataloaders/__init__.py b/neural_compressor/data/dataloaders/__init__.py index 560070e31e6..b2568bad678 100644 --- 
a/neural_compressor/data/dataloaders/__init__.py +++ b/neural_compressor/data/dataloaders/__init__.py @@ -14,9 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# ============================================================================== -from .dataloader import DataLoader +from .dataloader import DataLoader, DATALOADERS __all__ = [ "DataLoader", -] + "DATALOADERS" +] \ No newline at end of file diff --git a/neural_compressor/data/dataloaders/base_dataloader.py b/neural_compressor/data/dataloaders/base_dataloader.py new file mode 100644 index 00000000000..9349760239e --- /dev/null +++ b/neural_compressor/data/dataloaders/base_dataloader.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""BaseDataloder of all dataloaders.""" + +from abc import abstractmethod + + +class BaseDataLoader: # pragma: no cover + """Base class for all DataLoaders. + + _generate_dataloader is needed to create a dataloader object + from the general params like batch_size and sampler. The dynamic batching is just to + generate a new dataloader by setting batch_size and last_batch. 
+ + """ + + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False, distributed=False): + """Initialize BaseDataLoader. + + Args: + dataset (object): dataset from which to load the data + batch_size (int, optional): number of samples per batch. Defaults to 1. + last_batch (str, optional): whether to drop the last batch if it is incomplete. + Support ['rollover', 'discard'], rollover means False, discard means True. + Defaults to 'rollover'. + collate_fn (callable, optional): merge data with outer dimension batch size. Defaults to None. + sampler (Sampler, optional): Sampler object to sample data. Defaults to None. + batch_sampler (BatchSampler, optional): BatchSampler object to generate batch of indices. Defaults to None. + num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0. + pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False. + shuffle (bool, optional): whether to shuffle data. Defaults to False. + distributed (bool, optional): whether the dataloader is distributed. Defaults to False. + """ + self.dataset = dataset + self.collate_fn = collate_fn + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self._batch_size = batch_size + self.shuffle = shuffle + self.distributed = distributed + self.last_batch = last_batch + self.drop_last = False if last_batch == 'rollover' else True + + self.dataloader = self._generate_dataloader( + self.dataset, + batch_size=batch_size, + last_batch=last_batch, + collate_fn=collate_fn, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + shuffle=shuffle, + distributed=distributed) + + def batch(self, batch_size, last_batch=None): + """Set batch size for dataloader. 
+ + Args: + batch_size (int): number of samples per batch. + last_batch (str, optional): whether to drop the last batch if it is incomplete. + Support ['rollover', 'discard'], rollover means False, discard means True. + Defaults to None. + """ + self._batch_size = batch_size + if last_batch is not None: + self.last_batch = last_batch + self.dataloader = self._generate_dataloader( + self.dataset, + batch_size, + self.last_batch, + self.collate_fn, + self.sampler, + self.batch_sampler, + self.num_workers, + self.pin_memory, + self.shuffle, + self.distributed) + + @property + def batch_size(self): + """Get dataloader's batch_size. + + Returns: + int: batch_size + """ + return self._batch_size + + def __iter__(self): + """Yield data in iterative order. + + Returns: + iterator: iterator for dataloder + """ + return iter(self.dataloader) + + @abstractmethod + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle, distributed): + raise NotImplementedError diff --git a/neural_compressor/data/dataloaders/dataloader.py b/neural_compressor/data/dataloaders/dataloader.py index 3c7078af916..5683b6c8e12 100644 --- a/neural_compressor/data/dataloaders/dataloader.py +++ b/neural_compressor/data/dataloaders/dataloader.py @@ -15,13 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Built-in dataloaders for multiple framework backends.""" + from neural_compressor.experimental.data.dataloaders import DATALOADERS # THIS API IS TO BE DEPRECATED! class DataLoader(object): """Entrance of all configured DataLoaders. Will dispatch the DataLoaders to framework specific one. Users will be not aware of the dispatching, and the Interface is unified. 
- """ def __new__(cls, framework, dataset, batch_size=1, collate_fn=None, diff --git a/neural_compressor/data/dataloaders/default_dataloader.py b/neural_compressor/data/dataloaders/default_dataloader.py new file mode 100644 index 00000000000..d9a2d74fb26 --- /dev/null +++ b/neural_compressor/data/dataloaders/default_dataloader.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Default dataloader for multiple framework backends.""" + +import collections +import numpy as np +from math import ceil, floor +from abc import abstractmethod +from .sampler import IterableSampler, SequentialSampler, BatchSampler +from .fetcher import FETCHERS +from .base_dataloader import BaseDataLoader + +def default_collate(batch): # pragma: no cover + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class DefaultDataLoader(BaseDataLoader): # pragma: no cover + """DefaultDataLoader for multiple framework backends.""" + 
+ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False, distributed=False): + """Initialize DefaultDataLoader. + + Args: + dataset (object): dataset from which to load the data + batch_size (int, optional): number of samples per batch. Defaults to 1. + last_batch (str, optional): whether to drop the last batch if it is incomplete. + Support ['rollover', 'discard'], rollover means False, discard means True. + Defaults to 'rollover'. + collate_fn (callable, optional): merge data with outer dimension batch size. Defaults to None. + sampler (Sampler, optional): Sampler object to sample data. Defaults to None. + batch_sampler (BatchSampler, optional): BatchSampler object to generate batch of indices. Defaults to None. + num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0. + pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False. + shuffle (bool, optional): whether to shuffle data. Defaults to False. + distributed (bool, optional): whether the dataloader is distributed. Defaults to False. 
+ """ + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self._batch_size = batch_size + self.shuffle = shuffle + self.distributed = distributed + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def batch(self, batch_size, last_batch='rollover'): + """Set batch_size and last_batch.""" + self._batch_size = batch_size + self.last_batch = last_batch + + @property + def dataloader(self): + """Return dataloader.""" + return self + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle, + distributed=self.distributed) + + def __len__(self): + """Get dataset length.""" + try: + dataset_len = self.dataset.__len__() + except (AttributeError, TypeError): + dataset_len = 0 + for _ in self.dataset: + dataset_len += 1 + except Exception: + raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ + " does not support calculating the length of its dataloader") + if self.drop_last == False: + dataloader_len = ceil(dataset_len / self.batch_size) + else: + dataloader_len = floor(dataset_len / self.batch_size) + return dataloader_len + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle, distributed): + + sampler = self._generate_sampler(dataset, distributed) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = FETCHERS[self.dataset_type](dataset, collate_fn, self.drop_last, distributed) + + for batched_indices in 
self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset, distributed): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset, distributed) + elif hasattr(dataset, "__iter__"): + self.dataset_type = 'iter' + return IterableSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") diff --git a/neural_compressor/data/dataloaders/fetcher.py b/neural_compressor/data/dataloaders/fetcher.py new file mode 100644 index 00000000000..01ab6d895fa --- /dev/null +++ b/neural_compressor/data/dataloaders/fetcher.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Definitions of the methods to fetch data from an iterable-style or list-style dataset.""" + +from abc import abstractmethod + +class Fetcher(object): # pragma: no cover + """Base class for different fetchers.""" + + def __init__(self, dataset, collate_fn, drop_last): + """Initialize Fetcher. 
+ + Args: + dataset (object): dataset object from which to get data + collate_fn (callable): merge data with outer dimension batch size + drop_last (bool): whether to drop the last batch if it is incomplete + """ + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + @abstractmethod + def __call__(self, batched_indices): + """Fetch data. + + Args: + batched_indices (list): fetch data according to batched_indices + + """ + raise NotImplementedError + +class IterableFetcher(Fetcher): # pragma: no cover + """Iterate to get next batch-size samples as a batch.""" + + def __init__(self, dataset, collate_fn, drop_last, distributed): + """Initialize IterableFetcher. + + Args: + dataset (object): dataset object from which to get data + collate_fn (callable): merge data with outer dimension batch size + drop_last (bool): whether to drop the last batch if it is incomplete + distributed (bool): whether the dataloader is distributed + + """ + super(IterableFetcher, self).__init__(dataset, collate_fn, drop_last) + self.dataset_iter = iter(dataset) + self.index_whole = 0 + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + if distributed: + import horovod.tensorflow as hvd + hvd.init() + self.process_rank = hvd.rank() + self.process_size = hvd.size() + if self.process_size < 2: + raise EnvironmentError("The program is now trying to traverse" \ + " the distributed TensorFlow DefaultDataLoader in only one process." \ + " If you do not want to use distributed DataLoader, please set" \ + " 'distributed: False'. Or If you want to use distributed DataLoader," \ + " please set 'distributed: True' and launch multiple processes.") + + def __call__(self, batched_indices): + """Fetch data. 
+ + Args: + batched_indices (list): fetch data according to batched_indices + + """ + batch_data = [] + batch_size = len(batched_indices) + while True: + try: + iter_data = next(self.dataset_iter) + if (self.index_whole-self.process_rank)%self.process_size == 0: + batch_data.append(iter_data) + self.index_whole += 1 + if len(batch_data) == batch_size: + break + except StopIteration: + break + if len(batch_data) == 0 or (self.drop_last and len(batch_data) < len(batched_indices)): + raise StopIteration + return self.collate_fn(batch_data) + +class IndexFetcher(Fetcher): # pragma: no cover + """Take single index or a batch of indices to fetch samples as a batch.""" + + def __init__(self, dataset, collate_fn, drop_last, distributed): + """Initialize IndexFetcher. + + Args: + dataset (object): dataset object from which to get data + collate_fn (callable): merge data with outer dimension batch size + drop_last (bool): whether to drop the last batch if it is incomplete + distributed (bool): whether the dataloader is distributed + """ + super(IndexFetcher, self).__init__(dataset, collate_fn, drop_last) + + def __call__(self, batched_indices): + """Fetch data. + + Args: + batched_indices (list): fetch data according to batched_indices + + """ + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + +FETCHERS = {"index": IndexFetcher, "iter": IterableFetcher, } diff --git a/neural_compressor/data/dataloaders/mxnet_dataloader.py b/neural_compressor/data/dataloaders/mxnet_dataloader.py new file mode 100644 index 00000000000..352f63fc731 --- /dev/null +++ b/neural_compressor/data/dataloaders/mxnet_dataloader.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""MXNet Dataloader implementation.""" + +from neural_compressor.utils.utility import LazyImport +from .base_dataloader import BaseDataLoader +import logging +mx = LazyImport('mxnet') + +class MXNetDataLoader(BaseDataLoader): # pragma: no cover + """Subclass of BaseDataLoader.""" + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed): + """Overwrite _generate_dataloader function.""" + if shuffle: + logging.warning('Shuffle is not supported yet in MXNetDataLoader, ' \ + 'ignoring shuffle keyword.') + return mx.gluon.data.DataLoader( + dataset, + batch_size=batch_size, + batchify_fn=collate_fn, + last_batch=last_batch, + num_workers=num_workers, + pin_memory=pin_memory, + sampler=sampler, + batch_sampler=batch_sampler) diff --git a/neural_compressor/data/dataloaders/onnxrt_dataloader.py b/neural_compressor/data/dataloaders/onnxrt_dataloader.py new file mode 100644 index 00000000000..028bcdb6981 --- /dev/null +++ b/neural_compressor/data/dataloaders/onnxrt_dataloader.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Built-in dataloaders for onnxruntime framework backends.""" + +from neural_compressor.utils.utility import LazyImport +from .base_dataloader import BaseDataLoader +from .default_dataloader import DefaultDataLoader +from ..datasets.bert_dataset import ONNXRTBertDataset +import logging +torch = LazyImport('torch') + +class ONNXRTBertDataLoader(DefaultDataLoader): # pragma: no cover + """Built-in dataloader for onnx bert model and its varients.""" + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed): + import numpy as np + from torch.utils.data import DataLoader, SequentialSampler + sampler = SequentialSampler(dataset) + dataloader = DataLoader(dataset, sampler=sampler, \ + batch_size=batch_size) + dynamic_length = dataset.dynamic_length + model_type = dataset.model_type + max_seq_length = dataset.max_seq_length + + for batch in dataloader: + try: + batch_seq_length = max_seq_length if not dynamic_length \ + else torch.max(batch[-2], 0)[0].item() + batch = tuple(t.detach().cpu().numpy() \ + if not isinstance(t, np.ndarray) else t \ + for t in batch) + if model_type == 'bert': + data = [ + batch[0][:,:batch_seq_length], + batch[1][:,:batch_seq_length], + batch[2][:,:batch_seq_length] + ] + else: + data = [ + batch[0][:,:batch_seq_length], + batch[1][:,:batch_seq_length] + ] + label = batch[-1] + yield data, label + except StopIteration: + return + +class 
ONNXRTDataLoader(BaseDataLoader): # pragma: no cover + """Built-in dataloader for onnxruntime framework backends.""" + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed): + if shuffle: + logging.warning('Shuffle is not supported yet in ONNXRTDataLoader, ' \ + 'ignoring shuffle keyword.') + + if isinstance(dataset, ONNXRTBertDataset): + return ONNXRTBertDataLoader(dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed) + else: + return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed) diff --git a/neural_compressor/data/dataloaders/pytorch_dataloader.py b/neural_compressor/data/dataloaders/pytorch_dataloader.py new file mode 100644 index 00000000000..301519a8acf --- /dev/null +++ b/neural_compressor/data/dataloaders/pytorch_dataloader.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Initialize the DATASETS class.""" + +import numpy as np +from neural_compressor.utils.utility import LazyImport +from .base_dataloader import BaseDataLoader +torch = LazyImport('torch') +hvd = LazyImport('horovod.torch') + +class PyTorchDataLoader(BaseDataLoader): # pragma: no cover + """PyTorchDataLoader inherits from BaseDataLoader.""" + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, + shuffle, distributed): + """Generate PyTorch dataloader. + + Args: + dataset: dataset + batch_size (int): batch size + last_batch (string): rollover last batch or not. + collate_fn: collate_fn + sampler: sampler + batch_sampler: batch_sampler + num_workers (int): num_workers + pin_memory (bool): pin_memory + shuffle (bool): shuffle + distributed (bool): distributed + + Returns: + _type_: _description_ + """ + drop_last = False if last_batch == 'rollover' else True + assert len(dataset) != 0, \ + "Warning: Dataset is empty, Please check dataset path!" 
+ if distributed and sampler is None: + # TODO: lazy init here + hvd.init() + # sampler option is mutually exclusive with shuffle pytorch + self.sampler = sampler = torch.utils.data.distributed.DistributedSampler( + dataset, num_replicas=hvd.size(), rank=hvd.rank()) + + return torch.utils.data.DataLoader( + dataset, + shuffle=shuffle, + batch_size=batch_size, + collate_fn=collate_fn, + drop_last=drop_last, + num_workers=num_workers, + pin_memory=pin_memory, + sampler=sampler, + batch_sampler=batch_sampler) + diff --git a/neural_compressor/data/dataloaders/sampler.py b/neural_compressor/data/dataloaders/sampler.py new file mode 100644 index 00000000000..a383e6d9891 --- /dev/null +++ b/neural_compressor/data/dataloaders/sampler.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Definitions of the methods to sample data.""" + +from abc import abstractmethod + +class Sampler(object): # pragma: no cover + """Base class for all Samplers. 
+ + __iter__ is needed no matter whether you use IterableSampler + or Squential sampler, if you want implement your own sampler, make clear what the type is + your Dataset, if IterableDataset(method __iter__ implemented), try to use IterableSampler, + else if you have an IndexDataset(method __getitem__ implemented), your dataset should have + method __len__ implemented. + """ + + def __init__(self, data_source): + """Initialize Sampler.""" + pass + + @abstractmethod + def __iter__(self): + """Convert dataloder to an iterator.""" + raise NotImplementedError + + +class IterableSampler(Sampler): # pragma: no cover + """Interally samples elements. + + Used for datasets retrieved element by interator. Yield None to act as a placeholder for each iteration. + """ + + def __init__(self, dataset): + """Initialize IterableSampler. + + Args: + dataset (object): dataset object from which to get data + """ + super(IterableSampler, self).__init__(None) + self.whole_dataset = dataset + + def __iter__(self): + """Yield data in iterative order.""" + while True: + yield None + + def __len__(self): + """Return the length of dataset.""" + raise NotImplementedError("'__len__' for IterableDataset object has not defined") + +class SequentialSampler(Sampler): # pragma: no cover + """Sequentially samples elements, used for datasets retrieved element by index.""" + + def __init__(self, dataset, distributed): + """Initialize SequentialSampler. 
+ + Args: + dataset (object): dataset object from which to get data + distributed (bool): whether the dataloader is distributed + """ + self.whole_dataset = dataset + self.distributed = distributed + + def __iter__(self): + """Yield data in iterative order.""" + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + if self.distributed: + import horovod.tensorflow as hvd + hvd.init() + self.process_rank = hvd.rank() + self.process_size = hvd.size() + if self.process_size < 2: + raise EnvironmentError("The program is now trying to traverse" \ + " the distributed TensorFlow DefaultDataLoader in only one process." \ + " If you do not want to use distributed DataLoader, please set" \ + " 'distributed: False'. Or If you want to use distributed DataLoader," \ + " please set 'distributed: True' and launch multiple processes.") + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + """Return the length of dataset.""" + return len(self.whole_dataset) + +class BatchSampler(Sampler): # pragma: no cover + """Yield a batch of indices and number of batches.""" + + def __init__(self, sampler, batch_size, drop_last=True): + """Initialize BatchSampler. + + Args: + sampler (Sampler): sampler used for generating batches + batch_size (int): size of batch + drop_last (bool, optional): whether to drop the last batch if it is incomplete. Defaults to True. 
+ """ + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + """Yield data in iterative order.""" + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + """Return the number of batches.""" + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size diff --git a/neural_compressor/data/dataloaders/tensorflow_dataloader.py b/neural_compressor/data/dataloaders/tensorflow_dataloader.py new file mode 100644 index 00000000000..ddc010841ac --- /dev/null +++ b/neural_compressor/data/dataloaders/tensorflow_dataloader.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""TensorFlow Dataloader implementation.""" + +from neural_compressor.experimental.data.datasets import dataset +from neural_compressor.utils.utility import LazyImport +from abc import abstractmethod +import collections +import numpy as np +import sys +from math import ceil, floor +from .sampler import IterableSampler, SequentialSampler, BatchSampler +from .fetcher import FETCHERS +from .default_dataloader import default_collate +from .default_dataloader import DefaultDataLoader +from ..datasets.bert_dataset import TensorflowBertDataset, TensorflowModelZooBertDataset +from .base_dataloader import BaseDataLoader +import logging + +tf = LazyImport('tensorflow') +neural_compressor = LazyImport('neural_compressor') + +class TFDataDataLoader(BaseDataLoader): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch='rollover'): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self._batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch='rollover'): + """Dataset return data per batch.""" + drop_last = False if last_batch == 'rollover' else True + self._batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch,) + + def _generate_dataloader(self, dataset, batch_size=1, last_batch='rollover', \ + collate_fn=None, sampler=None, batch_sampler=None, \ + num_workers=None, pin_memory=None, shuffle=False, \ + distributed=False): + """Yield data.""" + drop_last = False if last_batch == 'rollover' else True + if shuffle: + logging.warning('Shuffle is not supported yet in TFDataLoader, ' \ + 'ignoring shuffle keyword.') + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError('unrecognized element spec...') + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError('not supported output format....') + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0],iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append((iter_input.numpy() for iter_input in 
iter_inputs)) + if isinstance(iter_labels,tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return + +class TensorflowBertDataLoader(DefaultDataLoader): # pragma: no cover + """Subclass of DefaultDataLoader. + + this dataloader is designed to satisfy the usage of Bert models. 
+ """ + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, shuffle, + distributed): + + if shuffle: + logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ + 'ignoring shuffle keyword.') + def bert_collate_fn(batch): + elem = batch[0] + return elem + drop_last = False if last_batch == 'rollover' else True + sampler = self._generate_sampler(dataset, distributed) + self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) + self.fetcher = FETCHERS[self.dataset_type]\ + (dataset, bert_collate_fn, drop_last, distributed) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield (data[0], batch_size), data[1] + except StopIteration: + return + +class TensorflowModelZooBertDataLoader(DefaultDataLoader): # pragma: no cover + """Subclass of DefaultDataLoader. + + this dataloader is designed to satisfy the usage of Model Zoo Bert models. + """ + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, pin_memory, shuffle, + distributed): + + if shuffle: + logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ + 'ignoring shuffle keyword.') + def bert_collate_fn(batch): + input_ids = [] + input_mask = [] + segment_ids = [] + for elem in batch: + input_ids.append(elem[0][0][0]) + input_mask.append(elem[0][1][0]) + segment_ids.append(elem[0][2][0]) + inputs = [input_ids, input_mask, segment_ids] + return inputs, batch[0][1] + drop_last = False if last_batch == 'rollover' else True + sampler = self._generate_sampler(dataset, distributed) + self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) + self.fetcher = FETCHERS[self.dataset_type]\ + (dataset, bert_collate_fn, drop_last, distributed) + + inputs = [] + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + 
return + +class TensorflowDataLoader(BaseDataLoader): # pragma: no cover + """DataLoader for framework Tensorflow. + + if it's a tf.data.Dataset we will directly use the dataloader in the other case + will use DefaultDataLoader instead. + """ + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, \ + sampler, batch_sampler, num_workers, pin_memory, shuffle, distributed): + + if shuffle: + logging.warning('Shuffle is not supported yet in TensorflowDataLoader, ' \ + 'ignoring shuffle keyword.') + if isinstance(dataset, tf.data.Dataset): + if int(tf.__version__[0]) > 1: + has_batch = hasattr(dataset, '_batch_size') + else: + has_batch = hasattr(dataset._dataset, '_batch_size') + if has_batch: + raise TypeError(f"Parameter 'batch_size={batch_size}'" \ + " conflicts with 'tf.data.Dataset'," \ + f" because {dataset} is already a BatchDataset." \ + f" Please pass in 'tf.data.Dataset' without batch attributes.") + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running + if self.distributed: + import horovod.tensorflow as hvd + hvd.init() + process_rank = hvd.rank() + process_size = hvd.size() + if process_size < 2: + raise EnvironmentError("The program is now trying to generate" \ + " the distributed TensorflowDataLoader in only one process." \ + " If you do not want to use distributed DataLoader, please set" \ + " 'distributed: False'. 
Or If you want to use distributed DataLoader," \ + " please set 'distributed: True' and launch multiple processes.") + dataset = dataset.shard(process_size, process_rank) + tf_dataloader = TFDataDataLoader(dataset, batch_size, last_batch=last_batch) + return tf_dataloader + elif isinstance(dataset, TensorflowBertDataset): + if distributed: + raise NotImplementedError("Distributed TensorflowBertDataLoader" \ + " is not yet supported, please set 'distributed: False'") + tf_bert_dataloader = TensorflowBertDataLoader(dataset, batch_size, \ + last_batch, collate_fn, sampler, batch_sampler, \ + num_workers, pin_memory, shuffle, distributed) + return tf_bert_dataloader + elif isinstance(dataset, TensorflowModelZooBertDataset): + if distributed: + raise NotImplementedError("Distributed TensorflowBertDataLoader" \ + " is not yet supported, please set 'distributed: False'") + tf_bert_dataloader = TensorflowModelZooBertDataLoader(dataset, batch_size, \ + last_batch, collate_fn, sampler, batch_sampler, \ + num_workers, pin_memory, shuffle, distributed) + return tf_bert_dataloader + else: + return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, + sampler, batch_sampler, num_workers, + pin_memory, shuffle, distributed) + + def __bool__(self): + """Judgement if the dataloader exists.""" + # workaround in assert dataloader which will overload __len__() without __bool__() + # provided. Calling __len__() in asserting is not supposed and may cause issues. 
+ return True + + def __len__(self): + """Total number of dataset.""" + try: + dataset_len = self.dataset.__len__() + except (AttributeError, TypeError): + try: + dataset_len = 0 + for _ in self.dataset: + dataset_len += 1 + except RuntimeError: return sum([1 for _ in self]) + except Exception: + raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ + " does not support calculating the length of its dataloader") + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running + if self.distributed: + import horovod.tensorflow as hvd + hvd.init() + process_rank = hvd.rank() + process_size = hvd.size() + if process_size < 2: + raise EnvironmentError("The program is now trying to get length of" \ + " the distributed TensorflowDataLoader in only one process." \ + " If you do not want to use distributed DataLoader, please set" \ + " 'distributed: False'. Or If you want to use distributed DataLoader," \ + " please set 'distributed: True' and launch multiple processes.") + if process_rank < (dataset_len % process_size): + self.dis_dataset_len = dataset_len // process_size + 1 + else: + self.dis_dataset_len = dataset_len // process_size + if self.drop_last == False: + dataloader_len = ceil(self.dis_dataset_len / self.batch_size) + else: + dataloader_len = floor(self.dis_dataset_len / self.batch_size) + return sys.maxsize if dataloader_len > sys.maxsize else dataloader_len diff --git a/neural_compressor/data/datasets/__init__.py b/neural_compressor/data/datasets/__init__.py index 77edda38b30..c2460d737ed 100644 --- a/neural_compressor/data/datasets/__init__.py +++ b/neural_compressor/data/datasets/__init__.py @@ -15,6 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+"""Built-in datasets class for multiple framework backends.""" + +from .dataset import Datasets, Dataset, IterableDataset, dataset_registry from os.path import dirname, basename, isfile, join import glob @@ -24,3 +27,5 @@ if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): __import__(basename(f)[:-3], globals(), locals(), level=1) + +__all__ = ["Datasets", "Dataset", "IterableDataset", "dataset_registry"] diff --git a/neural_compressor/data/datasets/bert_dataset.py b/neural_compressor/data/datasets/bert_dataset.py new file mode 100644 index 00000000000..dc6cfc896b4 --- /dev/null +++ b/neural_compressor/data/datasets/bert_dataset.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Built-in BERT datasets class for multiple framework backends.""" + +import os +import logging +import json +import dataclasses +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.utils.utility import LazyImport +from .dataset import dataset_registry, Dataset +torch = LazyImport('torch') +transformers = LazyImport('transformers') + +logger = logging.getLogger("neural_compressor") + +@dataset_registry(dataset_type="bert", framework="pytorch", dataset_format='') +class PytorchBertDataset(Dataset): # pragma: no cover + """PyTorch dataset used for model Bert. 
+ + This Dataset is to construct from the Bert TensorDataset and not a full implementation + from yaml config. The original repo link is: https://github.com/huggingface/transformers. + When you want use this Dataset, you should add it before you initialize your DataLoader. + (TODO) add end to end support for easy config by yaml by adding the method of + load examples and process method. + + Args: dataset (list): list of data. + task (str): the task of the model, support "classifier", "squad". + model_type (str, default='bert'): model type, support 'distilbert', 'bert', + 'xlnet', 'xlm'. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + + Examples: + dataset = [[ + [101,2043,2001], + [1,1,1], + [[0,0,0,0,0,0,0], + [0,0,0,0,0,0,0], + [0,0,0,0,0,0,0]], + [1,1,1], + [1,1,1], + [[0,0,0,0,0,0,0], + [0,0,0,0,0,0,0], + [0,0,0,0,0,0,0]] + ]] + dataset = PytorchBertDataset(dataset=dataset, task='classifier', model_type='bert', + transform=preprocess, filter=filter) + """ + + def __init__(self, dataset, task, model_type='bert', transform=None, filter=None): + """Initialize the attributes of class.""" + self.dataset = dataset + assert task in ("classifier", "squad"), "Bert task support only classifier squad" + self.task = task + self.transform = transform + self.model_type = model_type + + def __len__(self): + """Length of the dataset.""" + return len(self.dataset) + + def __getitem__(self, index): + """Magic method. 
+ + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.dataset[index] + if self.transform is not None: + sample = self.transform(sample) + if self.task == 'classifier': + inputs = { + 'input_ids': sample[0], + 'attention_mask': sample[1], + 'labels': sample[3]} + + if self.model_type != 'distilbert': + # XLM, DistilBERT and RoBERTa don't use segment_ids + if self.model_type in ['bert', 'xlnet']: + inputs['token_type_ids'] = sample[2] + sample = (inputs, inputs['labels']) + + elif self.task == 'squad': + inputs = { + 'input_ids': sample[0], + 'attention_mask': sample[1], } + if self.model_type != 'distilbert': + # XLM, DistilBERT and RoBERTa don't use segment_ids + inputs['token_type_ids'] = sample[2] if self.model_type in [ + 'bert', 'xlnet'] else None + if self.model_type in ['xlnet', 'xlm']: + inputs.update({'cls_index': sample[4], 'p_mask': sample[5]}) + example_indices = sample[3] + sample = (inputs, example_indices) + return sample + + +@dataset_registry(dataset_type="GLUE", framework="onnxrt_qlinearops, \ + onnxrt_integerops", dataset_format='') +class ONNXRTBertDataset(Dataset): # pragma: no cover + """ONNXRT dataset used for model Bert. + + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default='bert'): model type, support 'distilbert', 'bert', + 'mobilebert', 'roberta'. + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. 
+ transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + + Examples: + dataset = ONNXRTBertDataset(data_dir=data_dir, model_name_or_path='bert-base-uncase', + transform=preprocess, filter=filter) + """ + def __init__(self, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + """Initialize the attributes of class.""" + task = task.lower() + model_type = model_type.lower() + assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ + 'mnli', 'wnli'], 'Unsupported task type' + assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ + model type' + + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + """Length of the dataset.""" + return len(self.dataset) + + def __getitem__(self, index): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + return self.dataset[index] + + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): # pragma: no cover + """Load and cache the examples. + + Helper Function for ONNXRTBertDataset. 
+ """ + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( + 'dev' if evaluate else 'train', + list(filter(None, model_name_or_path.split('/'))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + if task in ['mnli', 'mnli-mm'] and model_type in ['roberta']: + # HACK(label indices are swapped in RoBERTa pretrained model) + label_list[1], label_list[2] = label_list[2], label_list[1] + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) 
+ dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, + ): # pragma: no cover + """Convert examples to features. + + Helper function for load_and_cache_examples. + """ + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. 
+ seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + + +@dataclass(frozen=True) +class InputFeatures: # pragma: no cover + """Single set of features of data. + + Property names are the same names as the corresponding inputs to a model. + + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. 
+ """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + + def to_json_string(self): + """Serialize this instance to a JSON string.""" + return json.dumps(dataclasses.asdict(self)) + "\n" + + +@dataset_registry(dataset_type="bert", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowBertDataset(Dataset): # pragma: no cover + """Tensorflow dataset used for model Bert. + + This dataset supports tfrecord data, please refer to Guide to create tfrecord file first. + + Args: root (str): path of dataset. + label_file (str): path of label file. + task (str, default='squad'): task type of model. + model_type (str, default='bert'): model type, support 'bert'. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions + """ + + def __init__(self, root, label_file, task='squad', + model_type='bert', transform=None, filter=None): + """Initialize the attributes of class.""" + import json + with open(label_file) as lf: + label_json = json.load(lf) + assert label_json['version'] == '1.1', 'only support squad 1.1' + self.label = label_json['data'] + self.root = root + self.transform = transform + self.filter = filter + + def __getitem__(self, index): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index). + """ + return self.root, self.label + + def __len__(self): + """Length of the dataset.""" + return 1 + + +class ParseDecodeBert(): # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + import tensorflow as tf + # Dense features in Example proto. 
+ feature_map = { + 'input_ids': + tf.compat.v1.VarLenFeature(dtype=tf.int64), + 'input_mask': + tf.compat.v1.VarLenFeature(dtype=tf.int64), + 'segment_ids': + tf.compat.v1.VarLenFeature(dtype=tf.int64), + } + + features = tf.io.parse_single_example(sample, feature_map) + + input_ids = features['input_ids'].values + input_mask = features['input_mask'].values + segment_ids = features['segment_ids'].values + + return (input_ids, input_mask, segment_ids) + +@dataset_registry(dataset_type="mzbert", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowModelZooBertDataset(Dataset): # pragma: no cover + """Tensorflow dataset for three-input Bert in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + Args: root (str): path of dataset. + label_file (str): path of label file. + task (str, default='squad'): task type of model. + model_type (str, default='bert'): model type, support 'bert'. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according. 
+ """ + + def __init__(self, root, label_file, task='squad', + model_type='bert', transform=None, filter=None, num_cores=28): + """Initialize the attributes of class.""" + import json + with open(label_file) as lf: + label_json = json.load(lf) + assert label_json['version'] == '1.1', 'only support squad 1.1' + self.label = label_json['data'] + import tensorflow as tf + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if len(feature['input_ids'].int64_list.value) == 0 \ + and len(feature['input_mask'].int64_list.value) == 0: + raise ValueError("Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/' to create correct tfrecord") + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave(tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000)) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeBert()) + else: + transform = ParseDecodeBert() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + from ..dataloaders.tensorflow_dataloader import TFDataDataLoader + ds = TFDataDataLoader(ds) + self.root = [] + for inputs in ds: + self.root.append(inputs) + self.transform = transform + self.filter = filter + + def __getitem__(self, index): + """Magic method. 
+ + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + return self.root[index], self.label + + def __len__(self): + """Length of the dataset.""" + return len(self.root) diff --git a/neural_compressor/data/datasets/coco_dataset.py b/neural_compressor/data/datasets/coco_dataset.py new file mode 100644 index 00000000000..650c0648bc8 --- /dev/null +++ b/neural_compressor/data/datasets/coco_dataset.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ==============================================================================

"""Built-in COCO datasets class for multiple framework backends."""

import numpy as np
from PIL import Image
from neural_compressor.utils.utility import LazyImport
from .dataset import dataset_registry, IterableDataset, Dataset

tf = LazyImport('tensorflow')
mx = LazyImport('mxnet')
torch = LazyImport('torch')


class ParseDecodeCoco():  # pragma: no cover
    """Helper function for TensorflowModelZooBertDataset.

    Parse the features from sample.
    """

    def __call__(self, sample):
        """Parse one serialized tf.Example.

        Args:
            sample: a scalar string tensor holding a serialized Example proto.

        Returns:
            Tuple of (image_tensor, (bbox, str_label, int_label, image_id)),
            where bbox has shape [num_boxes, 4] in [ymin, xmin, ymax, xmax]
            order (normalized coordinates, as stored in COCO tfrecords).
        """
        # Dense features in Example proto.
        feature_map = {
            'image/encoded':
                tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/object/class/text':
                tf.compat.v1.VarLenFeature(dtype=tf.string),
            'image/object/class/label':
                tf.compat.v1.VarLenFeature(dtype=tf.int64),
            'image/source_id':
                tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''),
        }
        sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32)
        # Sparse features in Example proto.
        feature_map.update({
            k: sparse_float32
            for k in [
                'image/object/bbox/xmin', 'image/object/bbox/ymin',
                'image/object/bbox/xmax', 'image/object/bbox/ymax'
            ]
        })

        features = tf.io.parse_single_example(sample, feature_map)

        xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
        ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
        xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
        ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)

        bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
        # Force the variable number of bounding boxes into the shape
        # [1, num_boxes, coords].
        bbox = tf.expand_dims(bbox, 0)
        bbox = tf.transpose(bbox, [0, 2, 1])

        encoded_image = features['image/encoded']
        image_tensor = tf.image.decode_image(encoded_image, channels=3)
        image_tensor.set_shape([None, None, 3])

        str_label = features['image/object/class/text'].values
        int_label = features['image/object/class/label'].values
        image_id = features['image/source_id']

        return image_tensor, (bbox[0], str_label, int_label, image_id)


@dataset_registry(dataset_type="COCORecord", framework="tensorflow, tensorflow_itex", dataset_format='')
class COCORecordDataset(IterableDataset):  # pragma: no cover
    """Tensorflow COCO dataset in tf record format.

    Root is a full path to tfrecord file, which contains the file name.
    Please use Resize transform when batch_size > 1

    Args: root (str): Root directory of dataset.
          num_cores (int, default=28):The number of input Datasets to interleave from in parallel.
          transform (transform object, default=None): transform to process input data.
          filter (Filter objects, default=None): filter out examples according
                                                 to specific conditions.
    """

    # NOTE(fix): default was `filter=filter`, binding Python's builtin; the
    # later `if filter is not None` branch then applied the builtin to the
    # tf pipeline and failed whenever no filter was supplied. `None` is the
    # intended "no filtering" sentinel.
    def __new__(cls, root, num_cores=28, transform=None, filter=None):
        """Build a tf.data pipeline over the tfrecord file(s) at `root`."""
        # Sanity-check the record format by inspecting the first example:
        # a valid COCO tfrecord carries class text and/or class label features.
        record_iterator = tf.compat.v1.python_io.tf_record_iterator(root)
        example = tf.train.SequenceExample()
        for element in record_iterator:
            example.ParseFromString(element)
            break
        feature = example.context.feature
        if len(feature['image/object/class/text'].bytes_list.value) == 0 \
                and len(feature['image/object/class/label'].int64_list.value) == 0:
            raise ValueError("Tfrecord format is incorrect, please refer\
                'https://github.com/tensorflow/models/blob/master/research/\
                object_detection/dataset_tools/create_coco_tf_record.py' to\
                create correct tfrecord")
        # pylint: disable=no-name-in-module
        from tensorflow.python.data.experimental import parallel_interleave
        tfrecord_paths = [root]
        ds = tf.data.TFRecordDataset.list_files(tfrecord_paths)
        ds = ds.apply(
            parallel_interleave(tf.data.TFRecordDataset,
                                cycle_length=num_cores,
                                block_length=5,
                                sloppy=True,
                                buffer_output_elements=10000,
                                prefetch_input_elements=10000))
        # Decoding must happen before any user transform, so prepend it.
        if transform is not None:
            transform.transform_list.insert(0, ParseDecodeCoco())
        else:
            transform = ParseDecodeCoco()
        ds = ds.map(transform, num_parallel_calls=None)
        if filter is not None:
            ds = ds.filter(filter)
        ds = ds.prefetch(buffer_size=1000)
        return ds


@dataset_registry(dataset_type="COCORaw", framework="onnxrt_qlinearops, \
                    onnxrt_integerops, pytorch, mxnet, tensorflow, \
                    tensorflow_itex", dataset_format='')
class COCORaw(Dataset):  # pragma: no cover
    """Coco raw dataset.

    Please arrange data in this way:
        /root/img_dir/1.jpg
        /root/img_dir/2.jpg
        ...
        /root/img_dir/n.jpg
        /root/anno_dir
    Please use Resize transform when batch_size > 1

    Args: root (str): Root directory of dataset.
          img_dir (str, default='val2017'): image file directory.
          anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory.
          transform (transform object, default=None): transform to process input data.
          filter (Filter objects, default=None): filter out examples according
                                                 to specific conditions.
    """

    # NOTE(fix): default was `filter=filter` (the builtin), which made the
    # `if filter and not filter(None, bboxes)` check a silent no-op. `None`
    # keeps that behavior (no filtering) explicitly.
    def __init__(self, root, img_dir='val2017',
                 anno_dir='annotations/instances_val2017.json',
                 transform=None, filter=None):
        """Load all images and annotations into memory via pycocotools."""
        import os
        from pycocotools.coco import COCO
        self.image_list = []
        self.transform = transform
        img_path = os.path.join(root, img_dir)
        anno_path = os.path.join(root, anno_dir)
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        cat_ids = coco.getCatIds()
        for img_id in img_ids:
            bboxes = []
            labels = []
            img_detail = coco.loadImgs(img_id)[0]
            pic_height = img_detail['height']
            pic_width = img_detail['width']

            ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids)
            anns = coco.loadAnns(ann_ids)
            for ann in anns:
                bbox = ann['bbox']
                if len(bbox) == 0:
                    continue
                # Normalize [x, y, w, h] by image size, then convert to
                # [ymin, xmin, ymax, xmax] corner form.
                bbox = [bbox[0] / float(pic_width), bbox[1] / float(pic_height),
                        bbox[2] / float(pic_width), bbox[3] / float(pic_height)]
                bboxes.append([bbox[1], bbox[0], bbox[1] + bbox[3], bbox[0] + bbox[2]])
                labels.append(coco.cats[ann['category_id']]['name'].encode('utf8'))
            img_file = os.path.join(img_path, img_detail['file_name'])
            # Skip images without a file on disk or without any annotation.
            if not os.path.exists(img_file) or len(bboxes) == 0:
                continue

            if filter and not filter(None, bboxes):
                continue

            with Image.open(img_file) as image:
                image = np.array(image.convert('RGB'))
            self.image_list.append(
                (image, [np.array(bboxes), np.array(labels), np.array([]),
                         np.array(img_detail['file_name'].encode('utf-8'))]))

    def __len__(self):
        """Length of the dataset."""
        return len(self.image_list)

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        sample = self.image_list[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample


@dataset_registry(dataset_type="COCONpy", framework="onnxrt_qlinearops, \
                    onnxrt_integerops, pytorch, mxnet, tensorflow, \
                    tensorflow_itex", dataset_format='')
class COCONpy(Dataset):  # pragma: no cover
    """COCO npy dataset.

    Please arrange data in this way:
        /root/npy_dir/1.jpg.npy
        /root/npy_dir/2.jpg.npy
        ...
        /root/npy_dir/n.jpg.npy
        /root/anno_dir

    Args: root (str): Root directory of dataset.
          npy_dir (str, default='val2017'): npy file directory.
          anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory.
          transform (transform object, default=None): transform to process input data.
          filter (Filter objects, default=None): filter out examples according
                                                 to specific conditions.
    """

    def __init__(self, root, npy_dir='val2017',
                 anno_dir='annotations/instances_val2017.json',
                 transform=None, filter=None):
        """Load pre-extracted .npy images plus their COCO annotations."""
        import os
        from pycocotools.coco import COCO
        self.image_list = []
        npy_path = os.path.join(root, npy_dir)
        anno_path = os.path.join(root, anno_dir)
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        cat_ids = coco.getCatIds()
        for img_id in img_ids:
            labels = []
            img_detail = coco.loadImgs(img_id)[0]

            ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids)
            anns = coco.loadAnns(ann_ids)
            for ann in anns:
                bbox = ann['bbox']
                category_id = ann['category_id']
                if len(bbox) == 0:
                    continue
                labels.append((np.array(category_id), np.array(bbox)))
            # Expected layout: <npy_dir>/<file_name>.npy next to annotations.
            npy_file = os.path.join(npy_path, img_detail['file_name']) + ".npy"
            if not os.path.exists(npy_file):
                continue

            image = np.load(npy_file)
            self.image_list.append((image, labels))

    def __len__(self):
        """Length of the dataset."""
        return len(self.image_list)

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        sample = self.image_list[index]
        return sample

"""This is the base class for each framework."""

from abc import abstractmethod
import os
from neural_compressor.utils.utility import LazyImport, singleton
from PIL import Image
torch = LazyImport('torch')
torchvision = LazyImport('torchvision')
tf = LazyImport('tensorflow')
mx = LazyImport('mxnet')
np = LazyImport('numpy')
hashlib = LazyImport('hashlib')
gzip = LazyImport('gzip')
tarfile = LazyImport('tarfile')
zipfile = LazyImport('zipfile')
pickle = LazyImport('pickle')
glob = LazyImport('glob')


@singleton
class TensorflowDatasets(object):  # pragma: no cover
    """The base class of Tensorflow datasets class."""

    def __init__(self):
        """Collect all datasets registered for TensorFlow."""
        self.datasets = {}
        self.datasets.update(TENSORFLOW_DATASETS)


@singleton
class PyTorchDatasets(object):  # pragma: no cover
    """The base class of PyTorch datasets class."""

    def __init__(self):
        """Collect all datasets registered for PyTorch, plus ImageFolder."""
        self.datasets = {
            'ImageFolder': PytorchMxnetWrapDataset(
                torchvision.datasets.ImageFolder),
        }
        self.datasets.update(PYTORCH_DATASETS)


@singleton
class MXNetDatasets(object):  # pragma: no cover
    """The base class of MXNet datasets class."""

    def __init__(self):
        """Collect all datasets registered for MXNet."""
        self.datasets = {}
        self.datasets.update(MXNET_DATASETS)


@singleton
class ONNXRTQLDatasets(object):  # pragma: no cover
    """The base class of ONNXRT QLinear datasets class."""

    def __init__(self):
        """Collect all datasets registered for ONNXRT QLinear ops."""
        self.datasets = {}
        self.datasets.update(ONNXRTQL_DATASETS)


@singleton
class ONNXRTITDatasets(object):  # pragma: no cover
    """The base class of ONNXRT IT datasets class."""

    def __init__(self):
        """Collect all datasets registered for ONNXRT integer ops."""
        self.datasets = {}
        self.datasets.update(ONNXRTIT_DATASETS)


class PytorchMxnetWrapDataset():  # pragma: no cover
    """The base class for PyTorch and MXNet frameworks.

    Args:
        datafunc: The datasets class of PyTorch or MXNet.
    """

    def __init__(self, datafunc):
        """Store the dataset constructor to wrap."""
        self.datafunc = datafunc

    def __call__(self, transform=None, filter=None, *args, **kwargs):
        """Wrap the dataset for PyTorch and MXNet framework.

        NOTE(fix): transform/filter are forwarded positionally. The previous
        keyword forwarding (`transform=transform, ... , *args`) raised
        "got multiple values for argument 'transform'" whenever positional
        args were supplied, because *args also bound those parameters.
        """
        return PytorchMxnetWrapFunction(self.datafunc, transform, filter,
                                        *args, **kwargs)


class PytorchMxnetWrapFunction():  # pragma: no cover
    """The Helper class for PytorchMxnetWrapDataset.

    Args:
        dataset (datasets class): The datasets class of PyTorch or MXNet.
        transform (transform object): transform to process input data.
        filter (Filter objects): filter out examples according to specific
                                 conditions.
    """

    def __init__(self, dataset, transform, filter, *args, **kwargs):
        """Instantiate the wrapped dataset; keep transform/filter for lookup time."""
        self.dataset = dataset(*args, **kwargs)
        self.transform = transform
        self.filter = filter

    def __len__(self):
        """Length of the dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        sample = self.dataset[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample


# Maps framework name -> singleton holder of that framework's dataset registry.
framework_datasets = {"tensorflow": TensorflowDatasets,
                      "tensorflow_itex": TensorflowDatasets,
                      "mxnet": MXNetDatasets,
                      "pytorch": PyTorchDatasets,
                      "pytorch_ipex": PyTorchDatasets,
                      "pytorch_fx": PyTorchDatasets,
                      "onnxrt_qdq": ONNXRTQLDatasets,
                      "onnxrt_qlinearops": ONNXRTQLDatasets,
                      "onnxrt_qoperator": ONNXRTQLDatasets,
                      "onnxrt_integerops": ONNXRTITDatasets,
                      }

"""The datasets supported by neural_compressor, it's model specific and can be configured by yaml file.

   User could add new datasets by implementing new Dataset subclass under this directory.
   The naming convention of new dataset subclass should be something like ImageClassifier, user
   could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml.

   Datasets variable is used to store all implemented Dataset subclasses to support
   model specific dataset.
"""


class Datasets(object):  # pragma: no cover
    """A base class for all framework datasets.

    Args:
        framework (str): framework name, like:"tensorflow", "tensorflow_itex",
                         "mxnet", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops",
                         "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qoperator".
    """

    def __init__(self, framework):
        """Resolve the dataset registry for the requested framework."""
        assert framework in ["tensorflow", "tensorflow_itex", \
            "mxnet", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", \
            "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qoperator"], \
            "framework support tensorflow pytorch mxnet onnxrt"
        self.datasets = framework_datasets[framework]().datasets

    def __getitem__(self, dataset_type):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        assert dataset_type in self.datasets.keys(), "dataset type only support {}".\
            format(self.datasets.keys())
        return self.datasets[dataset_type]


# user/model specific datasets will be registered here
TENSORFLOW_DATASETS = {}
TENSORFLOWITEX_DATASETS = {}
MXNET_DATASETS = {}
PYTORCH_DATASETS = {}
PYTORCHIPEX_DATASETS = {}
PYTORCHFX_DATASETS = {}
ONNXRTQL_DATASETS = {}
ONNXRTIT_DATASETS = {}

# Maps framework name -> the mutable dict that dataset_registry writes into.
registry_datasets = {"tensorflow": TENSORFLOW_DATASETS,
                     "tensorflow_itex": TENSORFLOWITEX_DATASETS,
                     "mxnet": MXNET_DATASETS,
                     "pytorch": PYTORCH_DATASETS,
                     "pytorch_ipex": PYTORCHIPEX_DATASETS,
                     "pytorch_fx": PYTORCHFX_DATASETS,
                     "onnxrt_integerops": ONNXRTIT_DATASETS,
                     "onnxrt_qdq": ONNXRTQL_DATASETS,
                     "onnxrt_qoperator": ONNXRTQL_DATASETS,
                     "onnxrt_qlinearops": ONNXRTQL_DATASETS,
                     }


def dataset_registry(dataset_type, framework, dataset_format=''):  # pragma: no cover
    """Register dataset subclasses.

    Args:
        cls (class): The class of register.
        dataset_type (str): The dataset registration name
        framework (str): support 3 framework including 'tensorflow', 'pytorch', 'mxnet'
        data_format (str): The format dataset saved, eg 'raw_image', 'tfrecord'

    Returns:
        cls: The class of register.
    """
    def decorator_dataset(cls):
        # One registration per framework listed in the comma-separated string.
        for single_framework in [fwk.strip() for fwk in framework.split(',')]:
            assert single_framework in [
                "tensorflow",
                "tensorflow_itex",
                "mxnet",
                "pytorch",
                "pytorch_ipex",
                "pytorch_fx",
                "onnxrt_qlinearops",
                "onnxrt_integerops",
                "onnxrt_qdq",
                "onnxrt_qoperator",
            ], "The framework support tensorflow mxnet pytorch onnxrt"
            dataset_name = dataset_type + dataset_format
            if dataset_name in registry_datasets[single_framework].keys():
                raise ValueError('Cannot have two datasets with the same name')
            registry_datasets[single_framework][dataset_name] = cls
        return cls
    return decorator_dataset


class Dataset(object):  # pragma: no cover
    """The base class of dataset.

    Subclass datasets should overwrite two methods:
    `__getitem__` for indexing to data sample and `__len__`for the size of the dataset
    """

    @abstractmethod
    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        raise NotImplementedError

    # it's suggested to implement your __len__ method though we do not set it in abstract class
    # @abstractmethod
    # def __len__(self):
    #     raise NotImplementedError


class IterableDataset(object):  # pragma: no cover
    """An iterable Dataset.

    Subclass iterable dataset should also implement a method:
    `__iter__` for interating over the samples of the dataset.
    """

    @abstractmethod
    def __iter__(self):
        """Magic method.

        Returns the iterator object itself.
        """
        raise NotImplementedError


def download_url(url, root, filename=None, md5=None):  # pragma: no cover
    """Download from url.

    Args:
        url (str): the address to download from.
        root (str): the path for saving.
        filename (str): the file name for saving.
        md5 (str): the md5 string.
    """
    import urllib
    root = os.path.expanduser(root)
    if not filename:
        filename = os.path.basename(url)
    fpath = os.path.join(root, filename)

    os.makedirs(root, exist_ok=True)

    if check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
    else:
        try:
            print('Downloading ' + url + ' to ' + fpath)
            urllib.request.urlretrieve(
                url, fpath,
                reporthook=gen_bar_updater()
            )
        except (urllib.error.URLError, IOError) as e:
            # Some mirrors reject https; retry once over plain http.
            if url[:5] == 'https':
                url = url.replace('https:', 'http:')
                print('Failed download. Trying https -> http instead.'
                      ' Downloading ' + url + ' to ' + fpath)
                urllib.request.urlretrieve(
                    url, fpath,
                    reporthook=gen_bar_updater()
                )
            else:
                raise e
    if not check_integrity(fpath, md5):
        raise RuntimeError("File not found or corrupted.")


def gen_bar_updater():  # pragma: no cover
    """Generate progress bar."""
    from tqdm import tqdm
    pbar = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        """Update progress bar."""
        if pbar.total is None and total_size:
            pbar.total = total_size
        progress_bytes = count * block_size
        pbar.update(progress_bytes - pbar.n)
    return bar_update


def check_integrity(fpath, md5):  # pragma: no cover
    """Check MD5 checksum.

    Returns True when the file exists and either no md5 was given or the
    computed digest matches.
    """
    if not os.path.isfile(fpath):
        return False
    if md5 is None:
        return True
    return md5 == calculate_md5(fpath)


def calculate_md5(fpath, chunk_size=1024 * 1024):  # pragma: no cover
    """Generate MD5 checksum for a file, reading in chunks to bound memory."""
    md5 = hashlib.md5()
    with open(fpath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()


@dataset_registry(dataset_type="CIFAR10", framework="onnxrt_qlinearops, \
                    onnxrt_integerops", dataset_format='')
class CIFAR10(Dataset):  # pragma: no cover
    """The CIFAR10 and CIFAR100 database.

    For CIFAR10: If download is True, it will download dataset to root/ and extract it
                 automatically, otherwise user can download file from
                 https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz manually to
                 root/ and extract it.
    For CIFAR100: If download is True, it will download dataset to root/ and extract it
                  automatically, otherwise user can download file from
                  https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to
                  root/ and extract it.

    Args:
        root (str): Root directory of dataset.
        train (bool, default=False): If True, creates dataset from train subset,
                                     otherwise from validation subset.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according to specific
                                               conditions.
        download (bool, default=True): If true, downloads the dataset from the internet
                                       and puts it in root directory. If dataset is already
                                       downloaded, it is not downloaded again.
    """

    url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    filename = "cifar-10-python.tar.gz"
    tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
    train_list = [
        ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
        ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
        ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
        ['data_batch_4', '634d18415352ddfa80567beed471001a'],
        ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
    ]

    test_list = [
        ['test_batch', '40351d587109b95175f43aff81a1287e'],
    ]

    meta = {
        'filename': 'batches.meta',
        'key': 'label_names',
        'md5': '5ff9c542aee3614f3951f8cda6e48888',
    }

    def __init__(self,
                 root,
                 train=False,
                 transform=None,
                 filter=None,
                 download=True):  # pragma: no cover
        """Load the pickled CIFAR batches into memory, downloading if asked."""
        self.root = root
        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError(
                'Dataset not found or corrupted. You can use download=True to download it')
        if train:
            downloaded_list = self.train_list
        else:
            downloaded_list = self.test_list

        self.data = []
        self.targets = []
        for file_name, checksum in downloaded_list:
            file_path = os.path.join(self.root, file_name)
            with open(file_path, 'rb') as f:
                entry = pickle.load(f, encoding='latin1')
                self.data.append(entry['data'])
                # CIFAR10 batches use 'labels'; CIFAR100 uses 'fine_labels'.
                if 'labels' in entry:
                    self.targets.extend(entry['labels'])
                else:
                    self.targets.extend(entry['fine_labels'])
        self.data = np.vstack(self.data).reshape(-1, 3, 32, 32)
        self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC

        self.load_meta()
        self.transform = transform

    def load_meta(self):  # pragma: no cover
        """Load class-name metadata and build the class -> index mapping."""
        path = os.path.join(self.root, self.meta['filename'])
        if not check_integrity(path, self.meta['md5']):
            raise RuntimeError('Dataset metadata file not found or corrupted.' +
                               ' You can use download=True to download it')
        with open(path, 'rb') as infile:
            data = pickle.load(infile, encoding='latin1')
            self.classes = data[self.meta['key']]
        self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)}

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        if self.transform is not None:
            image, label = self.transform((image, label))
        return image, label

    def __len__(self):  # pragma: no cover
        """Length of the dataset."""
        return len(self.data)

    def download(self):  # pragma: no cover
        """Download and extract the archive unless all batches already verify."""
        if self._check_integrity():
            print('Files already downloaded and verified')
            return
        download_root = os.path.expanduser(self.root)
        filename = os.path.basename(self.url)
        download_url(self.url, download_root, filename, self.tgz_md5)
        archive = os.path.join(download_root, filename)
        print("Extracting {} to {}".format(archive, download_root))
        with tarfile.open(archive, 'r:gz') as tar:
            tar.extractall(path=download_root)

    def _check_integrity(self):  # pragma: no cover
        """Return True only if every train and test batch passes its MD5 check."""
        root = self.root
        for fentry in (self.train_list + self.test_list):
            filename, md5 = fentry[0], fentry[1]
            fpath = os.path.join(root, filename)
            if not check_integrity(fpath, md5):
                return False
        return True


@dataset_registry(dataset_type="CIFAR10", framework="pytorch", dataset_format='')
class PytorchCIFAR10(CIFAR10):
    """The PyTorch datasets for CIFAR10."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        # PIL image so torchvision-style transforms apply.
        image = Image.fromarray(image)
        if self.transform is not None:
            image, label = self.transform((image, label))
        return (image, label)


@dataset_registry(dataset_type="CIFAR10", framework="mxnet", dataset_format='')
class MXNetCIFAR10(CIFAR10):
    """The MXNet datasets for CIFAR10."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        image = mx.nd.array(image)
        if self.transform is not None:
            image, label = self.transform((image, label))
        return (image, label)


@dataset_registry(dataset_type="CIFAR10", framework="tensorflow, tensorflow_itex", dataset_format='')
class TensorflowCIFAR10(CIFAR10):
    """The Tensorflow datasets for CIFAR10."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        if self.transform is not None:
            image, label = self.transform((image, label))
        # Materialize TF tensors back to numpy for framework-agnostic callers.
        if type(image).__name__ == 'Tensor':
            with tf.compat.v1.Session() as sess:
                image = sess.run(image)
        elif type(image).__name__ == 'EagerTensor':
            image = image.numpy()
        return (image, label)


@dataset_registry(dataset_type="CIFAR100", framework="onnxrt_qlinearops, \
                    onnxrt_integerops", dataset_format='')
class CIFAR100(CIFAR10):
    """CIFAR100 database.

    For CIFAR100: If download is True, it will download dataset to root/ and extract it
                  automatically, otherwise user can download file from
                  https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to
                  root/ and extract it.

    Args:
        root (str): Root directory of dataset.
        train (bool, default=False): If True, creates dataset from train subset,
                                     otherwise from validation subset.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according to specific
                                               conditions.
        download (bool, default=True): If true, downloads the dataset from the internet
                                       and puts it in root directory. If dataset is already
                                       downloaded, it is not downloaded again.
    """

    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
    train_list = [
        ['train', '16019d7e3df5f24257cddd939b257f8d'],
    ]
    test_list = [
        ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
    ]
    meta = {
        'filename': 'meta',
        'key': 'fine_label_names',
        'md5': '7973b15100ade9c7d40fb424638fde48',
    }


@dataset_registry(dataset_type="CIFAR100", framework="pytorch", dataset_format='')
class PytorchCIFAR100(CIFAR100):
    """The PyTorch datasets for CIFAR100."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        image = Image.fromarray(image)
        if self.transform is not None:
            image, label = self.transform((image, label))
        image = np.array(image)
        return (image, label)


@dataset_registry(dataset_type="CIFAR100", framework="mxnet", dataset_format='')
class MXNetCIFAR100(CIFAR100):
    """The MXNet datasets for CIFAR100."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        image = mx.nd.array(image)
        if self.transform is not None:
            image, label = self.transform((image, label))
        return (image, label)


@dataset_registry(dataset_type="CIFAR100", framework="tensorflow, tensorflow_itex", dataset_format='')
class TensorflowCIFAR100(CIFAR100):
    """The Tensorflow datasets for CIFAR100."""

    def __getitem__(self, index):  # pragma: no cover
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], self.targets[index]
        if self.transform is not None:
            image, label = self.transform((image, label))
        if type(image).__name__ == 'Tensor':
            with tf.compat.v1.Session() as sess:
                image = sess.run(image)
        elif type(image).__name__ == 'EagerTensor':
            image = image.numpy()
        return (image, label)


@dataset_registry(dataset_type="MNIST", framework="onnxrt_qlinearops, \
                    onnxrt_integerops", dataset_format='')
class MNIST(Dataset):  # pragma: no cover
    """Modified National Institute of Standards and Technology database and FashionMNIST database.

    For MNIST: If download is True, it will download dataset to root/MNIST/, otherwise user
               should put mnist.npz under root/MNIST/ manually.
    For FashionMNIST: If download is True, it will download dataset to root/FashionMNIST/,
                      otherwise user should put train-labels-idx1-ubyte.gz,
                      train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz
                      and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.

    Args:
        root (str): Root directory of dataset.
        train (bool, default=False): If True, creates dataset from train subset,
                                     otherwise from validation subset.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according to specific
                                               conditions.
        download (bool, default=True): If true, downloads the dataset from the internet
                                       and puts it in root directory. If dataset is already
                                       downloaded, it is not downloaded again.
    """

    classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
               '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
    resource = [
        ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz',
         '8a61469f7ea1b51cbae51d4f78837e45')
    ]

    def __init__(self, root, train=False, transform=None, filter=None, download=True):
        """Download (optionally) and load the requested split into memory."""
        self.root = root
        self.train = train
        self.transform = transform
        if download:
            self.download()

        self.read_data()

    def read_data(self):
        """Read data from a file."""
        for file_name, checksum in self.resource:
            file_path = os.path.join(self.root, os.path.basename(file_name))
            if not os.path.exists(file_path):
                raise RuntimeError(
                    'Dataset not found. You can use download=True to download it')
            with np.load(file_path, allow_pickle=True) as f:
                if self.train:
                    self.data, self.targets = f['x_train'], f['y_train']
                else:
                    self.data, self.targets = f['x_test'], f['y_test']

    def __len__(self):
        """Length of the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        # Add the trailing channel dim: (28, 28) -> (28, 28, 1).
        image = np.expand_dims(image, -1)
        if self.transform is not None:
            image, label = self.transform((image, label))
        return image, label

    @property
    def class_to_idx(self):
        """Return a dict of class."""
        return {_class: i for i, _class in enumerate(self.classes)}

    def download(self):
        """Download each resource file unless it is already present."""
        for url, md5 in self.resource:
            filename = os.path.basename(url)
            if os.path.exists(os.path.join(self.root, filename)):
                continue
            else:
                download_url(url, root=self.root,
                             filename=filename, md5=md5)


@dataset_registry(dataset_type="MNIST", framework="pytorch", dataset_format='')
class PytorchMNIST(MNIST):  # pragma: no cover
    """The PyTorch datasets for MNIST."""

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        image = Image.fromarray(image, mode='L')
        if self.transform is not None:
            image, label = self.transform((image, label))
        image = np.array(image)
        return (image, label)


@dataset_registry(dataset_type="MNIST", framework="mxnet", dataset_format='')
class MXNetMNIST(MNIST):  # pragma: no cover
    """The MXNet datasets for MNIST."""

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        image = mx.nd.array(image)
        image = image.reshape((image.shape[0], image.shape[1], 1))
        if self.transform is not None:
            image, label = self.transform((image, label))
        return (image, label)


@dataset_registry(dataset_type="MNIST", framework="tensorflow, tensorflow_itex", dataset_format='')
class TensorflowMNIST(MNIST):  # pragma: no cover
    """The Tensorflow datasets for MNIST."""

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        image = np.expand_dims(image, -1)
        if self.transform is not None:
            image, label = self.transform((image, label))
        if type(image).__name__ == 'Tensor':
            with tf.compat.v1.Session() as sess:
                image = sess.run(image)
        elif type(image).__name__ == 'EagerTensor':
            image = image.numpy()
        return (image, label)


@dataset_registry(dataset_type="FashionMNIST", framework="onnxrt_qlinearops, \
                    onnxrt_integerops", dataset_format='')
class FashionMNIST(MNIST):  # pragma: no cover
    """FashionMNIST database.

    For FashionMNIST: If download is True, it will download dataset to root/FashionMNIST/,
                      otherwise user should put train-labels-idx1-ubyte.gz,
                      train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz
                      and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.

    Args:
        root (str): Root directory of dataset.
        train (bool, default=False): If True, creates dataset from train subset,
                                     otherwise from validation subset.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according to specific
                                               conditions.
        download (bool, default=True): If true, downloads the dataset from the internet
                                       and puts it in root directory. If dataset is already
                                       downloaded, it is not downloaded again.
    """

    resource = [
        ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/' + file_name, None)
        for file_name in [
            'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
            't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
        ]
    ]

    classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    def read_data(self):
        """Read data from a file."""
        import struct
        if self.train:
            label_path = os.path.join(self.root, 'train-labels-idx1-ubyte.gz')
            image_path = os.path.join(self.root, 'train-images-idx3-ubyte.gz')
        else:
            label_path = os.path.join(self.root, 't10k-labels-idx1-ubyte.gz')
            image_path = os.path.join(self.root, 't10k-images-idx3-ubyte.gz')
        # IDX format: skip the fixed-size headers, then raw uint8 payloads.
        with gzip.open(label_path, 'rb') as f:
            struct.unpack(">II", f.read(8))
            self.targets = np.frombuffer(f.read(), dtype=np.uint8).astype(np.int32)
        with gzip.open(image_path, 'rb') as f:
            struct.unpack(">IIII", f.read(16))
            data = np.frombuffer(f.read(), dtype=np.uint8)
            self.data = data.reshape(len(self.targets), 28, 28)


@dataset_registry(dataset_type="FashionMNIST", framework="pytorch", dataset_format='')
class PytorchFashionMNIST(FashionMNIST):  # pragma: no cover
    """The PyTorch datasets for FashionMNIST."""

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        image = Image.fromarray(image, mode='L')
        if self.transform is not None:
            image, label = self.transform((image, label))
        image = np.array(image)
        return (image, label)


@dataset_registry(dataset_type="FashionMNIST", framework="mxnet", dataset_format='')
class MXNetFashionMNIST(FashionMNIST):  # pragma: no cover
    """The MXNet Dataset for FashionMNIST."""

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        image, label = self.data[index], int(self.targets[index])
        image = mx.nd.array(image)
        image = image.reshape((image.shape[0], image.shape[1], 1))
        if self.transform is not None:
            image, label = self.transform((image, label))
        return (image, label)
+ + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + image, label = self.data[index], int(self.targets[index]) + image = mx.nd.array(image) + image = image.reshape((image.shape[0], image.shape[1], 1)) + if self.transform is not None: + image, label = self.transform((image, label)) + return (image, label) + + +@dataset_registry(dataset_type="FashionMNIST", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowFashionMNIST(FashionMNIST): # pragma: no cover + """The Tensorflow Dataset for FashionMNIST.""" + + def __getitem__(self, index): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + image, label = self.data[index], int(self.targets[index]) + image = np.expand_dims(image, -1) + if self.transform is not None: + image, label = self.transform((image, label)) + if type(image).__name__ == 'Tensor': + with tf.compat.v1.Session() as sess: + image = sess.run(image) + elif type(image).__name__ == 'EagerTensor': + image = image.numpy() + return (image, label) + + +@dataset_registry(dataset_type="ImageFolder", framework="onnxrt_qlinearops, \ + onnxrt_integerops", dataset_format='') +class ImageFolder(Dataset): # pragma: no cover + """The base class for ImageFolder. + + Expects the data folder to contain subfolders representing the classes to which + its images belong. + + Please arrange data in this way: + root/class_1/xxx.png + root/class_1/xxy.png + root/class_1/xxz.png + ... + root/class_n/123.png + root/class_n/nsdf3.png + root/class_n/asd932_.png + Please put images of different categories into different folders. + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according to specific + conditions. 
+ """ + + def __init__(self, root, transform=None, filter=None): + """Initialize the attributes of class.""" + self.root = root + assert os.path.exists(self.root), "Datapath doesn't exist!" + + self.transform = transform + self.image_list = [] + files = glob.glob(os.path.join(self.root, '*')) + files.sort() + for idx, file in enumerate(files): + imgs = glob.glob(os.path.join(file, '*')) + imgs.sort() + for img in imgs: + self.image_list.append((img, idx)) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + label = sample[1] + with Image.open(sample[0]) as image: + image = np.array(image) + if self.transform is not None: + image, label = self.transform((image, label)) + return (image, label) + + +@dataset_registry(dataset_type="ImageFolder", framework="mxnet", dataset_format='') +class MXNetImageFolder(ImageFolder): # pragma: no cover + """The MXNet Dataset for image folder. + + Expects the data folder to contain subfolders representing the classes to which + its images belong. + + Please arrange data in this way: + root/class_1/xxx.png + root/class_1/xxy.png + root/class_1/xxz.png + ... + root/class_n/123.png + root/class_n/nsdf3.png + root/class_n/asd932_.png + Please put images of different categories into different folders. + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according to specific + conditions. + """ + + def __getitem__(self, index): + """Magic method. 
+ + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + label = sample[1] + image = mx.image.imread(sample[0]) + if self.transform is not None: + image, label = self.transform((image, label)) + return (image, label) + + +@dataset_registry(dataset_type="ImageFolder", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowImageFolder(ImageFolder): # pragma: no cover + """The Tensorflow Dataset for image folder. + + Expects the data folder to contain subfolders representing the classes to which + its images belong. + + Please arrange data in this way: + root/class_1/xxx.png + root/class_1/xxy.png + root/class_1/xxz.png + ... + root/class_n/123.png + root/class_n/nsdf3.png + root/class_n/asd932_.png + Please put images of different categories into different folders. + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according to specific + conditions. + """ + + def __getitem__(self, index): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + label = sample[1] + with Image.open(sample[0]) as image: + if image.mode != 'RGB': + image = image.convert('RGB') + image = np.array(image) + if self.transform is not None: + image, label = self.transform((image, label)) + if type(image).__name__ == 'Tensor': + with tf.compat.v1.Session() as sess: + image = sess.run(image) + elif type(image).__name__ == 'EagerTensor': + image = image.numpy() + return (image, label) + + +@dataset_registry(dataset_type="TFRecordDataset", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowTFRecordDataset(IterableDataset): # pragma: no cover + """The Tensorflow TFRecord Dataset. + + Root is a full path to tfrecord file, which contains the file name. + + Args: root (str): filename of dataset. 
+ transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowTFRecordDataset class.""" + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + from tensorflow.python.platform import gfile + file_names = gfile.Glob(root) + ds = tf.data.Dataset.from_tensor_slices(file_names) + ds = ds.apply(parallel_interleave( + tf.data.TFRecordDataset, cycle_length=len(file_names))) + if transform is not None: + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +@dataset_registry(dataset_type="ImageRecord", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowImageRecord(IterableDataset): # pragma: no cover + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + """Configuration for Imagenet dataset.""" + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + glob_pattern = os.path.join(root, '*-*-of-*') + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave( + tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +@dataset_registry(dataset_type="VOCRecord", framework="tensorflow, tensorflow_itex", dataset_format='') +class TensorflowVOCRecord(IterableDataset): # pragma: no cover + """The Tensorflow PASCAL VOC 2012 database in tf record format. + + Please arrange data in this way: + root/val-00000-of-00004.tfrecord + root/val-00001-of-00004.tfrecord + ... + root/val-00003-of-00004.tfrecord + The file name needs to follow this pattern: 'val-*-of-*' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowVOCRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + glob_pattern = os.path.join(root, '%s-*' % 'val') + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave( + tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds diff --git a/neural_compressor/data/datasets/dummy_dataset.py b/neural_compressor/data/datasets/dummy_dataset.py new file mode 100644 index 00000000000..8b54296015d --- /dev/null +++ b/neural_compressor/data/datasets/dummy_dataset.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Dummy dataset for dummy data generation on multiple framework backends.""" + +from .dataset import dataset_registry, Dataset +import numpy as np +from neural_compressor.utils.utility import LazyImport +import logging + +mx = LazyImport('mxnet') +torch = LazyImport('torch') + +logger = logging.getLogger("neural_compressor") + +@dataset_registry(dataset_type="dummy", framework="tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, \ + pytorch, pytorch_ipex, pytorch_fx, \ + mxnet", + dataset_format='') +class DummyDataset(Dataset): # pragma: no cover + """Dataset used for dummy data generation. + + This Dataset is to construct a dataset from a specific shape. + The value range is calculated from: low * stand_normal(0, 1) + high. + (TODO) construct dummy data from real dataset or iteration of data. + """ + + def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ + transform=None, filter=None): + """Initialize `DummyDataset` class. + + Args: + shape (list or tuple): Support create multi shape tensors, use list of tuples + for each tuple in the list, will create a such size tensor. + low (list or float, default=-128.): Low out the tensor value range from [0, 1] + to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. + high (list or float, default=127.): High the tensor value by add all tensor element + value high. If list, length of list should be same with shape list. + dtype (list or str, default='float32'): Support multi tensor dtype setting. + If list, length of list should be same with shape list. If str, all tensors will + use same dtype. dtype supports 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool'. + label (bool, default=True): Whether to return 0 as label. + transform (transform object, default=None): Dummy dataset does not need transform. 
+ If transform is not None, it will ignore it. + filter (Filter objects, default=None): Filter out examples according to specific conditions. + """ + dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ + 'int8': np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool,\ + 'string': str} + + np.random.seed(9527) + self.transform = transform + self.label = label + if len(shape)==0: + logger.info("No data in the dummy dataset.") + elif isinstance(shape, list): + # list tensor should have the same first dimension n + n = shape[0][0] + assert all(isinstance(elem, tuple) and elem[0] == n for elem in shape), \ + 'each tensor shape should be tuple and same fisrt demension' + + if isinstance(low, list): + assert len(low) == len(shape) and all(isinstance(elem, float) for elem in low), \ + 'low list should have same length with shape with element data type float' + else: + low = (low * np.ones(len(shape))).astype(float) + + if isinstance(high, list): + assert len(high) == len(shape) and all(isinstance(elem, float) for elem in high), \ + 'high list should have same length with shape with element data type float' + else: + high = (high * np.ones(len(shape))).astype(float) + + if isinstance(dtype, list): + assert len(dtype) == len(shape) and \ + all(elem in dtype_map.keys() for elem in dtype), \ + 'high list should have same length with shape with element data type float' + else: + dtype = [dtype for i in range(0, len(shape))] + + elif isinstance(shape, tuple): + shape = [shape] + if isinstance(low, float): + low = [low] + else: + assert isinstance(low, list) and len(low) == 1 and isinstance(low[0], float), \ + 'low should be float or list of float with length 1' + + if isinstance(high, float): + high = [high] + else: + assert isinstance(high, list) and len(high) == 1 and isinstance(high[0], float), \ + 'high should be float or list of float with length 1' + + if isinstance(dtype, str): + assert dtype in dtype_map.keys(), 'dtype only support
{}'.format(dtype_map.keys()) + dtype = [dtype] + else: + assert isinstance(dtype, list) and \ + len(dtype) == 1 and dtype[0] in dtype_map.keys(), \ + 'dtype should be str or list of str in supported dtypes' + + self.dataset = [] + for idx in range(0, len(shape)): + tensor = np.random.uniform(low=low[idx], high=high[idx], size=shape[idx]) + tensor = tensor.astype(dtype_map[dtype[idx]]) + self.dataset.append(tensor) + + if len(self.dataset) == 1: + self.dataset = self.dataset[0] + else: + self.dataset = [elem for elem in zip(*self.dataset)] + + + def __len__(self): + """Return the length of dataset.""" + return len(self.dataset) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + sample = self.dataset[index] + if self.transform is not None: + logger.warning("Dummy dataset does not need transform.") + + if self.label: + return sample, 0 + else: + return sample diff --git a/neural_compressor/data/datasets/dummy_dataset_v2.py b/neural_compressor/data/datasets/dummy_dataset_v2.py new file mode 100644 index 00000000000..46cf3bba73d --- /dev/null +++ b/neural_compressor/data/datasets/dummy_dataset_v2.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Dummy dataset for dummy_v2/sparse_dummy_v2 data generation on multiple framework backends.""" + +import sys +from .dataset import dataset_registry, IterableDataset +import numpy as np +from neural_compressor.utils.utility import LazyImport +from functools import reduce + +mx = LazyImport('mxnet') +torch = LazyImport('torch') + +@dataset_registry(dataset_type="dummy_v2", framework="tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, \ + pytorch, pytorch_ipex, pytorch_fx, mxnet", + dataset_format='') +class DummyDataset(IterableDataset): # pragma: no cover + """Dataset used for dummy_v2 data generation. + + This Dataset is to construct a dataset from an input shape and label shape. + The value range is calculated from: low * stand_normal(0, 1) + high. + """ + + def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ + dtype='float32', transform=None, filter=None): + """Initialize `DummyDataset` class. + + Args: + sample_size (int): Total size of the dummy samples. + input_shape (list or tuple): Create single or multi input tensors, + tuple represents the sample shape of the dataset, e.g. an image size should be + represented as (224, 224, 3), list contains multiple tuple and represent multi input tensors. + label_shape (list or tuple): Create single or multi label tensors, + tuple represents the label shape of the dataset, e.g. a label size should be + represented as (1, ), list contains multiple tuple and represent multi label tensors. + low (list or float, default=-128.): Low out the tensor value range from [0, 1] + to [0, low] or [low, 0] if low < 0. If float, will implement all tensors with same low value. + high (list or float, default=127.): High the tensor value by add all tensor element value high. + If list, length of list should be same with shape list. + dtype (list or str, default='float32'): Support multi tensor dtype setting.
+ If list, length of list should be same with shape list. + If str, all tensors will use same dtype. + dtype supports 'float32', 'float16', 'uint8', 'int8','int32', 'int64', 'bool'. + transform (transform object, default=None): dummy_v2 dataset does not need transform. + If transform is not None, it will ignore it. + filter (Filter objects, default=None): Filter out examples according to specific conditions. + """ + self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ + 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':np.bool} + + np.random.seed(9527) + self.transform = transform + self.input_shape = input_shape + self.label_shape = label_shape + self.low = low + self.high = high + self.dtype = dtype + + if label_shape is None: + self.label_dim = 0 + elif isinstance(label_shape, tuple): + self.label_dim = 1 + else: + self.label_dim = len(label_shape) + + self.input_dim = 1 if isinstance(input_shape, tuple) else len(input_shape) + self.total_dim = self.input_dim + self.label_dim + + if isinstance(high, list): + assert len(high) == self.total_dim and \ + all(isinstance(elem, float) for elem in high),\ + 'high value list length should same with label dim + input_dim' + else: + self.high = (high * np.ones(self.total_dim)).astype(np.float) + + if isinstance(low, list): + assert len(low) == self.total_dim and \ + all(isinstance(elem, float) for elem in low), \ + 'low value list length should same with label dim + input_dim' + else: + self.low = (low * np.ones(self.total_dim)).astype(np.float) + + if isinstance(dtype, list): + assert len(dtype) == self.total_dim and \ + all(elem in self.dtype_map.keys() for elem in dtype), \ + 'dtype list length should same with label dim + input_dim' + else: + self.dtype = [self.dtype for i in range(0, self.total_dim)] + + if isinstance(input_shape, tuple): + self.input_shape = [input_shape] + + if isinstance(label_shape, tuple): + self.label_shape = [label_shape] + + def __iter__(self): + """Yield 
data in iterative order.""" + while True: + input_data = [] + for idx in range(0, self.input_dim): + tensor = np.random.uniform(\ + low=self.low[idx], high=self.high[idx], size=self.input_shape[idx]) + tensor = tensor.astype(self.dtype_map[self.dtype[idx]]) + input_data.append(tensor) + + label = [] + for idx in range(0, self.label_dim): + shift_idx = self.input_dim + idx + tensor = np.random.uniform(low=self.low[shift_idx], + high=self.high[shift_idx], + size=self.label_shape[idx]) + tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) + label.append(tensor) + + if len(input_data) == 1: + input_data = input_data[0] + + if len(label) == 1: + label = label[0] + + if len(label) > 0: + yield input_data, label + else: + yield input_data + + def __len__(self): + """Return the length of dataset.""" + return sys.maxsize + +@dataset_registry(dataset_type="sparse_dummy_v2", framework="tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, \ + pytorch, pytorch_ipex, pytorch_fx, mxnet", + dataset_format='') +class SparseDummyDataset(IterableDataset): # pragma: no cover + """Dataset used for sparse_dummy_v2 data generation. + + This Dataset is to construct a dataset from an input shape and label shape. + The value range is calculated from: low * stand_normal(0, 1) + high. + """ + + def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., high=127., \ + dtype='float32', transform=None, filter=None): + """Initialize `SparseDummyDataset` class. + + Args: + sample_size (int): Total size of the dummy samples. + dense_shape (list or tuple): Create single or multi sparse tensors, tuple represents + the sample shape of the dataset, e.g. an image size should be represented as (224, 224, 3), + list contains multiple tuple and represent multi input tensors. + label_shape (list or tuple): Create single or multi label tensors, tuple represents + the label shape of the dataset, e.g.
an label size should be represented as (1, ), + list contains multiple tuple and represent multi label tensors. + sparse_ratio (float, default=0.5): The ratio of sparsity, supports [0, 1]. + low (list or float, default=-128.): Low out the tensor value range from [0, 1] + to [0, low] or [low, 0] if low < 0. If float, will implement all tensors with same low value. + high (list or float, default=127.): High the tensor value by add all tensor element value high. + If list, length of list should be same with shape list. + dtype (list or str, default='float32'): Support multi tensor dtype setting. If list, + length of list should be same with shape list. If str, all tensors will use same dtype. + dtype supports 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool'. + transform (transform object, default=None): dummy_v2 dataset does not need transform. + If transform is not None, it will ignore it. + filter (Filter objects, default=None): Filter out examples according to specific conditions. 
+ """ + self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ + 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':np.bool} + + np.random.seed(9527) + self.transform = transform + self.dense_shape = dense_shape + self.label_shape = label_shape + self.sparse_ratio = sparse_ratio + self.low = low + self.high = high + self.dtype = dtype + + if isinstance(dense_shape, tuple): + self.dense_shape = [dense_shape] + + if label_shape is None: + self.label_dim = 0 + else: + if isinstance(label_shape, tuple): + self.label_shape = [label_shape] + if len(self.label_shape) == 1 and len(self.label_shape) != len(self.dense_shape): + self.label_shape = len(self.dense_shape) * self.label_shape + assert len(self.label_shape) == len(self.dense_shape), \ + 'length of dense_shape should be euqal to length of label_shape' + self.label_dim = len(self.label_shape) + + self.input_dim = 1 if isinstance(dense_shape, tuple) else len(dense_shape) + self.total_dim = self.input_dim + self.label_dim + + if isinstance(sparse_ratio, list): + assert len(sparse_ratio) == self.input_dim and \ + all(isinstance(elem, float) for elem in sparse_ratio),\ + 'sparse_ratio list length should same with input_dim' + else: + self.sparse_ratio = (sparse_ratio * np.ones(self.input_dim)).astype(np.float) + assert all([0 <= i <= 1 for i in self.sparse_ratio]), 'sparse_ratio should be in [0,1]' + + if isinstance(high, list): + assert len(high) == self.total_dim and \ + all(isinstance(elem, float) for elem in high),\ + 'high value list length should same with label dim + input_dim' + else: + self.high = (high * np.ones(self.total_dim)).astype(np.float) + + if isinstance(low, list): + assert len(low) == self.total_dim and \ + all(isinstance(elem, float) for elem in low), \ + 'low value list length should same with label dim + input_dim' + else: + self.low = (low * np.ones(self.total_dim)).astype(np.float) + + if isinstance(dtype, list): + assert len(dtype) == self.total_dim and \ + 
all(elem in self.dtype_map.keys() for elem in dtype), \ + 'dtype list length should same with label dim + input_dim' + else: + self.dtype = [self.dtype for i in range(0, self.total_dim)] + + def __iter__(self): + """Yield data in iterative order.""" + while True: + input_data = [] + for idx, shape in enumerate(self.dense_shape): + dim = len(shape) + total = reduce(lambda x, y: x*y, shape) + sparse_num = round(total * (1 - self.sparse_ratio[idx])) + val = np.random.uniform(\ + low=self.low[idx], high=self.high[idx], size=sparse_num) + val = val.astype(self.dtype_map[self.dtype[idx]]) + nums = np.arange(sparse_num) + indices = [] + dim_shape = [reduce(lambda x, y: x*y, shape[i:])/shape[i] \ + for i in range(len(shape))] + for num in nums: + indice = [] + for item in dim_shape: + indice.append(num//item) + num = num - indice[-1] * item if num - indice[-1] * item > 0 else num + indices.append(indice) + + if self.label_dim > 0: + shift_idx = self.input_dim + idx + tensor = np.random.uniform(low=self.low[shift_idx], + high=self.high[shift_idx], + size=self.label_shape[idx]) + tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) + input_data.append([(np.array(indices), val), tensor]) + else: + input_data.append((np.array(indices), val)) + + yield input_data + + def __len__(self): + """Return the length of dataset.""" + return sys.maxsize diff --git a/neural_compressor/data/datasets/imagenet_dataset.py b/neural_compressor/data/datasets/imagenet_dataset.py index 3cf944ad2ef..9d0d7daf2d1 100644 --- a/neural_compressor/data/datasets/imagenet_dataset.py +++ b/neural_compressor/data/datasets/imagenet_dataset.py @@ -29,20 +29,131 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +"""Dataset for ImageNet data generation on multiple framework backends.""" + import os +import re +import numpy as np from PIL import Image from neural_compressor.utils.utility import LazyImport from neural_compressor.utils import logger -from neural_compressor.experimental.data.datasets import dataset_registry, IterableDataset, Dataset +from .dataset import dataset_registry, IterableDataset, Dataset tf = LazyImport('tensorflow') +mx = LazyImport('mxnet') +torch = LazyImport('torch') + +@dataset_registry(dataset_type="ImagenetRaw", framework="onnxrt_qlinearops, \ + onnxrt_integerops", dataset_format='') +class ImagenetRaw(Dataset): # pragma: no cover + """Configuration for ImageNet raw dataset. + + Please arrange data in this way: + data_path/img1.jpg + data_path/img2.jpg + ... + data_path/imgx.jpg + dataset will read name and label of each image from image_list file, + if user set image_list to None, it will read from data_path/val_map.txt automatically. + """ + + def __init__(self, data_path, image_list, transform=None, filter=None): + """Initialize `ImagenetRaw` class. + + Args: + data_path (str): Root directory of dataset. + image_list (str): Data file, record image_names and their labels. + transform (transform object, default=None): Transform to process input data. + filter (Filter objects, default=None): Filter out examples according to specific conditions. 
+ """ + self.image_list = [] + self.label_list = [] + self.data_path = data_path + self.transform = transform + not_found = 0 + if image_list is None: + # by default look for val.txt + image_list = os.path.join(data_path, "val.txt") + + with open(image_list, 'r') as f: + for s in f: + image_name, label = re.split(r"\s+", s.strip()) + src = os.path.join(data_path, image_name) + if not os.path.exists(src): + # if the image does not exists ignore it + not_found += 1 + continue + self.image_list.append(src) + self.label_list.append(int(label)) + + if not self.image_list: + raise ValueError("no images in image list found") + if not_found > 0: + print("reduced image list, %d images not found", not_found) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + image_path, label = self.image_list[index], self.label_list[index] + with Image.open(image_path) as image: + image = np.array(image.convert('RGB')) + if self.transform is not None: + image, label = self.transform((image, label)) + return (image, label) + + def __len__(self): + """Return the length of dataset.""" + return len(self.image_list) + +@dataset_registry(dataset_type="ImagenetRaw", framework="pytorch", dataset_format='') +class PytorchImagenetRaw(ImagenetRaw): # pragma: no cover + """Dataset for ImageNet data generation on pytorch backend.""" + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + image_path, label = self.image_list[index], self.label_list[index] + with Image.open(image_path) as image: + image = image.convert('RGB') + if self.transform is not None: + image, label = self.transform((image, label)) + image = np.array(image) + return (image, label) + +@dataset_registry(dataset_type="ImagenetRaw", framework="mxnet", dataset_format='') +class MXNetImagenetRaw(ImagenetRaw): # pragma: no cover + """Dataset for ImageNet data generation on mxnet backend.""" + + def __getitem__(self, index): + """Return the item 
of dataset according to the given index.""" + image_path, label = self.image_list[index], self.label_list[index] + image = mx.image.imread(image_path) + if self.transform is not None: + image, label = self.transform((image, label)) + return (image, label) + +@dataset_registry(dataset_type="ImagenetRaw", framework="tensorflow, \ + tensorflow_itex", dataset_format='') +class TensorflowImagenetRaw(ImagenetRaw): # pragma: no cover + """Dataset for ImageNet data generation on tensorflow/inteltensorflow/tensorflow_itex backend.""" + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + image_path, label = self.image_list[index], self.label_list[index] + with Image.open(image_path) as image: + image = np.array(image.convert('RGB')) + if self.transform is not None: + image, label = self.transform((image, label)) + if type(image).__name__ == 'Tensor': + with tf.compat.v1.Session() as sess: + image = sess.run(image) + elif type(image).__name__ == 'EagerTensor': + image = image.numpy() + return (image, label) -# BELOW API TO BE DEPRECATED! 
@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format='') -class TensorflowImagenetDataset(IterableDataset): +class TensorflowImagenetDataset(IterableDataset): # pragma: no cover """Configuration for Imagenet dataset.""" def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter=None): - + """New a imagenet dataset for tensorflow.""" assert subset in ('validation', 'train'), \ 'only support subset (validation, train)' logger.warning("This api is going to be deprecated, " @@ -55,7 +166,7 @@ def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) from tensorflow.python.data.experimental import parallel_interleave - from neural_compressor.experimental.data.transforms.imagenet_transform import ParseDecodeImagenet + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) ds = ds.apply( parallel_interleave( @@ -72,10 +183,11 @@ def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter @dataset_registry(dataset_type="Imagenet", framework="onnxrt_qlinearops, \ onnxrt_integerops", dataset_format='') -class ONNXRTImagenetDataset(Dataset): +class ONNXRTImagenetDataset(Dataset): # pragma: no cover """Configuration for Imagenet dataset.""" def __init__(self, root, subset='val', num_cores=28, transform=None, filter=None): + """Initialize `ONNXRTImagenetDataset` class.""" self.val_dir = os.path.join(root, subset) assert os.path.exists(self.val_dir), "find no val dir in {}".format(root) + \ "please make sure there are train/val subfolders" @@ -93,9 +205,11 @@ def __init__(self, root, subset='val', num_cores=28, transform=None, filter=None self.image_list.append((img, idx)) def __len__(self): + """Return the number of images.""" return len(self.image_list) def __getitem__(self, index): + """Return the item of 
dataset according to the given index.""" from PIL import Image sample = self.image_list[index] image = Image.open(sample[0]) diff --git a/neural_compressor/data/datasets/style_transfer_dataset.py b/neural_compressor/data/datasets/style_transfer_dataset.py new file mode 100644 index 00000000000..8f6f6ff332f --- /dev/null +++ b/neural_compressor/data/datasets/style_transfer_dataset.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Dataset used for style transfer task on multiple framework backends.""" + +import os +import numpy as np +import glob +from .dataset import dataset_registry, Dataset + + +@dataset_registry(dataset_type="style_transfer", framework="tensorflow, \ + tensorflow_itex", dataset_format='') +class StyleTransferDataset(Dataset): # pragma: no cover + """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend. + + This Dataset is to construct a dataset from two specific image holders representing + content image folder and style image folder. + """ + + def __init__(self, content_folder, style_folder, crop_ratio=0.1, + resize_shape=(256, 256), image_format='jpg', transform=None, filter=None): + """Initialize `StyleTransferDataset` class. + + Args: + content_folder (str): Root directory of content images. 
+ style_folder (str): Root directory of style images. + crop_ratio (float, default=0.1): Cropped ratio to each side. + resize_shape (tuple, default=(256, 256)): Target size of image. + image_format (str, default='jpg'): Target image format. + transform (transform object, default=None): Transform to process input data. + filter (Filter objects, default=None): Filter out examples according to specific conditions. + """ + self.transform = transform + self.content_folder = content_folder + self.style_folder = style_folder + self.resize_shape = resize_shape + self.crop_ratio = crop_ratio + self.content_images = glob.glob(os.path.join(content_folder, '*' + image_format)) + self.style_images = glob.glob(os.path.join(style_folder, '*' + image_format)) + self.image_list = [] + for content in self.content_images: + for style in self.style_images: + self.image_list.append((content, style)) + + def __len__(self): + """Return the length of dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + from PIL import Image + content_image, style_image = self.image_list[index] + content_image = Image.open(content_image) + style_image = Image.open(style_image) + width, height = style_image.size + crop_ratio = self.crop_ratio + crop_box = ( + crop_ratio * height, + crop_ratio * width, + (1 - crop_ratio) * height, + (1 - crop_ratio) * width) + content_image = np.asarray(content_image.resize(self.resize_shape)) + style_image = np.asarray(style_image.resize(self.resize_shape)) + if content_image.max() > 1.0: + content_image = content_image / 255. + if style_image.max() > 1.0: + style_image = style_image / 255. 
+ + return (content_image, style_image), 0 diff --git a/neural_compressor/data/filters/__init__.py b/neural_compressor/data/filters/__init__.py new file mode 100644 index 00000000000..6ec13cf416f --- /dev/null +++ b/neural_compressor/data/filters/__init__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Built-in filter.""" + +from .filter import FILTERS, Filter, filter_registry +from os.path import dirname, basename, isfile, join +import glob + +modules = glob.glob(join(dirname(__file__), "*.py")) + +for f in modules: + if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + __import__(basename(f)[:-3], globals(), locals(), level=1) + + +__all__ = ["FILTERS", "Filter", "filter_registry"] diff --git a/neural_compressor/data/filters/coco_filter.py b/neural_compressor/data/filters/coco_filter.py new file mode 100644 index 00000000000..3f9431185ab --- /dev/null +++ b/neural_compressor/data/filters/coco_filter.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Built-in COCO filter.""" + +from neural_compressor.utils.utility import LazyImport +from .filter import Filter, filter_registry +tf = LazyImport('tensorflow') + + +@filter_registry(filter_type="LabelBalanceCOCORecord", framework="tensorflow, tensorflow_itex") +class LabelBalanceCOCORecordFilter(Filter): # pragma: no cover + """The label balance filter for COCO Record.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return tf.math.equal(len(label[0]), self.size) + + +@filter_registry(filter_type="LabelBalanceCOCORaw", framework="tensorflow, \ + tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") +class LabelBalanceCOCORawFilter(Filter): # pragma: no cover + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. 
+ """ + return len(label) == self.size + diff --git a/neural_compressor/data/filters/filter.py b/neural_compressor/data/filters/filter.py new file mode 100644 index 00000000000..7abda00e054 --- /dev/null +++ b/neural_compressor/data/filters/filter.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The base filter class for all frameworks.""" + +from abc import abstractmethod +from neural_compressor.utils.utility import singleton + + +@singleton +class TensorflowFilters(object): # pragma: no cover + """The base filter class for Tensorflow framework.""" + + def __init__(self): + """Initialize the atrribute of the class.""" + self.filters = {} + self.filters.update(TENSORFLOW_FILTERS) + + +@singleton +class ONNXRTQLFilters(object): # pragma: no cover + """The base filter class for ONNXRT framework QLinear mode.""" + + def __init__(self): + """Initialize the atrribute of the class.""" + self.filters = {} + self.filters.update(ONNXRT_QL_FILTERS) + + +@singleton +class ONNXRTITFilters(object): # pragma: no cover + """The base filter class for ONNXRT framework IT mode.""" + + def __init__(self): + """Initialize the atrribute of the class.""" + self.filters = {} + self.filters.update(ONNXRT_IT_FILTERS) + + +@singleton +class PyTorchFilters(object): # pragma: no cover + """The base filter class for PyTorch framework.""" + + def __init__(self): + """Initialize the 
atrribute of the class.""" + self.filters = {} + self.filters.update(PYTORCH_FILTERS) + + +@singleton +class MXNetFilters(object): # pragma: no cover + """The base filter class for MXNet framework.""" + + def __init__(self): + """Initialize the atrribute of the class.""" + self.filters = {} + self.filters.update(MXNET_FILTERS) + + +TENSORFLOW_FILTERS = {} +TENSORFLOW_ITEX_FILTERS = {} +ONNXRT_IT_FILTERS = {} +ONNXRT_QL_FILTERS = {} +PYTORCH_FILTERS = {} +MXNET_FILTERS = {} + +framework_filters = {"tensorflow": TensorflowFilters, + "tensorflow_itex": TensorflowFilters, + "pytorch": PyTorchFilters, + "pytorch_ipex": PyTorchFilters, + "pytorch_fx": PyTorchFilters, + "mxnet": MXNetFilters, + "onnxrt_qlinearops": ONNXRTQLFilters, + "onnxrt_qdq": ONNXRTQLFilters, + "onnxrt_qoperator": ONNXRTQLFilters, + "onnxrt_integerops": ONNXRTITFilters, + } + +registry_filters = {"tensorflow": TENSORFLOW_FILTERS, + "tensorflow_itex": TENSORFLOW_ITEX_FILTERS, + "pytorch": PYTORCH_FILTERS, + "pytorch_ipex": PYTORCH_FILTERS, + "pytorch_fx": PYTORCH_FILTERS, + "mxnet": MXNET_FILTERS, + "onnxrt_integerops": ONNXRT_IT_FILTERS, + "onnxrt_qdq": ONNXRT_QL_FILTERS, + "onnxrt_qoperator": ONNXRT_QL_FILTERS, + "onnxrt_qlinearops": ONNXRT_QL_FILTERS} + + +class FILTERS(object): # pragma: no cover + """The filter register for all frameworks. + + Args: + framework (str): frameworks in ["tensorflow", "tensorflow_itex", "mxnet", + "onnxrt_qdq", "pytorch", "pytorch_ipex", + "pytorch_fx", "onnxrt_integerops", + "onnxrt_qlinearops", "onnxrt_qoperator"]. 
+ """ + + def __init__(self, framework): + """Initialize the attribute of class.""" + assert framework in ["tensorflow", "tensorflow_itex", + "mxnet", "onnxrt_qdq", "pytorch", "pytorch_ipex", "pytorch_fx", + "onnxrt_integerops", "onnxrt_qlinearops", "onnxrt_qoperator"], \ + "framework support tensorflow pytorch mxnet onnxrt" + self.filters = framework_filters[framework]().filters + self.framework = framework + + def __getitem__(self, filter_type): + """Magic method. + + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + assert filter_type in self.filters.keys(), "filter support {}".\ + format(self.filters.keys()) + return self.filters[filter_type] + + +def filter_registry(filter_type, framework): # pragma: no cover + """Register all transform subclasses. + + Args: + filter_type (str): fILTER registration name. + framework (str): support 4 framework including 'tensorflow', 'pytorch', 'mxnet', 'onnxrt'. + cls (class): The class of register. + + Returns: + cls: The class of register. + """ + def decorator_transform(cls): + """Decorate a class.""" + for single_framework in [fwk.strip() for fwk in framework.split(',')]: + assert single_framework in [ + "tensorflow", + "tensorflow_itex", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "mxnet", + "onnxrt_integerops", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_qoperator" + ], "The framework support tensorflow mxnet pytorch onnxrt" + if filter_type in registry_filters[single_framework].keys(): + raise ValueError('Cannot have two transforms with the same name') + registry_filters[single_framework][filter_type] = cls + return cls + return decorator_transform + + +class Filter(object): # pragma: no cover + """The base class for transform. + + __call__ method is needed when write user specific transform. 
+ + """ + + @abstractmethod + def __call__(self, *args, **kwargs): + """Execute the filter.""" + raise NotImplementedError diff --git a/neural_compressor/data/transforms/__init__.py b/neural_compressor/data/transforms/__init__.py index 77edda38b30..eb849e9f002 100644 --- a/neural_compressor/data/transforms/__init__.py +++ b/neural_compressor/data/transforms/__init__.py @@ -14,7 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# ============================================================================== +"""Neural Compressor Built-in transforms for multiple framework backends.""" +from .transform import TRANSFORMS, BaseTransform, transform_registry +from .postprocess import Postprocess from os.path import dirname, basename, isfile, join import glob @@ -24,3 +28,5 @@ if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): __import__(basename(f)[:-3], globals(), locals(), level=1) + +__all__ = ["TRANSFORMS", "BaseTransform", "transform_registry", "Postprocess"] diff --git a/neural_compressor/data/transforms/coco_transform.py b/neural_compressor/data/transforms/coco_transform.py index 7bef847eb78..6fdac97e94d 100644 --- a/neural_compressor/data/transforms/coco_transform.py +++ b/neural_compressor/data/transforms/coco_transform.py @@ -36,9 +36,11 @@ # BELOW IS TO BE DEPRECATED! @transform_registry(transform_type="ParseDecodeCoco", \ process="preprocess", framework="tensorflow") -class ParseDecodeCocoTransform(BaseTransform): - +class ParseDecodeCocoTransform(BaseTransform): # pragma: no cover + """Coco decoding will be performed automatically from Neural Compressor v1.4. 
+ """ def __call__(self, sample): + """Convert `ParseDecodeCocoTransform` feature.""" logger.warning("This transform is going to be deprecated, " \ "coco decoding will be performed automatically from Neural Compressor v1.4.") return sample diff --git a/neural_compressor/data/transforms/imagenet_transform.py b/neural_compressor/data/transforms/imagenet_transform.py index 842e068a188..251e1aa667b 100644 --- a/neural_compressor/data/transforms/imagenet_transform.py +++ b/neural_compressor/data/transforms/imagenet_transform.py @@ -29,28 +29,130 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +"""Neural Compressor built-in imagenet transforms.""" import numpy as np -from neural_compressor.utils import logger from neural_compressor.utils.utility import LazyImport -from neural_compressor.experimental.data.transforms import transform_registry, BaseTransform - +from neural_compressor.utils import logger +from .transform import transform_registry, BaseTransform tf = LazyImport('tensorflow') cv2 = LazyImport('cv2') -# BELOW IS TO BE DEPRECATED! +@transform_registry(transform_type="QuantizedInput", \ + process="preprocess", framework="tensorflow, tensorflow_itex") +class QuantizedInput(BaseTransform): # pragma: no cover + """Convert the dtype of input to quantize it. 
+ + Args: + dtype(str): desired image dtype, support 'uint8', 'int8' + scale(float, default=None):scaling ratio of each point in image + + Returns: + tuple of processed image and label + """ + + def __init__(self, dtype, scale=None): + """Initialize `QuantizedInput` class.""" + self.dtype_map = {'uint8': tf.uint8, 'int8': tf.int8} + assert dtype in self.dtype_map.keys(), \ + 'only support cast dtype {}'.format(self.dtype_map.keys()) + self.dtype = dtype + self.scale = scale + + def __call__(self, sample): + """Convert the dtype of input.""" + # scale is not know when tuning, in this case this transform + # do nothing, it's only used when scale is set + if self.scale == None: + return sample + image, label = sample + image = image * self.scale + if self.dtype == 'uint8': + image = image + 128 + image = tf.dtypes.cast(image, dtype=self.dtype_map[self.dtype]) + return image, label + +@transform_registry(transform_type="LabelShift", \ + process="postprocess", framework="pytorch, tensorflow, tensorflow_itex,\ + onnxrt_qlinearops, onnxrt_integerops") +class LabelShift(BaseTransform): # pragma: no cover + """Convert label to label - label_shift. 
+ + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + +class ParseDecodeImagenet(): # pragma: no cover + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1)} + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + image = features['image/encoded'] + image = tf.image.decode_jpeg( + image, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + return (image, label) + @transform_registry(transform_type="ParseDecodeImagenet", \ process="preprocess", framework="tensorflow") -class ParseDecodeImagenetTransform(BaseTransform): +class ParseDecodeImagenetTransform(BaseTransform): # pragma: no cover + """Imagenet decoding will be performed automatically from Neural Compressor v1.4. + + Returns: + sample + """ def __call__(self, sample): + """Convert `ParseDecodeImagenetTransform` feature.""" logger.warning("This transform is going to be deprecated, " \ "imagenet decoding will be performed automatically from Neural Compressor v1.4.") return sample @transform_registry(transform_type="ResizeCropImagenet", \ process="preprocess", framework="tensorflow") -class TensorflowResizeCropImagenetTransform(BaseTransform): +class TensorflowResizeCropImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. 
Args: @@ -70,7 +172,7 @@ def __init__(self, height, width, random_crop=False, resize_side=256, \ resize_method='bilinear', random_flip_left_right=False, \ mean_value=[0.0,0.0,0.0], scale=1.0, \ data_format='channels_last', subpixels='RGB'): - + """Initialize `TensorflowResizeCropImagenetTransform` class.""" self.height = height self.width = width self.mean_value = mean_value @@ -84,6 +186,7 @@ def __init__(self, height, width, random_crop=False, resize_side=256, \ # sample is (images, labels) def __call__(self, sample): + """Convert `TensorflowResizeCropImagenetTransform` feature.""" image, label = sample shape = tf.shape(input=image) @@ -99,15 +202,6 @@ def __call__(self, sample): new_height = tf.cast(tf.math.rint(height*scale), dtype=tf.int32) new_width = tf.cast(tf.math.rint(width*scale), dtype=tf.int32) - # image = tf.cond(pred=tf.greater(shape[0], shape[1]), \ - # false_fn=lambda: tf.image.resize(image, \ - # tf.convert_to_tensor(value=[self.resize_side*shape[0]/shape[1], \ - # self.resize_side], dtype=tf.int32)), - # true_fn=lambda: tf.image.resize(image, \ - # tf.convert_to_tensor(value=[self.resize_side, \ - # self.resize_side * shape[1] / shape[0]], dtype=tf.int32)), - # ) - if self.subpixels=='BGR' and self.data_format=='channels_first': # 'RGB'->'BGR' image = tf.cond(tf.equal(tf.rank(image), 3), @@ -119,8 +213,7 @@ def __call__(self, sample): image = tf.expand_dims(image, 0) image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) - image = tf.squeeze(image) - + image = tf.squeeze(image) shape = tf.shape(input=image) if self.random_crop: y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height +1), @@ -141,7 +234,7 @@ def __call__(self, sample): @transform_registry(transform_type="BilinearImagenet", \ process="preprocess", framework="tensorflow") -class BilinearImagenetTransform(BaseTransform): +class BilinearImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is 
applicable to images in Imagenet. Args: @@ -157,7 +250,7 @@ class BilinearImagenetTransform(BaseTransform): def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0,0.0,0.0], scale=1.0): - + """Initialize `BilinearImagenetTransform` class.""" self.height = height self.width = width self.mean_value = mean_value @@ -166,6 +259,7 @@ def __init__(self, height, width, central_fraction=0.875, # sample is (images, labels) def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" image, label = sample if image.dtype is not tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) @@ -184,12 +278,11 @@ def __call__(self, sample): image = tf.multiply(image, 2.0) means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) image = (image - means) * self.scale - return (image, label) @transform_registry(transform_type="BilinearImagenet", process="preprocess", \ framework="onnxrt_qlinearops, onnxrt_integerops") -class OnnxBilinearImagenetTransform(BaseTransform): +class OnnxBilinearImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. Args: @@ -205,6 +298,7 @@ class OnnxBilinearImagenetTransform(BaseTransform): def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0,0.0,0.0], scale=1.0): + """Initialize `OnnxBilinearImagenetTransform` class.""" self.height = height self.width = width self.mean_value = mean_value @@ -212,6 +306,7 @@ def __init__(self, height, width, central_fraction=0.875, self.central_fraction = central_fraction def __call__(self, sample): + """Convert `OnnxBilinearImagenetTransform` feature.""" image, label = sample if isinstance(image, np.ndarray): image = image.astype('float32') / 255. 
@@ -235,12 +330,11 @@ def __call__(self, sample): means = np.broadcast_to(self.mean_value, image.shape) image = (image - means) * self.scale image = image.astype(np.float32) - return (image, label) @transform_registry(transform_type="ResizeCropImagenet", process="preprocess", \ framework="onnxrt_qlinearops, onnxrt_integerops") -class ONNXResizeCropImagenetTransform(BaseTransform): +class ONNXResizeCropImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. Args: @@ -257,7 +351,7 @@ class ONNXResizeCropImagenetTransform(BaseTransform): def __init__(self, height, width, random_crop=False, resize_side=256, \ mean_value=[0.0,0.0,0.0], std_value=[0.229, 0.224, 0.225], \ resize_method='bilinear', data_format='channels_last', subpixels='RGB'): - + """Initialize `ONNXResizeCropImagenetTransform` class.""" self.height = height self.width = width self.mean_value = mean_value @@ -270,6 +364,7 @@ def __init__(self, height, width, random_crop=False, resize_side=256, \ # sample is (images, labels) def __call__(self, sample): + """Convert `ONNXResizeCropImagenetTransform` feature.""" # TODO Support optional resize_method, data_format, subpixels for ONNX image, label = sample height, width = image.shape[0], image.shape[1] @@ -295,14 +390,22 @@ def __call__(self, sample): @transform_registry(transform_type="ResizeWithAspectRatio", process="preprocess", \ framework="onnxrt_qlinearops, onnxrt_integerops") -class ResizeWithAspectRatio(BaseTransform): +class ResizeWithAspectRatio(BaseTransform): # pragma: no cover + """Resize the image with aspect ratio. 
+ + Returns: + image and label + """ + def __init__(self, height, width, scale=87.5, inter_pol=cv2.INTER_AREA): + """Initialize `ResizeWithAspectRatio` class.""" self.height = height self.width = width self.scale = scale self.inter_pol = inter_pol def __call__(self, sample): + """Convert `ResizeWithAspectRatio` feature.""" (img, label) = sample assert len(img.shape) == 3 height, width, _ = img.shape diff --git a/neural_compressor/data/transforms/postprocess.py b/neural_compressor/data/transforms/postprocess.py new file mode 100644 index 00000000000..605417a73ab --- /dev/null +++ b/neural_compressor/data/transforms/postprocess.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Common Postprocess.""" + +class Postprocess(object): +# class Transform(object): + """Just collect the infos to construct a Postprocess.""" + + def __init__(self, postprocess_cls, name='user_postprocess', **kwargs): + """Initialize `Postprocess` class.""" + self.postprocess_cls = postprocess_cls + self.name = name + self.kwargs = kwargs diff --git a/neural_compressor/data/transforms/tokenization.py b/neural_compressor/data/transforms/tokenization.py new file mode 100644 index 00000000000..06dc3a86fe4 --- /dev/null +++ b/neural_compressor/data/transforms/tokenization.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tokenization helper classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from neural_compressor.utils.utility import LazyImport +import collections +import re +import unicodedata +import six +tf = LazyImport('tensorflow') + +def convert_to_unicode(text): # pragma: no cover + """Convert `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): # pylint: disable=undefined-variable + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + +def load_vocab(vocab_file): + """Load a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with tf.io.gfile.GFile(vocab_file, "r") as reader: + while True: + token = convert_to_unicode(reader.readline()) + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + +def convert_by_vocab(vocab, items): + """Convert a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + +def whitespace_tokenize(text): + """Run basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Run end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + """Construct a FullTokenizer. + + Args: + vocab_file: vocab file. + do_lower_case: Whether to lower case the input. 
+ """ + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + """Tokenize text.""" + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Convert tokens to ids.""" + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + """Convert ids to tokens.""" + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Run basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Construct a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
+ text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strip accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Split punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Add whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): # pragma: no cover + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Check whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Perform invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Run WordPiece tokenziation.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): + """Construct a WordpieceTokenizer. + + Args: + vocab: the given vocabulary. + unk_token: unknown token. + max_input_chars_per_word: max input chars number in any word. + """ + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenize a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. 
+ """ + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: # pragma: no cover + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Check whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": # pragma: no cover + return True + return False + +def _is_control(char): # pragma: no cover + """Check whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat in ("Cc", "Cf"): + return True + return False + +def _is_punctuation(char): # pragma: no cover + """Check whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. 
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/neural_compressor/data/transforms/transform.py b/neural_compressor/data/transforms/transform.py new file mode 100644 index 00000000000..3c645057f23 --- /dev/null +++ b/neural_compressor/data/transforms/transform.py @@ -0,0 +1,2727 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Neural Compressor built-in Transforms on multiple framework backends.""" + +import numpy as np +import collections +from abc import abstractmethod +from neural_compressor.utils.utility import LazyImport, singleton +from neural_compressor.utils import logger + +torchvision = LazyImport('torchvision') +torch = LazyImport('torch') +tf = LazyImport('tensorflow') +mx = LazyImport('mxnet') +cv2 = LazyImport('cv2') + +class Transforms(object): + """INC supports built-in preprocessing, postprocessing and general methods on different framework backends. + + Transforms base class provides the abstract methods. + Users can also register their own Transforms classes by inheriting this base class. + """ + + def __init__(self, process, concat_general=True): + """Initialize `Transforms` class. 
+ + Args: + process (str): processing type, the value can be preprocess, postprocess or general + concat_general (Boolean): users can use general transform in both preprocess + or postprocess if set True + """ + transform_map = {"preprocess": self._get_preprocess, + "postprocess": self._get_postprocess, + "general": self._get_general, } + self.transforms = transform_map[process]() + if concat_general: + self.transforms.update(transform_map['general']()) + + @abstractmethod + def _get_preprocess(self): + """Abstract method to get preprocessing method.""" + raise NotImplementedError + + @abstractmethod + def _get_postprocess(self): + """Abstract method to get postprocess method.""" + raise NotImplementedError + + @abstractmethod + def _get_general(self): + """Abstract method to get general method.""" + raise NotImplementedError + + +class TensorflowTransforms(Transforms): + """Tensorflow Transforms subclass.""" + + def _get_preprocess(self): + """Tensorflow get preprocess method. + + Returns: + preprocess: a dict including all the registered preprocess methods + """ + preprocess = { + "DecodeImage": TensorflowWrapFunction(tf.io.decode_jpeg), + "EncodeJpeg": TensorflowWrapFunction(tf.io.encode_jpeg), + } + # update the registry transforms + preprocess.update(TENSORFLOW_TRANSFORMS["preprocess"]) + return preprocess + + def _get_postprocess(self): + """Tensorflow get postprocess method. + + Returns: + postprocess: a dict including all the registered postprocess methods + """ + postprocess = {} + postprocess.update(TENSORFLOW_TRANSFORMS["postprocess"]) + return postprocess + + def _get_general(self): + """Tensorflow get general method. + + Returns: + general: a dict including all the registered general methods + """ + general = {} + general.update(TENSORFLOW_TRANSFORMS["general"]) + return general + + +class MXNetTransforms(Transforms): + """Mxnet Transforms subclass.""" + + def _get_preprocess(self): + """Mxnet get preprocess method. 
+ + Returns: + preprocess: a dict including all the registered preprocess methods + """ + preprocess = { + 'ToTensor': PytorchMxnetWrapFunction( + mx.gluon.data.vision.transforms.ToTensor), + 'CenterCrop': PytorchMxnetWrapFunction( + mx.gluon.data.vision.transforms.CenterCrop), + 'RandomHorizontalFlip': PytorchMxnetWrapFunction( + mx.gluon.data.vision.transforms.RandomFlipLeftRight), + 'RandomVerticalFlip': PytorchMxnetWrapFunction( + mx.gluon.data.vision.transforms.RandomFlipTopBottom), + } + preprocess.update(MXNET_TRANSFORMS["preprocess"]) + return preprocess + + def _get_postprocess(self): + """Mxnet get postprocess method. + + Returns: + postprocess: a dict including all the registered postprocess methods + """ + postprocess = {} + postprocess.update(MXNET_TRANSFORMS["postprocess"]) + return postprocess + + def _get_general(self): + """Mxnet get general method. + + Returns: + general: a dict including all the registered general methods + """ + general = { + 'Compose': mx.gluon.data.vision.transforms.Compose, + 'Cast': PytorchMxnetWrapFunction( + mx.gluon.data.vision.transforms.Cast), + } + general.update(MXNET_TRANSFORMS["general"]) + return general + + +class PyTorchTransforms(Transforms): + """Pytorch Transforms subclass.""" + + def _get_preprocess(self): + """Pytorch get preprocessing method. 
+ + Returns: + preprocess: a dict including all the registered preprocess methods + """ + preprocess = { + "ToTensor": PytorchMxnetWrapFunction( + torchvision.transforms.ToTensor), + "ToPILImage": PytorchMxnetWrapFunction( + torchvision.transforms.ToPILImage), + "CenterCrop": PytorchMxnetWrapFunction( + torchvision.transforms.CenterCrop), + "RandomCrop": PytorchMxnetWrapFunction( + torchvision.transforms.RandomCrop), + "RandomHorizontalFlip": PytorchMxnetWrapFunction( + torchvision.transforms.RandomHorizontalFlip), + "RandomVerticalFlip": PytorchMxnetWrapFunction( + torchvision.transforms.RandomVerticalFlip), + "Pad": PytorchMxnetWrapFunction( + torchvision.transforms.Pad), + "ColorJitter": PytorchMxnetWrapFunction( + torchvision.transforms.ColorJitter), + } + preprocess.update(PYTORCH_TRANSFORMS["preprocess"]) + return preprocess + + def _get_postprocess(self): + """Pytorch get postprocess method. + + Returns: + postprocess: a dict including all the registered postprocess methods + """ + postprocess = {} + postprocess.update(PYTORCH_TRANSFORMS["postprocess"]) + return postprocess + + def _get_general(self): + """Pytorch get general method. + + Returns: + general: a dict including all the registered general methods + """ + general = { + "Compose": torchvision.transforms.Compose, + } + general.update(PYTORCH_TRANSFORMS["general"]) + return general + +class ONNXRTQLTransforms(Transforms): + """Onnxrt_qlinearops Transforms subclass.""" + + def _get_preprocess(self): + """Onnxrt_qlinearops get preprocessing method. + + Returns: + preprocess: a dict including all the registered preprocess methods + """ + preprocess = {} + preprocess.update(ONNXRT_QL_TRANSFORMS["preprocess"]) + return preprocess + + def _get_postprocess(self): + """Onnxrt_qlinearops get postprocess method. 
+ + Returns: + postprocess: a dict including all the registered postprocess methods + """ + postprocess = {} + postprocess.update(ONNXRT_QL_TRANSFORMS["postprocess"]) + return postprocess + + def _get_general(self): + """Onnxrt_qlinearops get general method. + + Returns: + general: a dict including all the registered general methods + """ + general = {} + general.update(ONNXRT_QL_TRANSFORMS["general"]) + return general + +class ONNXRTITTransforms(Transforms): + """Onnxrt_integerops Transforms subclass.""" + + def _get_preprocess(self): + """Onnxrt_integerops get preprocessing method. + + Returns: + preprocess: a dict including all the registered preprocess methods + """ + preprocess = {} + preprocess.update(ONNXRT_IT_TRANSFORMS["preprocess"]) + return preprocess + + def _get_postprocess(self): + """Onnxrt_integerops get postprocess method. + + Returns: + postprocess: a dict including all the registered postprocess methods + """ + postprocess = {} + postprocess.update(ONNXRT_IT_TRANSFORMS["postprocess"]) + return postprocess + + def _get_general(self): + """Onnxrt_integerops get general method. 
+ + Returns: + general: a dict including all the registered general methods + """ + general = {} + general.update(ONNXRT_IT_TRANSFORMS["general"]) + return general + + +framework_transforms = {"tensorflow": TensorflowTransforms, + "tensorflow_itex": TensorflowTransforms, + "mxnet": MXNetTransforms, + "pytorch": PyTorchTransforms, + "pytorch_ipex": PyTorchTransforms, + "pytorch_fx": PyTorchTransforms, + "onnxrt_qlinearops": ONNXRTQLTransforms, + "onnxrt_integerops": ONNXRTITTransforms, + "onnxrt_qoperator": ONNXRTQLTransforms, + "onnxrt_qdq": ONNXRTQLTransforms} + +# transform registry will register transforms into these dicts +TENSORFLOW_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} +TENSORFLOW_ITEX_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} +MXNET_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} +PYTORCH_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} +ONNXRT_QL_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} +ONNXRT_IT_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} + +registry_transforms = {"tensorflow": TENSORFLOW_TRANSFORMS, + "tensorflow_itex": TENSORFLOW_ITEX_TRANSFORMS, + "mxnet": MXNET_TRANSFORMS, + "pytorch": PYTORCH_TRANSFORMS, + "pytorch_ipex": PYTORCH_TRANSFORMS, + "pytorch_fx": PYTORCH_TRANSFORMS, + "onnxrt_qlinearops": ONNXRT_QL_TRANSFORMS, + "onnxrt_qdq": ONNXRT_QL_TRANSFORMS, + "onnxrt_qoperator": ONNXRT_QL_TRANSFORMS, + "onnxrt_integerops": ONNXRT_IT_TRANSFORMS, + } + +class TRANSFORMS(object): + """Transforms collection class. + + Provide register method to register new Transforms + and provide __getitem__ method to get Transforms according to Transforms type. + """ + + def __init__(self, framework, process): + """Initialize `TRANSFORMS` class. 
+ + Args: + framework (str): different framework type like tensorflow, pytorch and so on + process (str): process type, the value can be preprocess, postprocess or general + """ + assert framework in ("tensorflow", "tensorflow_itex", "onnxrt_qoperator", \ + "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", \ + "onnxrt_qlinearops", "onnxrt_integerops", "mxnet"), \ + "framework support tensorflow pytorch mxnet onnxrt" + assert process in ("preprocess", "postprocess", + "general"), "process support preprocess postprocess, general" + self.transforms = framework_transforms[framework](process).transforms + self.framework = framework + self.process = process + + def __getitem__(self, transform_type): + """Get Transform according to Transforms type. + + Args: + transform_type (str): the value can be preprocess, postprocess or general + + Returns: + Transforms: the registered Transforms + """ + assert transform_type in self.transforms.keys(), "transform support {}".\ + format(self.transforms.keys()) + return self.transforms[transform_type] + + def register(self, name, transform_cls): + """Register new Transform according to Transforms type. + + Args: + name (str): process name + transform_cls (class): process function wrapper class + """ + assert name not in registry_transforms[self.framework][self.process].keys(), \ + 'register transform name already exists.' + registry_transforms[self.framework][self.process].update({name: transform_cls}) + + +def transform_registry(transform_type, process, framework): + """Class decorator used to register all transform subclasses. + + Args: + transform_type (str): Transform registration name + process (str): support 3 process including 'preprocess', 'postprocess', 'general' + framework (str): support 4 framework including 'tensorflow', 'pytorch', 'mxnet', 'onnxrt' + cls (class): The class of register. + + Returns: + cls: The class of register. 
+ """ + def decorator_transform(cls): + for single_framework in [fwk.strip() for fwk in framework.split(',')]: + assert single_framework in [ + "tensorflow", + "tensorflow_itex", + "mxnet", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_qlinearops", + "onnxrt_qdq", + "onnxrt_integerops", + "onnxrt_qoperator", + ], "The framework support tensorflow mxnet pytorch onnxrt" + if transform_type in registry_transforms[single_framework][process].keys(): + raise ValueError('Cannot have two transforms with the same name') + registry_transforms[single_framework][process][transform_type] = cls + return cls + return decorator_transform + + +class BaseTransform(object): + """The base class for transform.""" + + @abstractmethod + def __call__(self, *args, **kwargs): + """__call__ method is needed when write user specific transform.""" + raise NotImplementedError + + +class TensorflowWrapFunction(object): + """Tensorflow wrapper function class.""" + + def __init__(self, transform_func): + """Initialize `TensorflowWrapFunction` class. + + Args: + transform_func (function): tensorflow tranform function + """ + self.transform_func = transform_func + + def __call__(self, **kwargs): + """__call__ method. + + Returns: + TensorflowTransform class + """ + return TensorflowTransform(self.transform_func, **kwargs) + +class TensorflowTransform(BaseTransform): + """Tensorflow transform class, the subclass of BaseTransform.""" + + def __init__(self, transform_func, **kwargs): + """Initialize `TensorflowTransform` class. + + Args: + transform_func (function): tensorflow tranform function + """ + self.kwargs = kwargs + self.transform_func = transform_func + + def __call__(self, sample): + """__call__ method. 
+ + Returns: + a tuple of image and label obtained from tensorflow transform processing + """ + image, label = sample + image = self.transform_func(image, **self.kwargs) + return (image, label) + +class PytorchMxnetWrapFunction(object): + """Pytorch and MXNet wrapper function class.""" + + def __init__(self, transform_func): + """Initialize `PytorchMxnetWrapFunction` class. + + Args: + transform_func (function): pytorch or mxnet transform function + """ + self.transform_func = transform_func + + def __call__(self, **args): + """__call__ method. + + Returns: + PytorchMxnetTransform class + """ + return PytorchMxnetTransform(self.transform_func(**args)) + +class PytorchMxnetTransform(BaseTransform): + """Pytorch and Mxnet transform class, the subclass of BaseTransform.""" + + def __init__(self, transform_func): + """Initialize `PytorchMxnetTransform` class. + + Args: + transform_func (function): pytorch or mxnet transform function + """ + self.transform_func = transform_func + + def __call__(self, sample): + """__call__ method. 
+ + Returns: + a tuple of image and label obtained from pytorch or mxnet transform processing + """ + image, label = sample + image = self.transform_func(image) + return (image, label) + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +interpolation_pytorch_map = { + 'nearest': 0, + 'bilinear': 2, + 'bicubic': 3, +} + +interpolation_mxnet_map = { + 'nearest': 0, + 'bilinear': 1, + 'bicubic': 2, +} + +def get_torchvision_map(interpolation): + """Get torchvision interpolation map.""" + try: + from torchvision.transforms.functional import InterpolationMode + interpolation_torchvision_map = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + } + return interpolation_torchvision_map[interpolation] + except: # pragma: no cover + return interpolation + +@transform_registry(transform_type="Compose", process="general", \ + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex") +class ComposeTransform(BaseTransform): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ + framework="pytorch") +class CropToBoundingBox(BaseTransform): + """Crops an image to a specified bounding box. 
+ + Args: + offset_height (int): Vertical coordinate of the top-left corner of the result in the input + offset_width (int): Horizontal coordinate of the top-left corner of the result in the input + target_height (int): Height of the result + target_width (int): Width of the result + + Returns: + tuple of processed image and label + """ + + def __init__(self, offset_height, offset_width, target_height, target_width): + """Initialize `CropToBoundingBox` class.""" + self.offset_height = offset_height + self.offset_width = offset_width + self.target_height = target_height + self.target_width = target_width + + def __call__(self, sample): + """Call torchvision.transforms.functional.crop.""" + image, label = sample + image = torchvision.transforms.functional.crop( + image, + self.offset_height, + self.offset_width, + self.target_height, + self.target_width) + return (image, label) + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ + framework="mxnet") +class MXNetCropToBoundingBox(CropToBoundingBox): + """Crops an image to a specified bounding box. + + Args: + offset_height (int): Vertical coordinate of the top-left corner of the result in the input + offset_width (int): Horizontal coordinate of the top-left corner of the result in the input + target_height (int): Height of the result + target_width (int): Width of the result + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Call mx.image.fixed_crop.""" + image, label = sample + image = mx.image.fixed_crop( + image, + self.offset_height, + self.offset_width, + self.target_height, + self.target_width) + return (image, label) + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class ONNXRTCropToBoundingBox(CropToBoundingBox): + """Crops an image to a specified bounding box. 
+ + Args: + offset_height (int): Vertical coordinate of the top-left corner of the result in the input + offset_width (int): Horizontal coordinate of the top-left corner of the result in the input + target_height (int): Height of the result + target_width (int): Width of the result + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Crop the image in sample.""" + image, label = sample + image = image[self.offset_height : self.offset_height+self.target_height, + self.offset_width : self.offset_width+self.target_width, :] + return (image, label) + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class TensorflowCropToBoundingBox(CropToBoundingBox): + """Crops an image to a specified bounding box. + + Args: + offset_height (int): Vertical coordinate of the top-left corner of the result in the input + offset_width (int): Horizontal coordinate of the top-left corner of the result in the input + target_height (int): Height of the result + target_width (int): Width of the result + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Crop the image in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.crop_to_bounding_box(image, self.offset_height, + self.offset_width, self.target_height, self.target_width) + else: + image = image[self.offset_height : self.offset_height+self.target_height, + self.offset_width : self.offset_width+self.target_width, :] + return (image, label) + +@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops, pytorch, mxnet") +class ResizeWithRatio(BaseTransform): + """Resize image with aspect ratio and pad it to max shape(optional). + + If the image is padded, the label will be processed at the same time. + The input image should be np.array. 
+ + Args: + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim + + Returns: + tuple of processed image and label + """ + + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `ResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value + + def __call__(self, sample): + """Resize the image with ratio in sample.""" + image, label = sample + height, width = image.shape[:2] + scale = 1 + if self.min_dim: + scale = max(1, self.min_dim / min(height, width)) + if self.max_dim: + image_max = max(height, width) + if round(image_max * scale) > self.max_dim: + scale = self.max_dim / image_max + if scale != 1: + image = cv2.resize(image, (round(height * scale), round(width * scale))) + + bbox, str_label, int_label, image_id = label + + if self.padding: + h, w = image.shape[:2] + pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], + [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], + [0, 0]] + if not isinstance(bbox, np.ndarray): + bbox = np.array(bbox) + resized_box = bbox * [height, width, height, width] * scale + moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ + (self.max_dim-h)//2, (self.max_dim-w)//2]) + bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = np.pad(image, pad_param, mode='constant', constant_values=self.constant_value) + return image, (bbox, str_label, int_label, image_id) + +@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class TensorflowResizeWithRatio(BaseTransform): + """Resize image with aspect ratio and pad it to max shape(optional). 
+ + If the image is padded, the label will be processed at the same time. + The input image should be np.array or tf.Tensor. + + Args: + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim + + Returns: + tuple of processed image and label + """ + + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `TensorflowResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value + + def __call__(self, sample): + """Resize the image with ratio in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + shape = tf.shape(input=image) + height = tf.cast(shape[0], dtype=tf.float32) + width = tf.cast(shape[1], dtype=tf.float32) + scale = 1 + if self.min_dim: + scale = tf.maximum(1., tf.cast(self.min_dim / tf.math.minimum(height, width),\ + dtype=tf.float32)) + if self.max_dim: + image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32) + scale = tf.cond(pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), \ + true_fn=lambda: self.max_dim / image_max, + false_fn=lambda: scale) + image = tf.image.resize(image, (tf.math.round(height * scale), \ + tf.math.round(width * scale))) + bbox, str_label, int_label, image_id = label + + if self.padding: + shape = tf.shape(input=image) + h = tf.cast(shape[0], dtype=tf.float32) + w = tf.cast(shape[1], dtype=tf.float32) + pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], + [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], + [0, 0]] + resized_box = bbox * [height, width, height, width] * scale + moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ + (self.max_dim-h)//2, (self.max_dim-w)//2]) + bbox = moved_box / 
[self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = tf.pad(image, pad_param, constant_values=self.constant_value) + else: + transform = ResizeWithRatio(self.min_dim, self.max_dim, self.padding) + image, (bbox, str_label, int_label, image_id) = transform(sample) + return image, (bbox, str_label, int_label, image_id) + +@transform_registry(transform_type="Transpose", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class Transpose(BaseTransform): + """Transpose image according to perm. + + Args: + perm (list): A permutation of the dimensions of input image + + Returns: + tuple of processed image and label + """ + + def __init__(self, perm): + """Initialize `Transpose` class.""" + self.perm = perm + + def __call__(self, sample): + """Transpose the image according to perm in sample.""" + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +@transform_registry(transform_type="Transpose", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class TensorflowTranspose(Transpose): + """Transpose image according to perm. + + Args: + perm (list): A permutation of the dimensions of input image + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Transpose the image according to perm in sample.""" + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + if isinstance(image, tf.Tensor): + image = tf.transpose(image, perm=self.perm) + else: + image = np.transpose(image, axes=self.perm) + return (image, label) + +@transform_registry(transform_type="Transpose", process="preprocess", framework="mxnet") +class MXNetTranspose(Transpose): + """Transpose image according to perm. 
+ + Args: + perm (list): A permutation of the dimensions of input image + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Transpose the image according to perm in sample.""" + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = mx.ndarray.transpose(image, self.perm) + return (image, label) + +@transform_registry(transform_type="Transpose", process="preprocess", framework="pytorch") +class PyTorchTranspose(Transpose): + """Transpose image according to perm. + + Args: + perm (list): A permutation of the dimensions of input image + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Transpose the image according to perm in sample.""" + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = image.permute(self.perm) + return (image, label) + +@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class RandomVerticalFlip(BaseTransform): + """Vertically flip the given image randomly. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Vertically flip the image in sample.""" + image, label = sample + if np.random.rand(1)[0] > 0.5: + image = np.flipud(image) + return (image, label) + +@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class TensorflowRandomVerticalFlip(BaseTransform): + """Vertically flip the given image randomly. 
+ + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Vertically flip the image in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.random_flip_up_down(image) + else: + if np.random.rand(1)[0] > 0.5: + image = np.flipud(image) + return (image, label) + +@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class RandomHorizontalFlip(BaseTransform): + """Horizontally flip the given image randomly. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Horizontally flip the image in sample.""" + image, label = sample + if np.random.rand(1)[0] > 0.5: + image = np.fliplr(image) + return (image, label) + +@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class TensorflowRandomHorizontalFlip(BaseTransform): + """Horizontally flip the given image randomly. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Horizontally flip the image in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.random_flip_left_right(image) + else: + if np.random.rand(1)[0] > 0.5: + image = np.fliplr(image) + return (image, label) + +@transform_registry(transform_type="ToArray", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex, pytorch, mxnet") +class ToArray(BaseTransform): + """Convert PIL Image or NDArray to numpy array. 
+ + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Convert image in sample to numpy array.""" + from PIL import Image + image, label = sample + if isinstance(image, Image.Image): + image = np.array(image) + elif isinstance(image, mx.ndarray.NDArray): # pylint: disable=no-member + image = image.asnumpy() + else: + raise ValueError("Unknown image type!") + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +@transform_registry(transform_type="Cast", process="general", \ + framework="tensorflow, tensorflow_itex") +class CastTFTransform(BaseTransform): + """Convert image to given dtype. + + Args: + dtype (str, default='float32'): A dtype to convert image to + + Returns: + tuple of processed image and label + """ + + def __init__(self, dtype='float32'): + """Initialize `CastTFTransform` class.""" + self.tf_dtype_map = {'int16': tf.int16, 'uint8': tf.uint8, 'uint16': tf.uint16, + 'uint32':tf.uint32, 'uint64': tf.uint64, 'complex64': tf.complex64, + 'int32': tf.int32, 'int64':tf.int64, 'float32': tf.float32, + 'float16': tf.float16, 'float64':tf.float64, 'bool': tf.bool, + 'string': tf.string, 'int8': tf.int8, 'complex128': tf.complex128} + + assert dtype in self.tf_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + """Convert image in sample to given dtype.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.convert_image_dtype(image, dtype=self.tf_dtype_map[self.dtype]) + else: + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +@transform_registry(transform_type="Cast", process="general", + framework="onnxrt_qlinearops, 
onnxrt_integerops") +class CastONNXTransform(BaseTransform): + """Convert image to given dtype. + + Args: + dtype (str, default='float32'): A dtype to convert image to + + Returns: + tuple of processed image and label + """ + + def __init__(self, dtype='float32'): + """Initialize `CastONNXTransform` class.""" + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + """Convert image in sample to given dtype.""" + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +@transform_registry(transform_type="Cast", process="general", framework="pytorch") +class CastPyTorchTransform(BaseTransform): + """Convert image to given dtype. + + Args: + dtype (str, default='float32'): A dtype to convert image to + + Returns: + tuple of processed image and label + """ + + def __init__(self, dtype='float32'): + """Initialize `CastPyTorchTransform` class.""" + dtype_map = {'int8': torch.int8, 'uint8': torch.uint8, 'complex128': torch.complex128, + 'int32':torch.int32, 'int64':torch.int64, 'complex64': torch.complex64, + 'bfloat16':torch.bfloat16, 'float64':torch.float64, 'bool': torch.bool, + 'float16':torch.float16, 'int16':torch.int16, 'float32': torch.float32} + assert dtype in dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype_map[dtype] + + def __call__(self, sample): + """Convert image in sample to given dtype.""" + image, label = sample + image = image.type(self.dtype) + return (image, label) + +@transform_registry(transform_type="CenterCrop", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class CenterCropTFTransform(BaseTransform): + """Crops the given image at the center to the given size. 
+
+    Args:
+        size (list or int): Size of the result
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, size):
+        """Initialize `CenterCropTFTransform` class."""
+        if isinstance(size, int):
+            self.size = size, size
+        elif isinstance(size, list):
+            if len(size) == 1:
+                self.size = size[0], size[0]
+            elif len(size) == 2:
+                self.size = size[0], size[1]
+
+    def __call__(self, sample):
+        """Crops image in sample to the given size."""
+        image, label = sample
+        if isinstance(image, tf.Tensor):
+            if len(image.shape) == 3:
+                height, width = image.shape[0:2]
+            elif len(image.shape) == 4:
+                height, width = image.shape[1:3]
+            else:
+                raise ValueError("Unknown image shape")
+            if height < self.size[0] or width < self.size[1]:
+                raise ValueError("Target size shouldn't be larger than image size")
+            y0 = (height - self.size[0]) // 2
+            x0 = (width - self.size[1]) // 2
+            image = tf.image.crop_to_bounding_box(image, y0, x0, self.size[0], self.size[1])
+        else:
+            transform = CenterCropTransform(self.size)
+            image, label = transform(sample)
+        return (image, label)
+
+@transform_registry(transform_type="PaddedCenterCrop", process="preprocess", \
+                    framework="tensorflow, tensorflow_itex")
+class PaddedCenterCropTransform(BaseTransform):
+    """Crops the given image at the center to the given size with padding.
+
+    Args:
+        size (list or int): Size of the result
+        crop_padding (int): crop padding number
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, size, crop_padding=0):
+        """Initialize `PaddedCenterCropTransform` class."""
+        if isinstance(size, int):
+            self.image_size = size
+        elif isinstance(size, list):
+            if len(size) == 1:
+                self.image_size = size[0]
+            elif len(size) == 2:
+                if size[0] != size[1]:
+                    raise ValueError("'crop height must equal to crop width'")
+                self.image_size = size[0]
+        self.crop_padding = crop_padding
+
+    def __call__(self, sample):
+        """Crops image in sample to the given size with padding."""
+        image, label = sample
+        h, w = image.shape[0], image.shape[1]
+
+        padded_center_crop_size = \
+            int((self.image_size / (self.image_size + self.crop_padding)) * min(h, w))
+
+        y0 = (h - padded_center_crop_size + 1) // 2
+        x0 = (w - padded_center_crop_size + 1) // 2
+        image = image[y0:y0 + padded_center_crop_size, x0:x0 + padded_center_crop_size, :]
+        return (image, label)
+
+@transform_registry(transform_type="Resize", process="preprocess", \
+                    framework="tensorflow, tensorflow_itex")
+class ResizeTFTransform(BaseTransform):
+    """Resize the input image to the given size.
+ + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation='bilinear'): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ['bilinear', 'nearest', 'bicubic']: + raise ValueError('Unsupported interpolation type!') + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, + interpolation=interpolation_map[self.interpolation]) + return (image, label) + +@transform_registry(transform_type="Resize", process="preprocess", \ + framework="pytorch") +class ResizePytorchTransform(BaseTransform): + """Resize the input image to the given size. 
+ + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation='bilinear'): + """Initialize `ResizePytorchTransform` class.""" + self.size = size + if interpolation in interpolation_pytorch_map.keys(): + self.interpolation = get_torchvision_map(interpolation_pytorch_map[interpolation]) + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + transformer = torchvision.transforms.Resize(size=self.size, + interpolation=self.interpolation) + return (transformer(image), label) + +@transform_registry(transform_type="RandomCrop", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class RandomCropTFTransform(BaseTransform): + """Crop the image at a random location to the given size. 
+ + Args: + size (list or tuple or int): Size of the result + + Returns: + tuple of processed image and label + """ + + def __init__(self, size): + """Initialize `RandomCropTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list) or isinstance(size, tuple): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + def __call__(self, sample): + """Crop the image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + if len(image.shape) == 3: + height, width = image.shape[0:2] + elif len(image.shape) == 4: + height, width = image.shape[1:3] + + if self.size[0] > height or self.size[1] > width: + raise ValueError('Crop size must be smaller than image size') + + if self.size[0] == height and self.size[1] == width: + return (image, label) + + height = tf.cast(height, dtype=tf.float32) + width = tf.cast(width, dtype=tf.float32) + offset_height = (height - self.size[0]) / 2 + offset_width = (width - self.size[1]) / 2 + offset_height = tf.cast(offset_height, dtype=tf.int32) + offset_width = tf.cast(offset_width, dtype=tf.int32) + + image = tf.image.crop_to_bounding_box(image, offset_height, + offset_width, self.size[0], self.size[1]) + else: + transform = RandomCropTransform(self.size) + image, label = transform(sample) + return (image, label) + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ + framework="pytorch") +class RandomResizedCropPytorchTransform(BaseTransform): + """Crop the given image to random size and aspect ratio. + + Args: + size (list or int): + Size of the result + scale (tuple or list, default=(0.08, 1.0)): + range of size of the origin size cropped + ratio (tuple or list, default=(3. / 4., 4. 
/ 3.)): + range of aspect ratio of the origin aspect ratio cropped + interpolation (str, default='bilinear'): + Desired interpolation type, support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation='bilinear'): + """Initialize `RandomResizedCropPytorchTransform` class.""" + self.size = size + self.scale = scale + self.ratio = ratio + + if interpolation in interpolation_pytorch_map.keys(): + self.interpolation = get_torchvision_map(interpolation_pytorch_map[interpolation]) + else: + raise ValueError("Undefined interpolation type") + + if scale[0] > scale[1] or ratio[0] > ratio[1]: + raise ValueError("Scale and ratio should be of kind (min, max)") + + def __call__(self, sample): + """Crop the image in sample to the random size.""" + image, label = sample + transformer = torchvision.transforms.RandomResizedCrop(size=self.size, + scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + return (transformer(image), label) + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ + framework="mxnet") +class RandomResizedCropMXNetTransform(BaseTransform): + """Crop the given image to random size and aspect ratio. + + Args: + size (list or int): + Size of the result + scale (tuple or list, default=(0.08, 1.0)): + range of size of the origin size cropped + ratio (tuple or list, default=(3. / 4., 4. / 3.)): + range of aspect ratio of the origin aspect ratio cropped + interpolation (str, default='bilinear'): + Desired interpolation type, support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), + interpolation='bilinear'): + """Initialize `RandomResizedCropMXNetTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[1], size[0] + self.scale = scale + self.ratio = ratio + + if interpolation in interpolation_mxnet_map.keys(): + self.interpolation = interpolation_mxnet_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + if scale[0] > scale[1] or ratio[0] > ratio[1]: + raise ValueError("Scale and ratio should be of kind (min, max)") + + def __call__(self, sample): + """Crop the image in sample to the random size.""" + image, label = sample + transformer = mx.gluon.data.vision.transforms.RandomResizedCrop(size=self.size, + scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + return (transformer(image), label) + + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class RandomResizedCropTFTransform(BaseTransform): + """Crop the given image to random size and aspect ratio. + + Args: + size (list or int): + Size of the result + scale (tuple or list, default=(0.08, 1.0)): + range of size of the origin size cropped + ratio (tuple or list, default=(3. / 4., 4. / 3.)): + range of aspect ratio of the origin aspect ratio cropped + interpolation (str, default='bilinear'): + Desired interpolation type, support 'bilinear', 'nearest' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=( + 3. / 4., 4. 
/ 3.), interpolation='bilinear'): + """Initialize `RandomResizedCropTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + if self.interpolation not in ['bilinear', 'nearest']: + raise ValueError('Unsupported interpolation type!') + if scale[0] > scale[1] or ratio[0] > ratio[1]: + raise ValueError("Scale and ratio should be of kind (min, max)") + + def get_params(self, image, scale, ratio): + """Get the image prameters: position, height and width.""" + shape = image.shape + height = tf.cast(shape[0], dtype=tf.float32) + width = tf.cast(shape[1], dtype=tf.float32) + src_area = height * width + + for _ in range(10): + target_area = np.random.uniform(scale[0], scale[1]) * src_area + log_ratio = (np.log(ratio[0]), np.log(ratio[1])) + new_ratio = np.exp(np.random.uniform(log_ratio[0], log_ratio[1])) + + new_w = tf.math.round( + tf.math.sqrt(tf.math.multiply(target_area, new_ratio))) + new_h = tf.math.round( + tf.math.sqrt(tf.math.divide(target_area, new_ratio))) + + x0, y0 = tf.case( + [(tf.math.logical_and( + tf.math.greater(width, new_w), tf.math.greater(height, new_h)), + lambda: (tf.random.uniform( + shape=[], maxval=tf.math.subtract(width, new_w)), + tf.random.uniform( + shape=[], maxval=tf.math.subtract(height, new_h))) + )], + default=lambda: (-1.0, -1.0)) + if x0 != -1.0 and y0 != -1.0: + return y0, x0, new_h, new_w + + in_ratio = width / height + new_w, new_h = tf.case([(tf.math.greater(min(ratio), in_ratio), + lambda: (width, tf.math.round(width / min(ratio)))), + (tf.math.greater(in_ratio, max(ratio)), + lambda: (height, tf.math.round(height * max(ratio))))], + default=lambda: (width, height)) + + y0 = (height - new_h) / 2 + x0 = (width - new_w) / 2 + return y0, x0, new_h, new_w + + def __call__(self, sample): + """Crop 
the image in sample to the random size.""" + image, label = sample + if isinstance(image, tf.Tensor): + y0, x0, h, w = self.get_params(image, self.scale, self.ratio) + squeeze = False + if len(image.shape) == 3: + squeeze = True + image = tf.expand_dims(image, axis=0) + height, width = image.shape[1:3] + height = tf.cast(height, dtype=tf.float32) + width = tf.cast(width, dtype=tf.float32) + box_indices = tf.range(0, image.shape[0], dtype=tf.int32) + boxes = [y0/height, x0/width, (y0+h)/height, (x0+w)/width] + boxes = tf.broadcast_to(boxes, [image.shape[0], 4]) + image = tf.image.crop_and_resize(image, boxes, box_indices, + self.size, self.interpolation) + if squeeze: + image = tf.squeeze(image, axis=0) + else: + transform = RandomResizedCropTransform(self.size, self.scale, + self.ratio, self.interpolation) + image, label = transform(sample) + return (image, label) + +@transform_registry(transform_type="Normalize", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class NormalizeTFTransform(BaseTransform): + """Normalize a image with mean and standard deviation. 
+ + Args: + mean (list, default=[0.0]): + means for each channel, if len(mean)=1, mean will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + std (list, default=[1.0]): + stds for each channel, if len(std)=1, std will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + + Returns: + tuple of processed image and label + """ + + def __init__(self, mean=[0.0], std=[1.0], rescale=None): + """Initialize `NormalizeTFTransform` class.""" + self.mean = mean + self.std = std + self.rescale = rescale + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + """Normalize the image in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + orig_dtype = image.dtype + mean = tf.broadcast_to(self.mean, tf.shape(input=image)) + mean = tf.cast(mean, dtype=image.dtype) + std = tf.broadcast_to(self.std, tf.shape(input=image)) + std = tf.cast(std, dtype=image.dtype) + image = (image - mean) / std + image = tf.cast(image, dtype=orig_dtype) + else: + transform = NormalizeTransform(self.mean, self.std) + image, label = transform(sample) + if self.rescale: + image /= self.rescale[0] + image -= self.rescale[1] + return (image, label) + +@transform_registry(transform_type='KerasRescale', process="preprocess", \ + framework='tensorflow, tensorflow_itex') +class RescaleKerasPretrainTransform(BaseTransform): + """Scale the values of image to [0,1]. 
+ + Returns: + tuple of processed image and label + """ + + def __init__(self, rescale=None): + """Initialize `RescaleKerasPretrainTransform` class.""" + self.rescale = rescale + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if self.rescale: + image /= self.rescale[0] + image -= self.rescale[1] + return (image, label) + +@transform_registry(transform_type='Rescale', process="preprocess", \ + framework='tensorflow, tensorflow_itex') +class RescaleTFTransform(BaseTransform): + """Scale the values of image to [0,1]. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.cast(image, tf.float32) / 255. + else: + image = image.astype('float32') / 255. + return (image, label) + +@transform_registry(transform_type='Rescale', process="preprocess", \ + framework='onnxrt_qlinearops, onnxrt_integerops') +class RescaleTransform(BaseTransform): + """Scale the values of image to [0,1]. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. + return (image, label) + +@transform_registry(transform_type='AlignImageChannel', process="preprocess", \ + framework='tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, mxnet') +class AlignImageChannelTransform(BaseTransform): + """Align image channel, now just support [H,W]->[H,W,dim], [H,W,4]->[H,W,3] and [H,W,3]->[H,W]. + + Input image must be np.ndarray. 
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, dim=3):
+        """Initialize `AlignImageChannelTransform` class."""
+        logger.warning("This transform is going to be deprecated")
+        if dim < 1 or dim > 4:
+            raise ValueError('Unsupported image dim!')
+        self.dim = dim
+
+    def __call__(self, sample):
+        """Align channel of the image in sample."""
+        image, label = sample
+        if len(image.shape) == 2:
+            image = np.dstack([image]*self.dim)
+        if isinstance(image, np.ndarray) and image.shape[-1] != self.dim:
+            if image.shape[-1] == 4 and self.dim == 3:
+                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+            elif image.shape[-1] == 3 and self.dim == 1:
+                image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+                image = np.expand_dims(image, axis=-1)
+            else:
+                raise ValueError('Unsupported conversion!')
+        return (image, label)
+
+@transform_registry(transform_type='AlignImageChannel', process="preprocess", \
+                    framework='pytorch')
+class PyTorchAlignImageChannel(BaseTransform):
+    """Align image channel, now just support [H,W,4]->[H,W,3] and [H,W,3]->[H,W].
+
+    Input image must be PIL Image.
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, dim=3):
+        """Initialize `PyTorchAlignImageChannel` class."""
+        logger.warning("This transform is going to be deprecated")
+        if dim != 1 and dim != 3:
+            raise ValueError('Unsupported image dim!')
+        self.dim = dim
+
+    def __call__(self, sample):
+        """Align channel of the image in sample."""
+        from PIL import Image
+        image, label = sample
+        assert isinstance(image, Image.Image), 'Input image must be PIL Image'
+        if self.dim == 3:
+            image = image.convert('RGB')
+        elif self.dim == 1:
+            image = image.convert('L')
+        else:
+            raise ValueError('Unsupported conversion!')
+        return (image, label)
+
+@transform_registry(transform_type="ToNDArray", process="preprocess", \
+                    framework="mxnet")
+class ToNDArrayTransform(BaseTransform):
+    """Convert np.array to NDArray.
+ + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Convert np.array of the image in sample.""" + image, label = sample + image = mx.nd.array(image) + return image, label + +@transform_registry(transform_type="Resize", process="preprocess", framework="mxnet") +class ResizeMXNetTransform(BaseTransform): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation='bilinear'): + """Initialize `ResizeMXNetTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[1], size[0] + + if interpolation in interpolation_mxnet_map.keys(): + self.interpolation = interpolation_mxnet_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + transformer = mx.gluon.data.vision.transforms.Resize(size=self.size, + interpolation=self.interpolation) + return (transformer(image), label) + + +@transform_registry(transform_type="Resize", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class ResizeTransform(BaseTransform): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic'. 
+ + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation='bilinear'): + """Initialize `ResizeTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +@transform_registry(transform_type="CropResize", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class CropResizeTFTransform(BaseTransform): + """Crop the input image with given location and resize it. 
+ + Args: + x (int):Left boundary of the cropping area + y (int):Top boundary of the cropping area + width (int):Width of the cropping area + height (int):Height of the cropping area + size (list or int): resize to new size after cropping + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, x, y, width, height, size, interpolation='bilinear'): + """Initialize `CropResizeTFTransform` class.""" + if interpolation not in ['bilinear', 'nearest', 'bicubic']: + raise ValueError('Unsupported interpolation type!') + self.interpolation = interpolation + self.x = x + self.y = y + self.width = width + self.height = height + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + def __call__(self, sample): + """Resize the input image in sample with given location.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.crop_to_bounding_box( + image, self.y, self.x, self.height, self.width) + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + transform = CropResizeTransform(self.x, self.y, self.width, + self.height, self.size, self.interpolation) + image, label = transform(sample) + return (image, label) + +@transform_registry(transform_type="CropResize", process="preprocess", framework="pytorch") +class PyTorchCropResizeTransform(BaseTransform): + """Crop the input image with given location and resize it. 
+ + Args: + x (int):Left boundary of the cropping area + y (int):Top boundary of the cropping area + width (int):Width of the cropping area + height (int):Height of the cropping area + size (list or int): resize to new size after cropping + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, x, y, width, height, size, interpolation='bilinear'): + """Initialize `PyTorchCropResizeTransform` class.""" + if interpolation in interpolation_pytorch_map.keys(): + self.interpolation = get_torchvision_map(interpolation_pytorch_map[interpolation]) + else: + raise ValueError("Undefined interpolation type") + self.x = x + self.y = y + self.width = width + self.height = height + self.size = size + + def __call__(self, sample): + """Resize the input image in sample with given location.""" + image, label = sample + image = image.crop((self.x, self.y, self.x + self.width, self.y + self.height)) + transformer = torchvision.transforms.Resize(size=self.size, + interpolation=self.interpolation) + return (transformer(image), label) + +@transform_registry(transform_type="CropResize", process="preprocess", framework="mxnet") +class MXNetCropResizeTransform(BaseTransform): + """Crop the input image with given location and resize it. 
+ + Args: + x (int):Left boundary of the cropping area + y (int):Top boundary of the cropping area + width (int):Width of the cropping area + height (int):Height of the cropping area + size (list or int): resize to new size after cropping + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, x, y, width, height, size, interpolation='bilinear'): + """Initialize `MXNetCropResizeTransform` class.""" + if interpolation in interpolation_mxnet_map.keys(): + self.interpolation = interpolation_mxnet_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + self.x = x + self.y = y + self.width = width + self.height = height + self.size = size + + def __call__(self, sample): + """Resize the input image in sample with given location.""" + image, label = sample + transformer = mx.gluon.data.vision.transforms.CropResize(self.x, self.y, self.width, + self.height, self.size, self.interpolation) + return (transformer(image), label) + +@transform_registry(transform_type="CropResize", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class CropResizeTransform(BaseTransform): + """Crop the input image with given location and resize it. 
+ + Args: + x (int):Left boundary of the cropping area + y (int):Top boundary of the cropping area + width (int):Width of the cropping area + height (int):Height of the cropping area + size (list or int): resize to new size after cropping + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, x, y, width, height, size, interpolation='bilinear'): + """Initialize `CropResizeTransform` class.""" + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + self.x = x + self.y = y + self.width = width + self.height = height + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list) or isinstance(size, tuple): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + def __call__(self, sample): + """Crop the input image in sample with given location.""" + image, label = sample + image = image[self.y:self.y+self.height, self.x:self.x+self.width, :] + image = cv2.resize(image, self.size, interpolation=self.interpolation) + return (image, label) + +@transform_registry(transform_type="CenterCrop", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class CenterCropTransform(BaseTransform): + """Crops the given image at the center to the given size. 
+ + Args: + size (list or int): Size of the result + + Returns: + tuple of processed image and label + """ + + def __init__(self, size): + """Initialize `CenterCropTransform` class.""" + if isinstance(size, int): + self.height, self.width = size, size + elif isinstance(size, list) or isinstance(size, tuple): + if len(size) == 1: + self.height, self.width = size[0], size[0] + elif len(size) == 2: + self.height, self.width = size[0], size[1] + + def __call__(self, sample): + """Crop the input image in sample at the center to the given size.""" + image, label = sample + h, w = image.shape[0], image.shape[1] + if h + 1 < self.height or w + 1 < self.width: + raise ValueError( + "Required crop size {} is larger then input image size {}".format( + (self.height, self.width), (h, w))) + + if self.height == h and self.width == w: + return (image, label) + + y0 = (h - self.height) // 2 + x0 = (w - self.width) // 2 + image = image[y0:y0 + self.height, x0:x0 + self.width, :] + return (image, label) + +@transform_registry(transform_type="Normalize", process="preprocess", framework="mxnet") +class MXNetNormalizeTransform(BaseTransform): + """Normalize a image with mean and standard deviation. 
+ + Args: + mean (list, default=[0.0]): + means for each channel, if len(mean)=1, mean will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + std (list, default=[1.0]): + stds for each channel, if len(std)=1, std will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + + Returns: + tuple of processed image and label + """ + + def __init__(self, mean=[0.0], std=[1.0]): + """Initialize `MXNetNormalizeTransform` class.""" + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + """Normalize the image in sample.""" + image, label = sample + axes = [len(image.shape) - 1] + axes.extend(list(np.arange(len(image.shape)-1))) + image = mx.ndarray.transpose(image, axes) + assert len(self.mean) == image.shape[0], 'Mean channel must match image channel' + transformer = mx.gluon.data.vision.transforms.Normalize(self.mean, self.std) + image = transformer(image) + axes = list(np.arange(1, len(image.shape))) + axes.extend([0]) + image = mx.ndarray.transpose(image, axes) + return (image, label) + +@transform_registry(transform_type="Normalize", process="preprocess", framework="pytorch") +class PyTorchNormalizeTransform(MXNetNormalizeTransform): + """Normalize a image with mean and standard deviation. 
+ + Args: + mean (list, default=[0.0]): + means for each channel, if len(mean)=1, mean will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + std (list, default=[1.0]): + stds for each channel, if len(std)=1, std will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Normalize the image in sample.""" + image, label = sample + transformer = torchvision.transforms.Normalize(self.mean, self.std) + image = transformer(image) + return (image, label) + +@transform_registry(transform_type="Normalize", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class NormalizeTransform(BaseTransform): + """Normalize a image with mean and standard deviation. + + Args: + mean (list, default=[0.0]): + means for each channel, if len(mean)=1, mean will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + std (list, default=[1.0]): + stds for each channel, if len(std)=1, std will be broadcasted to each channel, + otherwise its length should be same with the length of image shape + + Returns: + tuple of processed image and label + """ + + def __init__(self, mean=[0.0], std=[1.0]): + """Initialize `NormalizeTransform` class.""" + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + """Normalize the image in sample.""" + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +@transform_registry(transform_type="RandomCrop", process="preprocess", \ + framework="mxnet, onnxrt_qlinearops, onnxrt_integerops") +class RandomCropTransform(BaseTransform): + """Crop the image at a random location to the 
given size. + + Args: + size (list or tuple or int): Size of the result + + Returns: + tuple of processed image and label + """ + + def __init__(self, size): + """Initialize `RandomCropTransform` class.""" + if isinstance(size, int): + self.height, self.width = size, size + elif isinstance(size, list) or isinstance(size, tuple): + if len(size) == 1: + self.height, self.width = size[0], size[0] + elif len(size) == 2: + self.height, self.width = size[0], size[1] + + def __call__(self, sample): + """Crop the image in sample to the given size.""" + image, label = sample + h, w = image.shape[0], image.shape[1] + if h + 1 < self.height or w + 1 < self.width: + raise ValueError( + "Required crop size {} is larger then input image size {}".format( + (self.height, self.width), (h, w))) + + if self.height == h and self.width == w: + return (image, label) + + rand_h = np.random.randint(0, h - self.height + 1) + rand_w = np.random.randint(0, w - self.width + 1) + if len(image.shape) == 2: + image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width] + else: + image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width, :] + return (image, label) + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class RandomResizedCropTransform(BaseTransform): + """Crop the given image to random size and aspect ratio. + + Args: + size (list or int): + Size of the result + scale (tuple or list, default=(0.08, 1.0)): + range of size of the origin size cropped + ratio (tuple or list, default=(3. / 4., 4. / 3.)): + range of aspect ratio of the origin aspect ratio cropped + interpolation (str, default='bilinear'): + Desired interpolation type, support 'bilinear', 'nearest' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=( + 3. / 4., 4. 
/ 3.), interpolation='bilinear'): + """Initialize `RandomResizedCropTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list) or isinstance(size, tuple): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + self.scale = scale + self.ratio = ratio + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + if scale[0] > scale[1] or ratio[0] > ratio[1]: + raise ValueError("Scale and ratio should be of kind (min, max)") + + def get_params(self, image, scale, ratio): + """Get the image prameters: position, height and width.""" + h, w = image.shape[0], image.shape[1] + src_area = h * w + + for _ in range(10): + target_area = np.random.uniform(scale[0], scale[1]) * src_area + log_ratio = (np.log(ratio[0]), np.log(ratio[1])) + new_ratio = np.exp(np.random.uniform(log_ratio[0], log_ratio[1])) + + new_w = int(np.round(np.sqrt(target_area * new_ratio))) + new_h = int(np.round(np.sqrt(target_area / new_ratio))) + + if new_w < w and new_h < h: + x0 = np.random.randint(0, w - new_w) + y0 = np.random.randint(0, h - new_h) + return y0, x0, new_h, new_w + + in_ratio = float(w) / float(h) + if in_ratio < min(ratio): + new_w = w + new_h = int(round(new_w / min(ratio))) + elif in_ratio > max(ratio): + new_h = h + new_w = int(round(new_h * max(ratio))) + else: + new_w = w + new_h = h + y0 = (h - new_h) // 2 + x0 = (w - new_w) // 2 + return y0, x0, new_h, new_w + + def __call__(self, sample): + """Crop the image in sample to random size.""" + image, label = sample + y0, x0, h, w = self.get_params(image, self.scale, self.ratio) + crop_img = image[y0:y0 + h, x0:x0 + w, :] + image = cv2.resize(crop_img, self.size, interpolation=self.interpolation) + return (image, label) + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + import math + if not 
scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + +def get_final_text(pred_text, orig_text, do_lower_case): + """Project the tokenized prediction back to the original text.""" + import six + from . import tokenization + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case) + tok_text = " ".join(tokenizer.tokenize(orig_text)) + start_position = tok_text.find(pred_text) + if start_position == -1: + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + return orig_text + + tok_s_to_ns_map = {} + for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + return orig_text + + orig_end_position = None 
+ if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + +class SquadExample(object): + """A single training/test example for simple sequence classification. + + For examples without an answer, the start and end position are -1. + """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=False): + """Initialize `SquadExample` class.""" + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + """Initialize `InputFeatures` class.""" + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + +def read_squad_examples(input_file): + """Read a SQuAD json file into a list of SquadExample.""" + import json + with tf.io.gfile.GFile(input_file, "r") as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or 
c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + example = SquadExample( + qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + return examples + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + +def convert_examples_to_features(examples, tokenizer, max_seq_length, + doc_stride, max_query_length, output_fn): + """Load a data file into a list of `InputBatch`s.""" + unique_id = 1000000000 + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + if len(query_tokens) > max_query_length: + query_tokens = 
query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. + _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 
for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + + feature = InputFeatures( + unique_id=unique_id, + example_index=example_index, + doc_span_index=doc_span_index, + tokens=tokens, + token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + start_position=start_position, + end_position=end_position, + is_impossible=example.is_impossible) + # Run callback + output_fn(feature) + unique_id += 1 + +@transform_registry(transform_type="Collect", \ + process="postprocess", framework="tensorflow") +class CollectTransform(BaseTransform): + """Postprocess the predictions, collect data.""" + + def __init__(self, length=10833): + """Initialize `CollectTransform` class.""" + self.length = length + self.unique_id = [] + self.start_logits = [] + self.end_logits = [] + self.all_sample = (None, None) + self.idx = 1000000000 + + def __call__(self, sample): + """Collect postprocess data.""" + all_results, label = sample + result_list = [np.expand_dims(result, 0) for result in all_results] + for result in result_list: + if len(self.unique_id) < self.length: + result = result.transpose(2,0,1) + self.unique_id.append(self.idx) + self.start_logits.append(result[0]) + self.end_logits.append(result[1]) + self.idx += 1 + if len(self.unique_id) == self.length: + self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) + return self.all_sample + +@transform_registry(transform_type="SquadV1", process="postprocess", \ + framework="tensorflow, tensorflow_itex") +class 
TFSquadV1PostTransform(BaseTransform): + """Postprocess the predictions of bert on SQuAD. + + Args: + label_file (str): path of label file + vocab_file(str): path of vocabulary file + n_best_size (int, default=20): + The total number of n-best predictions to generate in nbest_predictions.json + max_seq_length (int, default=384): + The maximum total input sequence length after WordPiece tokenization. + Sequences longer than this will be truncated, shorter than this will be padded + max_query_length (int, default=64): + The maximum number of tokens for the question. + Questions longer than this will be truncated to this length + max_answer_length (int, default=30): + The maximum length of an answer that can be generated. This is needed because + the start and end predictions are not conditioned on one another + do_lower_case (bool, default=True): + Whether to lower case the input text. + Should be True for uncased models and False for cased models + doc_stride (int, default=128): + When splitting up a long document into chunks, + how much stride to take between chunks + + Returns: + tuple of processed prediction and label + """ + + def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ + max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + """Initialize `TFSquadV1PostTransform` class.""" + from . 
import tokenization + self.eval_examples = read_squad_examples(label_file) + tokenizer = tokenization.FullTokenizer( + vocab_file=vocab_file, do_lower_case=do_lower_case) + + self.eval_features = [] + def append_feature(feature): + self.eval_features.append(feature) + + convert_examples_to_features( + examples=self.eval_examples, + tokenizer=tokenizer, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + output_fn=append_feature) + + self.n_best_size = n_best_size + self.max_answer_length = max_answer_length + self.do_lower_case = do_lower_case + self.RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + def process_result(self, results): + """Get the processed results.""" + processed_results = [] + # notice the result list sequence + for unique_id, start_logits, end_logits in zip(*results): + processed_results.append( + self.RawResult( + unique_id=int(unique_id), + start_logits=[float(x) for x in start_logits.flat], + end_logits=[float(x) for x in end_logits.flat])) + + return processed_results + + def get_postprocess_result(self, sample): + """Get the post processed results.""" + if sample == (None, None): + return (None, None) + all_results, label = sample + all_results = self.process_result(all_results) + example_index_to_features = collections.defaultdict(list) + for feature in self.eval_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + for (example_index, example) in enumerate(self.eval_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of 
null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min mull score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + # skip the case that is not predicted + if not feature.unique_id in unique_id_to_result: + all_predictions[example.qas_id] = "*#skip this example#*" + continue + result = unique_id_to_result[feature.unique_id] + start_indexes = _get_best_indexes(result.start_logits, self.n_best_size) + end_indexes = _get_best_indexes(result.end_logits, self.n_best_size) + + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > self.max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= self.n_best_size: + break + feature = features[pred.feature_index] + if 
pred.start_index > 0: # this is a non-null prediction + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, self.do_lower_case) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + else: + final_text = "" + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + all_predictions[example.qas_id] = nbest_json[0]["text"] + return (all_predictions, label) + + def __call__(self, sample): + """Call the get_postprocess_result.""" + return self.get_postprocess_result(sample) + + +@transform_registry(transform_type="ModelZooCollect", \ + process="postprocess", framework="tensorflow, tensorflow_itex") +class TFModelZooCollectTransform(CollectTransform): + """Postprocess the predictions of model zoo, collect data.""" + + def __call__(self, sample): + """Collect postprocess data.""" + all_results, label = sample + all_results = zip(all_results[0], all_results[1]) + for start_logits, end_logits in all_results: + if len(self.unique_id) < self.length: + self.unique_id.append(self.idx) + self.start_logits.append(start_logits) + self.end_logits.append(end_logits) + self.idx += 1 + if len(self.unique_id) == self.length: + self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) + return self.all_sample + +@transform_registry(transform_type="SquadV1ModelZoo", \ + process="postprocess", framework="tensorflow, \ + tensorflow_itex") +class TFSquadV1ModelZooPostTransform(TFSquadV1PostTransform): + """Postprocess the predictions of bert on SQuADV1.1. 
+ + See class TFSquadV1PostTransform for more details + """ + + def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ + max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + """Initialize `TFSquadV1ModelZooPostTransform` class.""" + super().__init__(label_file, vocab_file, n_best_size, max_seq_length, \ + max_query_length, max_answer_length, do_lower_case, doc_stride) + self.length = len(self.eval_features) + self.collect_data = TFModelZooCollectTransform(length=self.length) + + def __call__(self, sample): + """Collect data and get postprocess results.""" + sample = self.collect_data(sample) + return self.get_postprocess_result(sample) + +@transform_registry(transform_type="ParseDecodeVoc", process="preprocess", \ + framework="tensorflow, tensorflow_itex") +class ParseDecodeVocTransform(BaseTransform): + """Parse features in Example proto. + + Returns: + tuple of parsed image and labels + """ + + def __call__(self, sample): + """Parse decode voc.""" + # Currently only supports jpeg and png. + # Need to use this logic because the shape is not known for + # tf.image.decode_image and we rely on this info to + # extend label if necessary. 
+ def _decode_image(content, channels): + """Decode the image with content.""" + return tf.cond( + tf.image.is_jpeg(content), + lambda: tf.image.decode_jpeg(content, channels), + lambda: tf.image.decode_png(content, channels)) + + features = { + 'image/encoded': + tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), + 'image/filename': + tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), + 'image/format': + tf.compat.v1.FixedLenFeature((), tf.string, default_value='jpeg'), + 'image/height': + tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + 'image/width': + tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + 'image/segmentation/class/encoded': + tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), + 'image/segmentation/class/format': + tf.compat.v1.FixedLenFeature((), tf.string, default_value='png'), + } + + parsed_features = tf.compat.v1.parse_single_example(sample, features) + + image = _decode_image(parsed_features['image/encoded'], channels=3) + + label = None + label = _decode_image( + parsed_features['image/segmentation/class/encoded'], channels=1) + + sample = { + 'image': image, + } + + label.set_shape([None, None, 1]) + + sample['labels_class'] = label + + return sample['image'], sample['labels_class'] diff --git a/neural_compressor/experimental/__init__.py b/neural_compressor/experimental/__init__.py index 6bbbea2db59..265db4851ba 100644 --- a/neural_compressor/experimental/__init__.py +++ b/neural_compressor/experimental/__init__.py @@ -27,6 +27,8 @@ from .model_conversion import ModelConversion from .distillation import Distillation from .nas import NAS +from . 
import export __all__ = ['Component', 'Quantization', 'Pruning', 'Benchmark', 'Graph_Optimization', \ - 'GraphOptimization', 'ModelConversion', 'Distillation', 'NAS', 'MixedPrecision'] + 'GraphOptimization', 'ModelConversion', 'Distillation', 'NAS', 'MixedPrecision', \ + 'export'] diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index 4ee2f8e6fd6..f59f2541a85 100644 --- a/neural_compressor/experimental/benchmark.py +++ b/neural_compressor/experimental/benchmark.py @@ -30,7 +30,7 @@ from ..conf.dotdict import DotDict from ..utils import logger from ..utils import OPTIONS -from ..utils.utility import set_backend, GLOBAL_STATE, MODE +from ..utils.utility import GLOBAL_STATE, MODE from ..utils.create_obj_from_config import create_eval_func, create_dataloader from ..conf.dotdict import deep_get, deep_set from ..model import BaseModel @@ -166,7 +166,6 @@ def __init__(self, conf_fname_or_obj=None): self.conf = BenchmarkConf(conf_fname_or_obj) if self.conf.usr_cfg.model.framework != 'NA': self.framework = self.conf.usr_cfg.model.framework.lower() - set_backend(self.framework) def __call__(self, mode='performance'): """Directly call a Benchmark object. @@ -181,8 +180,8 @@ def __call__(self, mode='performance'): assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...' assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' 
set_all_env_var(deep_get(cfg, 'evaluation.{}.configs'.format(mode))) - # disable multi-instance for accuracy mode + # disable multi-instance for accuracy mode or running benchmark on GPU device + if mode == "accuracy" or cfg.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") @@ -226,13 +225,13 @@ def config_instance(self): num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - if(get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): + if(sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): raise OSError('Currently no support on AMD with hyperthreads') - else: + elif sys.platform in ['linux']: bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) for i in range(0, num_of_instance): - if get_architecture() == 'x86_64': + if sys.platform in ['linux'] and get_architecture() == 'x86_64': core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance core_list = np.array(bounded_threads)[core_list_idx] else: @@ -298,19 +297,22 @@ def run_instance(self, mode): GLOBAL_STATE.STATE = MODE.BENCHMARK framework_specific_info = {'device': cfg.device, \ 'approach': cfg.quantization.approach, \ - 'random_seed': cfg.tuning.random_seed} + 'random_seed': cfg.tuning.random_seed, + 'backend': cfg.model.get('backend', 'default'), + 'format': cfg.model.get('quant_format', 'default')} framework = cfg.model.framework.lower() if 'tensorflow' in framework: framework_specific_info.update({"inputs": cfg.model.inputs, \ "outputs": cfg.model.outputs, \ "recipes": cfg.model.recipes, \ 'workspace_path': cfg.tuning.workspace.path}) + if framework == 'keras': + framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path}) if framework == 'mxnet': framework_specific_info.update({"b_dataloader": self._b_dataloader}) - if
'onnxrt' in framework.lower(): + if 'onnx' in framework.lower(): framework_specific_info.update( - {"backend": framework.lower().split('_')[-1], \ - 'workspace_path': cfg.tuning.workspace.path, \ + {'workspace_path': cfg.tuning.workspace.path, \ 'graph_optimization': OPTIONS[framework].graph_optimization}) if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': framework_specific_info.update({"workspace_path": cfg.tuning.workspace.path, @@ -338,7 +340,9 @@ def run_instance(self, mode): b_dataloader_cfg = deep_get(cfg, 'evaluation.{}.dataloader'.format(mode)) self._b_dataloader = create_dataloader(self.framework, b_dataloader_cfg) + is_measure = False if self._b_func is None: + is_measure = True self._b_func = create_eval_func(self.framework, \ self._b_dataloader, \ adaptor, \ @@ -353,10 +357,11 @@ def run_instance(self, mode): assert len(objectives) == 1, 'benchmark supports one objective at a time' self.objectives = MultiObjective(objectives, cfg.tuning.accuracy_criterion, - is_measure=True) + is_measure=is_measure) if self._custom_b_func: val = self.objectives.evaluate(self._b_func, self._model.model) + return else: val = self.objectives.evaluate(self._b_func, self._model) # measurer contain info not only performance(eg, memory, model_size) @@ -471,17 +476,34 @@ def model(self, user_model): be careful of the name of the model configured in the yaml file, make sure the name is in the supported slim model list. """ - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - self._model = NCModel(user_model) - else: - self._model = user_model - cfg = self.conf.usr_cfg if cfg.model.framework == 'NA': + assert not isinstance(user_model, BaseModel), \ + "Please pass an original framework model but not neural compressor model!" 
self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.tensorflow_model import get_model_type + if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': + self.framework = 'keras' + if self.framework == "pytorch": + if cfg.model.backend == "default": + self.framework = "pytorch_fx" + elif cfg.model.backend == "ipex": + self.framework = "pytorch_ipex" + import intel_extension_for_pytorch cfg.model.framework = self.framework - set_backend(self.framework) + + if not isinstance(user_model, BaseModel): + logger.warning("Force convert framework model to neural_compressor model.") + self._model = NCModel(user_model, framework=self.framework) + else: + # It is config of neural_compressor version < 2.0, no need in 2.0 + if cfg.model.framework == "pytorch_ipex": + from neural_compressor.model.torch_model import IPEXModel + if not isinstance(user_model, IPEXModel): + self._model = NCModel(user_model.model, framework=cfg.model.framework) + return + self._model = user_model # (TODO) ugly to set these params, but tensorflow need if 'tensorflow' in self.framework: diff --git a/neural_compressor/experimental/common/criterion.py b/neural_compressor/experimental/common/criterion.py index 11308854d10..0273695287a 100644 --- a/neural_compressor/experimental/common/criterion.py +++ b/neural_compressor/experimental/common/criterion.py @@ -130,7 +130,7 @@ class TensorFlowCrossEntropyLoss(object): """TensorFlow CrossEntropyLoss criterion.""" def __init__(self, param_dict): - """Initialize the DATASETS class. + """Initialize the Datasets class. Args: param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. @@ -164,7 +164,7 @@ class TensorFlowSparseCategoricalCrossentropy(object): """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" def __init__(self, param_dict): - """Initialize the DATASETS class. + """Initialize the Datasets class. Args: param_dict (string): param_dict. 
@@ -1454,6 +1454,36 @@ def loss_cal(self, student_outputs): self.loss += tmp_loss return self.loss + def teacher_model_forward(self, input, teacher_model=None, device=None): + """Teacher model forward. + + Args: + input (tensor): input data + teacher_model (torch.nn.model, optional): teacher model. Defaults to None. + device (torch.device, optional): device. Defaults to None. + + Returns: + tensor: output + """ + outputs = None + if self.loss_weights[1] > 0: + model = self.teacher_model if teacher_model is None else teacher_model + assert isinstance(model, torch.nn.Module), \ + 'Teacher model should be a torch Module instead of {}'.format(type(model)) + model.eval() + try: + model_device = next(model.parameters()).device + except: + logger.warning("Cannot get model device, assuming it's in CPU.") + model_device = "cpu" + device = model_device if device is None else device + if device != model_device: + model.to(device) + with torch.no_grad(): + outputs = pytorch_forward_wrapper(model, input, device=device) + self.teacher_outputs = outputs + return outputs + @criterion_registry('SelfKnowledgeDistillationLoss', 'pytorch') class PyTorchSelfKnowledgeDistillationLossWrapper(object): @@ -1518,4 +1548,4 @@ def __call__(self, **kwargs): class: PyTorchSelfKnowledgeDistillationLoss param dict (dict): param dict """ - return PyTorchSelfKnowledgeDistillationLoss, self._param_check() \ No newline at end of file + return PyTorchSelfKnowledgeDistillationLoss, self._param_check() diff --git a/neural_compressor/experimental/common/model.py b/neural_compressor/experimental/common/model.py index 5ecf273b183..07f33501e84 100644 --- a/neural_compressor/experimental/common/model.py +++ b/neural_compressor/experimental/common/model.py @@ -17,10 +17,9 @@ """common Model just collects the information to construct a Model.""" -import sys -from neural_compressor.model.model import get_model_fwk_name, MODELS, get_model_type +from neural_compressor.model.model import get_model_fwk_name, MODELS 
+from neural_compressor.model.tensorflow_model import get_model_type from neural_compressor.utils import logger -from neural_compressor.utils.utility import get_backend class Model(object): """A wrapper of the information needed to construct a Model.""" @@ -38,19 +37,20 @@ def __new__(cls, root, **kwargs): Returns: BaseModel: neural_compressor built-in model """ - backend = get_backend() - framework = get_model_fwk_name(root) + framework = kwargs.get("framework", "NA") + if framework == "NA": + framework = get_model_fwk_name(root) - if framework == 'tensorflow': + if 'tensorflow' in framework: if 'modelType' in kwargs: model_type = kwargs['modelType'] else: model_type = get_model_type(root) model = MODELS['tensorflow'](model_type, root, **kwargs) + elif framework == 'keras': + model = MODELS['keras'](root, **kwargs) elif framework == 'pytorch': - if backend == 'NA': - backend = 'pytorch' - model = MODELS[backend](root, **kwargs) + model = MODELS[framework](root, **kwargs) else: model = MODELS[framework](root, **kwargs) return model diff --git a/neural_compressor/experimental/component.py b/neural_compressor/experimental/component.py index 25ab4d4ba93..4025fc5bb90 100644 --- a/neural_compressor/experimental/component.py +++ b/neural_compressor/experimental/component.py @@ -22,7 +22,7 @@ from ..conf.config import Conf from ..utils import logger -from ..utils.utility import set_backend, required_libs +from ..utils.utility import required_libs from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func from ..model import BaseModel from .common import Model @@ -96,7 +96,6 @@ def _init_with_conf(self): self.cfg = self.conf.usr_cfg if self.cfg.model.framework != 'NA': self.framework = self.cfg.model.framework.lower() - set_backend(self.framework) if self.framework in required_libs: for lib in required_libs[self.framework]: try: @@ -131,7 +130,9 @@ def prepare_qat(self): framework_specific_info = {'device': self.cfg.device, 
'random_seed': self.cfg.tuning.random_seed, 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + 'q_dataloader': None, + 'backend': self.cfg.model.get('backend', 'default'), + 'format': self.cfg.model.get('quant_format', 'default')} if self.cfg.quantization.approach is not None: framework_specific_info['approach'] = self.cfg.quantization.approach @@ -469,18 +470,33 @@ def model(self, user_model): make sure the name is in supported slim model list. """ + if self.cfg.model.framework == 'NA': + assert not isinstance(user_model, BaseModel), \ + "Please pass an original framework model but not neural compressor model!" + self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.tensorflow_model import get_model_type + if get_model_type(user_model) == 'keras' and self.cfg.model.backend == 'itex': + self.framework = 'keras' + if self.framework == "pytorch": + if self.cfg.model.backend == "default": + self.framework = "pytorch_fx" + elif self.cfg.model.backend == "ipex": + self.framework = "pytorch_ipex" + self.cfg.model.framework = self.framework + if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - self._model = Model(user_model) + self._model = Model(user_model, framework=self.framework) else: - self._model = user_model + # It is config of neural_compressor version < 2.0, no need in 2.0 + if self.cfg.model.framework == "pytorch_ipex": + from neural_compressor.model.torch_model import IPEXModel + if not isinstance(user_model, IPEXModel): + self._model = Model(user_model.model, framework=self.cfg.model.framework) + return - if self.cfg.model.framework == 'NA': - self.framework = get_model_fwk_name(user_model) - if self.framework == 'onnxruntime': - self.framework = 'onnxrt_qoperator' - self.cfg.model.framework = self.framework - set_backend(self.framework) + self._model = user_model if 'tensorflow' in self.framework: self._model.name = 
self.cfg.model.name diff --git a/neural_compressor/experimental/data/__init__.py b/neural_compressor/experimental/data/__init__.py index e78431a0c48..bdc10fbbff9 100644 --- a/neural_compressor/experimental/data/__init__.py +++ b/neural_compressor/experimental/data/__init__.py @@ -18,14 +18,14 @@ """Built-in dataloaders, datasets, transforms, filters for multiple framework backends.""" -from .datasets import DATASETS, Dataset, IterableDataset, dataset_registry +from .datasets import Datasets, Dataset, IterableDataset, dataset_registry from .transforms import TRANSFORMS, BaseTransform, transform_registry from .dataloaders import DATALOADERS from .filters import FILTERS, Filter, filter_registry __all__ = [ "DATALOADERS", - "DATASETS", + "Datasets", "Dataset", "IterableDataset", "dataset_registry", diff --git a/neural_compressor/experimental/data/dataloaders/dataloader.py b/neural_compressor/experimental/data/dataloaders/dataloader.py index c3463b875eb..c879b0b45d1 100644 --- a/neural_compressor/experimental/data/dataloaders/dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/dataloader.py @@ -25,6 +25,7 @@ DATALOADERS = {"tensorflow": TensorflowDataLoader, "tensorflow_itex": TensorflowDataLoader, + "keras": TensorflowDataLoader, "mxnet": MXNetDataLoader, "pytorch": PyTorchDataLoader, "pytorch_ipex": PyTorchDataLoader, diff --git a/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py b/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py index fbbf80b3ee1..dcc462ae616 100644 --- a/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Initialize the DATASETS class.""" +"""Initialize the Datasets class.""" import numpy as np from neural_compressor.utils.utility import LazyImport @@ -66,4 +66,4 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, pin_memory=pin_memory, sampler=sampler, batch_sampler=batch_sampler) - \ No newline at end of file + diff --git a/neural_compressor/experimental/data/datasets/__init__.py b/neural_compressor/experimental/data/datasets/__init__.py index 9d74d15bcec..c2460d737ed 100644 --- a/neural_compressor/experimental/data/datasets/__init__.py +++ b/neural_compressor/experimental/data/datasets/__init__.py @@ -17,7 +17,7 @@ """Built-in datasets class for multiple framework backends.""" -from .dataset import DATASETS, Dataset, IterableDataset, dataset_registry +from .dataset import Datasets, Dataset, IterableDataset, dataset_registry from os.path import dirname, basename, isfile, join import glob @@ -28,4 +28,4 @@ __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["DATASETS", "Dataset", "IterableDataset", "dataset_registry"] +__all__ = ["Datasets", "Dataset", "IterableDataset", "dataset_registry"] diff --git a/neural_compressor/experimental/data/datasets/dataset.py b/neural_compressor/experimental/data/datasets/dataset.py index 2c7d602f4a6..b591ebb074f 100644 --- a/neural_compressor/experimental/data/datasets/dataset.py +++ b/neural_compressor/experimental/data/datasets/dataset.py @@ -153,12 +153,12 @@ def __getitem__(self, index): The naming convention of new dataset subclass should be something like ImageClassifier, user could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml. - DATASETS variable is used to store all implemented Dataset subclasses to support + Datasets variable is used to store all implemented Dataset subclasses to support model specific dataset. """ -class DATASETS(object): +class Datasets(object): """A base class for all framework datasets. 
Args: diff --git a/neural_compressor/experimental/data/datasets/imagenet_dataset.py b/neural_compressor/experimental/data/datasets/imagenet_dataset.py index 350408437e8..8d5c52ee528 100644 --- a/neural_compressor/experimental/data/datasets/imagenet_dataset.py +++ b/neural_compressor/experimental/data/datasets/imagenet_dataset.py @@ -36,6 +36,7 @@ import numpy as np from PIL import Image from neural_compressor.utils.utility import LazyImport +from neural_compressor.utils import logger from .dataset import dataset_registry, IterableDataset, Dataset tf = LazyImport('tensorflow') mx = LazyImport('mxnet') @@ -146,3 +147,73 @@ def __getitem__(self, index): elif type(image).__name__ == 'EagerTensor': image = image.numpy() return (image, label) + +@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format='') +class TensorflowImagenetDataset(IterableDataset): + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter=None): + """New a imagenet dataset for tensorflow.""" + assert subset in ('validation', 'train'), \ + 'only support subset (validation, train)' + logger.warning("This api is going to be deprecated, " + "please use ImageRecord instead.") + + from tensorflow.python.platform import gfile + glob_pattern = os.path.join(root, '%s-*-of-*' % subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + + from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=num_cores)) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, 
num_parallel_calls=None) + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + +@dataset_registry(dataset_type="Imagenet", framework="onnxrt_qlinearops, \ + onnxrt_integerops", dataset_format='') +class ONNXRTImagenetDataset(Dataset): + """Configuration for Imagenet dataset.""" + + def __init__(self, root, subset='val', num_cores=28, transform=None, filter=None): + """Initialize `ONNXRTImagenetDataset` class.""" + self.val_dir = os.path.join(root, subset) + assert os.path.exists(self.val_dir), "find no val dir in {}".format(root) + \ + "please make sure there are train/val subfolders" + import glob + logger.warning("This api is going to be deprecated, " + "please use ImageRecord instead.") + + self.transform = transform + self.image_list = [] + files = glob.glob(os.path.join(self.val_dir, '*')) + files.sort() + for idx, file in enumerate(files): + imgs = glob.glob(os.path.join(file, '*')) + for img in imgs: + self.image_list.append((img, idx)) + + def __len__(self): + """Return the number of images.""" + return len(self.image_list) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + from PIL import Image + sample = self.image_list[index] + image = Image.open(sample[0]) + if self.transform is not None: + image, label = self.transform((image, sample[1])) + return (image, label) + diff --git a/neural_compressor/experimental/data/transforms/imagenet_transform.py b/neural_compressor/experimental/data/transforms/imagenet_transform.py index bb7bfc4a3f8..5afe6b24c06 100644 --- a/neural_compressor/experimental/data/transforms/imagenet_transform.py +++ b/neural_compressor/experimental/data/transforms/imagenet_transform.py @@ -33,6 +33,7 @@ import numpy as np from neural_compressor.utils.utility import LazyImport +from neural_compressor.utils import logger from .transform import transform_registry, BaseTransform tf = LazyImport('tensorflow') cv2 = LazyImport('cv2') @@ -132,5 
+133,289 @@ def __call__(self, sample): image = features['image/encoded'] image = tf.image.decode_jpeg( image, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + return (image, label) + +@transform_registry(transform_type="ParseDecodeImagenet", \ + process="preprocess", framework="tensorflow") +class ParseDecodeImagenetTransform(BaseTransform): + """Imagenet decoding will be performed automatically from Neural Compressor v1.4. + + Returns: + sample + """ + + def __call__(self, sample): + """Convert `ParseDecodeImagenetTransform` feature.""" + logger.warning("This transform is going to be deprecated, " \ + "imagenet decoding will be performed automatically from Neural Compressor v1.4.") + return sample + +@transform_registry(transform_type="ResizeCropImagenet", \ + process="preprocess", framework="tensorflow") +class TensorflowResizeCropImagenetTransform(BaseTransform): + """Combination of a series of transforms which is applicable to images in Imagenet. + + Args: + height (int): Height of the result + width (int): Width of the result + random_crop (bool, default=False): whether to random crop + resize_side (int, default=256):desired shape after resize operation + random_flip_left_right (bool, default=False): whether to random flip left and right + mean_value (list, default=[0.0,0.0,0.0]):means for each channel + scale (float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, random_crop=False, resize_side=256, \ + resize_method='bilinear', random_flip_left_right=False, \ + mean_value=[0.0,0.0,0.0], scale=1.0, \ + data_format='channels_last', subpixels='RGB'): + """Initialize `TensorflowResizeCropImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.random_crop = random_crop + self.random_flip_left_right = random_flip_left_right + self.resize_side = resize_side + self.resize_method = resize_method + 
self.data_format = data_format + self.subpixels = subpixels + + # sample is (images, labels) + def __call__(self, sample): + """Convert `TensorflowResizeCropImagenetTransform` feature.""" + image, label = sample + shape = tf.shape(input=image) + + height = tf.cast(shape[0], dtype=tf.float32) \ + if self.data_format=="channels_last" else tf.cast(shape[1], dtype=tf.float32) + width = tf.cast(shape[1], dtype=tf.float32) \ + if self.data_format=="channels_last" else tf.cast(shape[2], dtype=tf.float32) + scale = tf.cond(pred=tf.greater(height, width), \ + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height,) + + scale = tf.cast(scale, dtype=tf.float32) + new_height = tf.cast(tf.math.rint(height*scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width*scale), dtype=tf.int32) + if self.subpixels=='BGR' and self.data_format=='channels_first': + # 'RGB'->'BGR' + image = tf.cond(tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1)) + elif self.subpixels=='BGR': + # 'RGB'->'BGR' + image = image[..., ::-1] + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [new_height, new_width], + method=self.resize_method) + image = tf.squeeze(image) + shape = tf.shape(input=image) + if self.random_crop: + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height +1), + dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width +1), + dtype=tf.dtypes.int32) + else: + y0 = (shape[0] - self.height) // 2 + x0 = (shape[1] - self.width) // 2 + + image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width) + image.set_shape([self.height, self.width, 3]) + if self.random_flip_left_right: + image = tf.image.random_flip_left_right(image) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, 
label) + +@transform_registry(transform_type="BilinearImagenet", \ + process="preprocess", framework="tensorflow") +class BilinearImagenetTransform(BaseTransform): + """Combination of a series of transforms which is applicable to images in Imagenet. + + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, + mean_value=[0.0,0.0,0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], \ + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale return (image, label) + +@transform_registry(transform_type="BilinearImagenet", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class OnnxBilinearImagenetTransform(BaseTransform): + """Combination of a series of transforms which is applicable to images in Imagenet. + + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, + mean_value=[0.0,0.0,0.0], scale=1.0): + """Initialize `OnnxBilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + def __call__(self, sample): + """Convert `OnnxBilinearImagenetTransform` feature.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. 
+ img_shape = image.shape + depth = img_shape[2] + img_hd = float(img_shape[0]) + bbox_h_start = int((img_hd - img_hd * self.central_fraction) / 2) + img_wd = float(img_shape[1]) + bbox_w_start = int((img_wd - img_wd * self.central_fraction) / 2) + + bbox_h_size = img_shape[0] - bbox_h_start * 2 + bbox_w_size = img_shape[1] - bbox_w_start * 2 + + image = image[bbox_h_start:bbox_h_start+bbox_h_size, bbox_w_start:bbox_w_start+bbox_w_size] + + if self.height and self.width: + image = cv2.resize(image, (self.width, self.height), interpolation=cv2.INTER_LINEAR) + + image = np.subtract(image, 0.5) + image = np.multiply(image, 2.0) + means = np.broadcast_to(self.mean_value, image.shape) + image = (image - means) * self.scale + image = image.astype(np.float32) + return (image, label) + +@transform_registry(transform_type="ResizeCropImagenet", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class ONNXResizeCropImagenetTransform(BaseTransform): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + random_crop(bool, default=False): whether to random crop + resize_side(int, default=256): desired shape after resize + mean_value/std_value(list): per-channel means/stds for normalization + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, random_crop=False, resize_side=256, \ + mean_value=[0.0,0.0,0.0], std_value=[0.229, 0.224, 0.225], \ + resize_method='bilinear', data_format='channels_last', subpixels='RGB'): + """Initialize `ONNXResizeCropImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.std_value = std_value + self.random_crop = random_crop + self.resize_side = resize_side + self.resize_method = resize_method + self.data_format = data_format + self.subpixels = subpixels + + # sample is (images, labels) + def __call__(self, sample): + """Convert `ONNXResizeCropImagenetTransform` feature.""" + # TODO Support optional resize_method, data_format, subpixels for ONNX + image, label = sample + height, width = image.shape[0], image.shape[1] + scale = self.resize_side / width if height > width else self.resize_side / height + new_height = int(height*scale) + new_width = int(width*scale) + image = cv2.resize(image, (new_height, new_width)) + image = image / 255.
+ shape = image.shape + if self.random_crop: + y0 = np.random.randint(low=0, high=(shape[0] - self.height +1)) + x0 = np.random.randint(low=0, high=(shape[1] - self.width +1)) + else: + y0 = (shape[0] - self.height) // 2 + x0 = (shape[1] - self.width) // 2 + if len(image.shape) == 2: + image = np.array([image]) + image = np.repeat(image, 3, axis=0) + image = image.transpose(1, 2, 0) + image = image[y0:y0+self.height, x0:x0+self.width, :] + image = ((image - self.mean_value)/self.std_value).astype(np.float32) + return (image.transpose(2, 0, 1), label) + +@transform_registry(transform_type="ResizeWithAspectRatio", process="preprocess", \ + framework="onnxrt_qlinearops, onnxrt_integerops") +class ResizeWithAspectRatio(BaseTransform): + """Resize the image with aspect ratio. + + Returns: + image and label + """ + + def __init__(self, height, width, scale=87.5, inter_pol=cv2.INTER_AREA): + """Initialize `ResizeWithAspectRatio` class.""" + self.height = height + self.width = width + self.scale = scale + self.inter_pol = inter_pol + + def __call__(self, sample): + """Convert `ResizeWithAspectRatio` feature.""" + (img, label) = sample + assert len(img.shape) == 3 + height, width, _ = img.shape + new_height = int(100. * self.height / self.scale) + new_width = int(100. 
* self.width / self.scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=self.inter_pol) + return img, label diff --git a/neural_compressor/experimental/distillation.py b/neural_compressor/experimental/distillation.py index c87ef341f22..85ae6e52c85 100644 --- a/neural_compressor/experimental/distillation.py +++ b/neural_compressor/experimental/distillation.py @@ -226,7 +226,9 @@ def pre_process(self): framework_specific_info = {'device': self.cfg.device, 'random_seed': self.cfg.tuning.random_seed, 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + 'q_dataloader': None, + 'format': 'default', + 'backend': 'default'} if self.framework == 'tensorflow': framework_specific_info.update( @@ -237,11 +239,9 @@ def pre_process(self): self.generate_hooks() assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for distillation....' - if self._train_dataloader is None and self._train_func is None: + if self._train_dataloader is None and self._train_func is None and \ + self.cfg.distillation.train.dataloader is not None: train_dataloader_cfg = self.cfg.distillation.train.dataloader - assert train_dataloader_cfg is not None, \ - 'dataloader field of train field of distillation section ' \ - 'in yaml file should be configured as train_dataloader property is NOT set!' 
self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) @@ -258,11 +258,12 @@ def pre_process(self): if self._train_func is None: self.create_criterion() self.create_optimizer() - self._train_func = create_train_func(self.framework, \ - self.train_dataloader, \ - self.adaptor, \ - self._train_cfg, \ - hooks=self.hooks) + if self._train_dataloader is not None: + self._train_func = create_train_func(self.framework, \ + self.train_dataloader, \ + self.adaptor, \ + self._train_cfg, \ + hooks=self.hooks) if self.cfg.evaluation and self.eval_dataloader and self._eval_func is None: # eval section in yaml file should be configured. eval_cfg = self.cfg.evaluation diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/download_librispeech.sh b/neural_compressor/experimental/export/__init__.py similarity index 52% rename from examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/download_librispeech.sh rename to neural_compressor/experimental/export/__init__.py index ee322fe3043..529ea48ed35 100644 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/eager/pytorch/scripts/download_librispeech.sh +++ b/neural_compressor/experimental/export/__init__.py @@ -1,10 +1,13 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -13,16 +16,7 @@ # limitations under the License. 
def check_model(model):
    """Check whether an ONNX model contains QLinear-style quantized ops.

    Args:
        model (ModelProto): onnx model.

    Returns:
        bool: True if the model has QLinear-style ops worth converting to QDQ,
            False when the original model should be saved unchanged.
    """
    has_integerop = False
    has_qlinearop = False
    for node in model.graph.node:
        if node.op_type.endswith('Integer'):
            has_integerop = True
        elif node.op_type.startswith('QLinear'):
            has_qlinearop = True
        elif node.op_type in ['QAttention', 'QGemm', 'QEmbedLayerNormalization']:
            has_qlinearop = True
        elif node.op_type in ['Gather']:
            # A Gather fed by an int8/uint8 initializer is a quantized
            # embedding lookup, so it also counts as a QLinear-style op.
            input_data = find_by_name(node.input[0], model.graph.initializer)
            if input_data is not None and \
                    numpy_helper.to_array(input_data).dtype in ['int8', 'uint8']:
                has_qlinearop = True
    if has_integerop:
        logger.info("This model has Integer ops, these ops will be skipped.")
    if has_qlinearop:
        return True
    else:
        logger.info("This model has no QLinear ops, save the original model.")
        return False

def onnx_qlinear_to_qdq(
    model,
    input_name_to_nodes,
):
    """Export ONNX QLinearops model into QDQ model.

    Args:
        model (ModelProto): int8 onnx model.
        input_name_to_nodes (dict): the mapping of tensor name and its destination nodes.

    Returns:
        tuple: (nodes to add, nodes to remove, initializers to add).
    """
    from neural_compressor.adaptor.ox_utils.operators import QOPERATORS
    add_nodes = []
    remove_nodes = []
    inits = []
    if check_model(model):
        for node in model.graph.node:
            if node.op_type in QOPERATORS:
                if node.output[0] not in input_name_to_nodes:
                    continue
                # BUGFIX: collect the consumers of EVERY output tensor; the
                # original looped over node.output but always appended the
                # consumers of node.output[0], duplicating them len(output)
                # times and dropping consumers of the other outputs.
                children = []
                for out in node.output:
                    children.extend(input_name_to_nodes.get(out, []))
                converter = QOPERATORS[node.op_type](
                    node,
                    children,
                    model.graph.initializer)
                done, add_node, init = converter.convert()
                if done:
                    add_nodes.extend(add_node)
                    inits.extend(init)
                    remove_nodes.append(node)
    return add_nodes, remove_nodes, inits
+ +"""Helper functions to export model from PyTorch/TensorFlow to ONNX.""" + +import os +import numpy as np +from collections import UserDict +from neural_compressor.adaptor.torch_utils.util import input2tuple +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +torch = LazyImport('torch') +onnx = LazyImport('onnx') +ort = LazyImport('onnxruntime') +ortq = LazyImport('onnxruntime.quantization') + + +def update_weight_bias( + int8_model, + fp32_onnx_path, +): + """Update wegiht and bias of FP32 ONNX model with QAT INT8 PyTorch model . + + Args: + int8_model (torch.nn.module): int8 model. + fp32_onnx_path (str): path to fp32 onnx model. + """ + # collect weights, bias from int8 PT model + fp32_onnx_model = onnx.load(fp32_onnx_path) + model_dict = int8_model.state_dict() + int8_model_dict = {} + for name, param in model_dict.items(): + # '_packed_params._packed_weight' is specific for quantized Embedding + if '_packed_params._packed_weight' in name: + name = name.replace('._packed_params._packed_weight', '').split('.module')[0] + int8_model_dict[name+'.weight'] = param.dequantize() + # '_packed_params._packed_params' is specific for quantized Linear + elif '_packed_params._packed_params' in name and isinstance(param, tuple): + name = name.replace('._packed_params._packed_params', '').split('.module')[0] + int8_model_dict[name+'.bias'] = param[1] + int8_model_dict[name+'.weight'] = param[0].dequantize() + # '.weight' and '.bias' is specific for quantized Conv + elif '.weight' in name: + int8_model_dict[name] = param.dequantize() + elif '.bias' in name: + int8_model_dict[name] = param + else: + int8_model_dict[name] = param + + # replace weight and bias in onnx fp32 model for QAT + from onnx import helper + tensor_list = [tensor for tensor in fp32_onnx_model.graph.initializer] + for tensor in tensor_list: + if tensor.name in int8_model_dict: + np_tensor = int8_model_dict[tensor.name].detach().cpu().numpy() + new_tensor = 
helper.make_tensor( + name=tensor.name, + data_type=tensor.data_type, + dims=tensor.dims, + vals=np_tensor, + ) + fp32_onnx_model.graph.initializer.remove(tensor) + fp32_onnx_model.graph.initializer.append(new_tensor) + onnx.save(fp32_onnx_model, fp32_onnx_path) + + +def set_data_type( + dtype, +): + """Set data type of activation and weight with string dtype. + + Args: + dtype (str): data type description. + + Returns: + activation_type: activation type. + weight_type: weight type. + """ + # Get data type for activation and weight from dtype + if 'U8U8' in dtype: # pragma: no cover + activation_type = ortq.QuantType.QUInt8 + weight_type = ortq.QuantType.QUInt8 + elif 'S8S8' in dtype: # pragma: no cover + activation_type = ortq.QuantType.QInt8 + weight_type = ortq.QuantType.QInt8 + elif 'U8S8' in dtype: + activation_type = ortq.QuantType.QUInt8 + weight_type = ortq.QuantType.QInt8 + else: # pragma: no cover + logger.error("Right now, we don't support dtype: {}, \ + please use U8U8/U8S8/S8S8.".format(dtype)) + logger.info("Weight type: {}.".format(weight_type)) + logger.info("Activation type: {}.".format(activation_type)) + return activation_type, weight_type + + +def get_node_mapping( + fp32_model, + fp32_onnx_path, +): + """Get PyTorch module and ONNX node mapping. + + Args: + fp32_model (torch.nn.Module): quantization configuration from PyTorch. + fp32_onnx_path (str): path to fp32 onnx model. + + Returns: + module_node_mapping: op mapping from PyTorch to ONNX. + """ + def check_data(op_type, data, module_dict): + for name, value in module_dict.items(): + if value.shape == data.shape: + if (value == data).all(): + module_dict.pop(name) + return name + elif op_type == 'Conv': + # Convolution weight data have fluction and BN fusion will insert scale. + # We use the weight scale of the first output channel to check. 
def get_node_mapping(
    fp32_model,
    fp32_onnx_path,
):
    """Get PyTorch module and ONNX node mapping.

    Matches each quantizable ONNX node back to the PyTorch module it was
    exported from by comparing weight tensors.

    Args:
        fp32_model (torch.nn.Module): quantization configuration from PyTorch.
        fp32_onnx_path (str): path to fp32 onnx model.

    Returns:
        module_node_mapping: op mapping from PyTorch to ONNX.
    """
    def check_data(op_type, data, module_dict):
        # Find the PyTorch weight equal to `data`; pop the match so each
        # module is mapped at most once.
        for name, value in module_dict.items():
            if value.shape == data.shape:
                if (value == data).all():
                    module_dict.pop(name)
                    return name
                elif op_type == 'Conv':
                    # Convolution weight data have fluctuation and BN fusion will insert scale.
                    # We use the weight scale of the first output channel to check.
                    weight_scale = value[0] / data[0]
                    if np.allclose(weight_scale - np.mean(weight_scale), 0, atol=1.e-5):
                        module_dict.pop(name)
                        return name
        return None

    # Collect candidate fp32 weights from Conv/Embedding/Linear modules.
    module_dict = {}
    for name, module in fp32_model.named_modules():
        if 'Conv' in str(module.__class__.__name__) or \
          'Embedding' in str(module.__class__.__name__) or \
          'Linear' in str(module.__class__.__name__):
            if hasattr(module, 'weight'):
                value = module.weight.detach().cpu().numpy()
                module_dict[name] = value

    module_node_mapping = {}
    fp32_onnx_model = onnx.load(fp32_onnx_path)
    initializer_data = {tensor.name: tensor for tensor in fp32_onnx_model.graph.initializer}
    from onnx import numpy_helper
    # NOTE: `op_types_to_quantize` is a module-level global assigned by
    # torch_to_int8_onnx() before this helper is invoked.
    for node in fp32_onnx_model.graph.node:
        if node.op_type in op_types_to_quantize:
            if node.op_type == 'MatMul' and node.input[1] in initializer_data:
                # MatMul stores the Linear weight transposed relative to PyTorch.
                data = numpy_helper.to_array(initializer_data[node.input[1]]).T
            elif node.op_type == 'Gather' and node.input[0] in initializer_data:
                data = numpy_helper.to_array(initializer_data[node.input[0]])
            elif node.op_type in ['Conv', 'Gemm']:
                data = numpy_helper.to_array(initializer_data[node.input[1]])
            else:
                continue
            pt_name = check_data(node.op_type, data, module_dict)
            if pt_name:
                module_node_mapping[pt_name] = node.name
    return module_node_mapping


def get_quantizable_onnx_ops(
    int8_model,
    module_node_mapping
):
    """Get quantizable onnx ops.

    Args:
        int8_model (torch.nn.Module): PyTorch int8 model.
        module_node_mapping (dict): op mapping from PyTorch to ONNX.

    Returns:
        quantize_nodes: all onnx node that should be quantized.
    """
    quantize_nodes = []
    for name, module in int8_model.named_modules():
        if 'Conv' in str(module.__class__.__name__) or \
          'Embedding' in str(module.__class__.__name__) or \
          'Linear' in str(module.__class__.__name__):
            # Quantized modules expose `weight` as a callable returning the
            # quantized tensor; fp32 modules fail the callable() check.
            if hasattr(module, 'weight') and callable(module.weight):
                if module.weight().dtype in [torch.qint8, torch.quint8]:
                    # '.module' suffixes are added by fx wrapping; strip them
                    # to look up the original module name.
                    if name.split('.module')[0] in module_node_mapping:
                        node = module_node_mapping[name.split('.module')[0]]
                        quantize_nodes.append(node)
    return quantize_nodes
def build_scale_mapping(
    fp32_onnx_path,
    module_node_mapping,
    int8_scale_info,
):
    """Build scale mapping.

    Maps the scale / zero-point initializer names of each quantizable ONNX
    node to the values recorded for the corresponding PyTorch module.

    Args:
        fp32_onnx_path (str): path to fp32 onnx model.
        module_node_mapping (dict): op mapping from PyTorch to ONNX.
        int8_scale_info (dict): int8 scale information.

    Returns:
        scale_zp_dict: scale and zero_point dict.
    """
    # Invert module->node into node->module for lookup by node name.
    node_module_mapping = {}
    for module_name, node_name in module_node_mapping.items():
        node_module_mapping[node_name] = module_name
    # Match scale and zeropoint from PyTorch to ONNX node
    scale_zp_dict = {}
    fp32_onnx_model = onnx.load(fp32_onnx_path)
    for node in fp32_onnx_model.graph.node:
        if node.name in node_module_mapping:
            module_name = node_module_mapping[node.name]

            # For fine-grained fx and fuse pattern
            if module_name + '.module' in int8_scale_info:
                module_name = module_name + '.module'
            elif module_name + '.0' in int8_scale_info:
                module_name = module_name + '.0'
            elif module_name + '.module.0' in int8_scale_info:
                module_name = module_name + '.module.0'

            if module_name in int8_scale_info:
                recoder = int8_scale_info[module_name]
                input_scale_args = node.input[0] + '_scale'
                input_zp_args = node.input[0] + '_zero_point'
                scale_zp_dict[input_scale_args] = recoder['input_scale']
                scale_zp_dict[input_zp_args] = recoder['input_zeropoint']
                ### We need Matmul+Add to match Linear for output scale and zero-point
                output_scale_args = node.output[0] + '_scale'
                output_zp_args = node.output[0] + '_zero_point'
                scale_zp_dict[output_scale_args] = recoder['output_scale']
                scale_zp_dict[output_zp_args] = recoder['output_zeropoint']
    return scale_zp_dict


def set_scale_info(
    int8_onnx_model,
    scale_zp_dict,
    activation_type,
):
    """Set scale to ONNX model.

    Args:
        int8_onnx_model (ModelProto): int8 onnx model object.
        scale_zp_dict (dict): scale zero_point dict.
        activation_type : activation type.

    Returns:
        int8_onnx_model: int8 onnx model object.
    """
    # set scale and zeropoint from PyTorch int8 model to ONNX int8 model
    from onnx import helper
    tensor_list = [tensor for tensor in int8_onnx_model.graph.initializer]
    for tensor in tensor_list:
        if tensor.name in scale_zp_dict:
            value = scale_zp_dict[tensor.name]
            # presumably PyTorch records zero-points in unsigned form, so a
            # -128 shift maps them to signed QInt8 -- TODO confirm
            if 'zero_point' in tensor.name and activation_type == ortq.QuantType.QInt8:
                value -= 128
            new_tensor = helper.make_tensor(
                name=tensor.name,
                data_type=tensor.data_type,
                dims=tensor.dims,
                vals=[value],
            )
            int8_onnx_model.graph.initializer.remove(tensor)
            int8_onnx_model.graph.initializer.append(new_tensor)
    return int8_onnx_model


def recalculate_bias(
    int8_onnx_path,
    scale_zp_dict,
    quantize_nodes,
    quant_format,
):
    """Recalculate bias.

    Re-quantizes Conv/Gemm biases so they stay consistent with the PyTorch
    input scales injected via `scale_zp_dict` (bias_scale = input_scale *
    weight_scale).

    Args:
        int8_onnx_path (str): path to the int8 onnx model file.
        scale_zp_dict (dict): scale zero_point dict.
        quantize_nodes (list): quantize nodes list.
        quant_format (QuantFormat): quantization format.

    Returns:
        int8_onnx_model: processed onnx int8 model.
    """
    int8_onnx_model = onnx.load(int8_onnx_path)
    model = ortq.onnx_model.ONNXModel(int8_onnx_model)
    if quant_format == ortq.QuantFormat.QDQ:
        for node in int8_onnx_model.graph.node:
            if node.name in quantize_nodes and (node.op_type == 'Conv' or node.op_type == 'Gemm'):
                input_name, weight_name, bias_name = node.input[:3]
                # Locate the DequantizeLinear parents feeding input, weight
                # and bias to read their scale initializer names.
                for parent in model.get_parents(node):
                    if parent.output[0] == input_name:
                        input_scale_name = parent.input[1]
                    elif parent.output[0] == weight_name:
                        weight_scale_name = parent.input[1]
                    elif parent.output[0] == bias_name:
                        bias_quantized_name = parent.input[0]
                        bias_scale_name = parent.input[1]
                weight_scale_data = onnx.numpy_helper.to_array(model.get_initializer(weight_scale_name))
                new_input_scale_data = scale_zp_dict[input_scale_name]
                origin_bias_quantized_data = onnx.numpy_helper.to_array(model.get_initializer(bias_quantized_name))
                origin_bias_scale_data = onnx.numpy_helper.to_array(model.get_initializer(bias_scale_name))
                # Recover the fp32 bias, then requantize it with the new scale.
                origin_bias_data = origin_bias_quantized_data * origin_bias_scale_data
                new_bias_scale_data = new_input_scale_data * weight_scale_data
                new_bias_quantized_data = (origin_bias_data / new_bias_scale_data).round().astype(np.int32)
                model.get_initializer(bias_scale_name).raw_data = new_bias_scale_data.tobytes()
                model.get_initializer(bias_quantized_name).raw_data = new_bias_quantized_data.tobytes()
    elif quant_format == ortq.QuantFormat.QOperator:
        for node in int8_onnx_model.graph.node:
            if node.op_type == 'QLinearConv' or node.op_type == 'QGemm':
                # Scale / bias tensor positions differ between QLinearConv
                # and QGemm signatures.
                input_scale_name, weight_scale_name = node.input[1], node.input[4]
                bias_quantized_name = node.input[8] if node.op_type == 'QLinearConv' else node.input[6]
                weight_scale_data = onnx.numpy_helper.to_array(model.get_initializer(weight_scale_name))
                new_input_scale_data = scale_zp_dict[input_scale_name]
                origin_input_scale_data = onnx.numpy_helper.to_array(model.get_initializer(input_scale_name))
                origin_bias_quantized_data = onnx.numpy_helper.to_array(model.get_initializer(bias_quantized_name))
                origin_bias_scale_data = origin_input_scale_data * weight_scale_data
                origin_bias_data = origin_bias_quantized_data * origin_bias_scale_data
                new_bias_scale_data = new_input_scale_data * weight_scale_data
                new_bias_quantized_data = (origin_bias_data / new_bias_scale_data).round().astype(np.int32)
                model.get_initializer(bias_quantized_name).raw_data = new_bias_quantized_data.tobytes()
    return int8_onnx_model
def remove_nodes_by_name(int8_onnx_model, node_names):
    """Remove nodes from model by names.

    Args:
        int8_onnx_model (ModelProto): onnx int8 model to process.
        node_names (Iterable[str]): names of nodes to remove.

    Returns:
        int8_onnx_model: processed onnx int8 model.
    """
    # BUGFIX: the original looped `while node_names`, removing from
    # graph.node while iterating over it; if any requested name was absent
    # from the graph the loop never terminated. A single pass over a
    # snapshot of the node list is linear and always terminates.
    names = set(node_names)
    for node in list(int8_onnx_model.graph.node):
        if node.name in names:
            int8_onnx_model.graph.node.remove(node)
            names.discard(node.name)
    return int8_onnx_model


def sub_graph_with_int32_bias(
    int8_onnx_model,
    node,
    a_info,
    b_info,
    bias_name,
    output_name,
):
    """Generate a sub graph with int32 bias.

    Replaces a quantized Linear with the chain
    MatMulInteger -> Add(int32 bias) -> Cast(float) -> Mul(bias_scale).

    Args:
        int8_onnx_model (ModelProto): onnx int8 model to process.
        node (NodeProto): MatMul node belonging to nn.quantized.Linear module.
        a_info (list): info of input a for nn.quantized.Linear module.
        b_info (list): info of input b for nn.quantized.Linear module.
        bias_name (str): name of bias.
        output_name (str): output name of the sub graph.

    Returns:
        int8_onnx_model: processed onnx int8 model.
    """
    from onnx import TensorProto
    a, a_scale, a_zero_point = a_info
    b, b_scale, b_zero_point = b_info
    a_scale = ortq.onnx_model.ONNXModel(int8_onnx_model).get_initializer(a_scale)
    a_scale = onnx.numpy_helper.to_array(a_scale)
    b_scale = ortq.onnx_model.ONNXModel(int8_onnx_model).get_initializer(b_scale)
    b_scale = onnx.numpy_helper.to_array(b_scale)
    bias = ortq.onnx_model.ONNXModel(int8_onnx_model).get_initializer(bias_name)
    bias_dims = bias.dims
    bias = onnx.numpy_helper.to_array(bias)
    # Requantize the fp32 bias with scale = a_scale * b_scale so it can be
    # added to the int32 MatMulInteger accumulator.
    bias_scale = a_scale * b_scale
    quantized_bias = (bias / bias_scale).round().astype(np.int32)
    quantized_bias = np.asarray(quantized_bias, dtype=np.int32).reshape(bias_dims)
    packed_bias_initializer = onnx.numpy_helper.from_array(quantized_bias,
                                                           bias_name + "_quantized")
    int8_onnx_model.graph.initializer.extend([packed_bias_initializer])

    matmul_node = onnx.helper.make_node("MatMulInteger",
        inputs=[a, b, a_zero_point, b_zero_point],
        outputs=[node.output[0] + '_matmulinteger'],
        name = node.name + '_matmulinteger')
    add_node = onnx.helper.make_node("Add",
        inputs=[node.output[0] + '_matmulinteger', bias_name + '_quantized'],
        outputs=[node.output[0] + '_add'],
        name = node.name + '_add'
        )
    cast_node = onnx.helper.make_node("Cast",
        inputs=[node.output[0] + '_add'],
        outputs=[node.output[0] + '_cast'],
        to=getattr(TensorProto, 'FLOAT'),
        name = node.name + '_cast')

    # Store the combined bias scale so the Cast output can be rescaled to fp32.
    new_tensor = onnx.helper.make_tensor(
        name=node.name + '_bias_scale',
        data_type=TensorProto.FLOAT,
        dims=list(bias_scale.shape),
        vals=bias_scale,
    )
    int8_onnx_model.graph.initializer.append(new_tensor)

    mul_node = onnx.helper.make_node("Mul",
        inputs=[node.output[0] + '_cast', node.name + '_bias_scale'],
        outputs=[output_name],
        name=node.name + '_mul')

    int8_onnx_model.graph.node.extend([matmul_node, add_node, cast_node, mul_node])
    return int8_onnx_model
def qdq_fp32_bias(
    int8_onnx_model,
    quant_format,
):
    """Execute post-process on int8 onnx model with recipe 'QDQ_OP_FP32_BIAS'.

    Insert QDQ before quantizable op and using fp32 bias.

    Args:
        int8_onnx_model (ModelProto): onnx int8 model to process.
        quant_format (QuantFormat): quantization format.

    Returns:
        int8_onnx_model: processed onnx int8 model.
    """
    # For QDQ quantization format, nn.quantized.Linear module will be
    # converted to the following format:
    #     QuantizeLinear
    #         |
    #     DequantizeLinear
    #         |
    #       MatMul
    #         |
    #        Add
    #
    # For QOperator quantization format, nn.quantized.Linear module will be
    # converted to the following format:
    #     QuantizeLinear
    #         |
    #     MatMulIntegerToFloat
    #         |
    #        Add
    if quant_format == ortq.QuantFormat.QDQ:
        # QDQ export already matches the target pattern; nothing to do.
        return int8_onnx_model
    elif quant_format == ortq.QuantFormat.QOperator:
        remove_nodes = set()
        for node in int8_onnx_model.graph.node:
            if node.op_type == 'QLinearMatMul':
                dequantizelinear_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(node)[0]
                add_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(dequantizelinear_node)[0]
                a = node.input[0]
                a_scale = node.input[1]
                a_zero_point = node.input[2]
                b = node.input[3]
                b_scale = node.input[4]
                b_zero_point = node.input[5]
                # MatMulIntegerToFloat (com.microsoft contrib op) fuses the
                # int8 matmul with the dequantize step.
                matmulintegertofloat_node = onnx.helper.make_node("MatMulIntegerToFloat",
                    inputs=[a, b, a_scale, b_scale, a_zero_point, b_zero_point],
                    outputs=[node.output[0]],
                    name=node.name + '_matmulintegertofloat',
                    domain='com.microsoft')
                # Rewire the Add's input from the removed DequantizeLinear.
                for idx in range(len(add_node.input)):
                    if add_node.input[idx] == dequantizelinear_node.output[0]:
                        add_node.input[idx] = node.output[0]
                remove_nodes.add(node.name)
                remove_nodes.add(dequantizelinear_node.name)
                int8_onnx_model.graph.node.extend([matmulintegertofloat_node])

        int8_onnx_model = remove_nodes_by_name(int8_onnx_model, remove_nodes)
    return int8_onnx_model

def qdq_int32_bias(
    int8_onnx_model,
    quantize_nodes,
    quant_format,
):
    """Execute post-process on int8 onnx model with recipe 'QDQ_OP_INT32_BIAS'.

    Insert QDQ before quantizable op and using int32 bias.

    Args:
        int8_onnx_model (ModelProto): onnx int8 model to process.
        quantize_nodes (list): quantize nodes list.
        quant_format (QuantFormat): quantization format.

    Returns:
        int8_onnx_model: processed onnx int8 model.
    """
    # For QDQ/Operator quantization format, nn.quantized.Linear module will be
    # converted to the following format:
    #     QuantizeLinear
    #         |
    #     MatMulInteger
    #         |
    #        Add
    #         |
    #        Cast
    #         |
    #        Mul
    if quant_format == ortq.QuantFormat.QDQ:
        remove_nodes = set()
        replace_input = {}
        for node in int8_onnx_model.graph.node:
            if node.name in quantize_nodes and node.op_type == 'MatMul':
                parents = ortq.onnx_model.ONNXModel(int8_onnx_model).get_parents(node)
                add_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(node)[0]
                bias_name = None
                for inp in add_node.input:
                    if inp.endswith('.bias'):
                        bias_name = inp
                if not bias_name: # pragma: no cover
                    continue

                # Skip the DequantizeLinear parents: feed the quantized
                # tensors straight into the int32-bias sub graph.
                for parent in parents:
                    grand_parent = ortq.onnx_model.ONNXModel(int8_onnx_model).get_parents(parent)
                    if grand_parent:
                        replace_input[parent.output[0]] = grand_parent[0].input[0]

                int8_onnx_model = sub_graph_with_int32_bias(int8_onnx_model,
                                                            node,
                                                            parents[0].input[:3],
                                                            parents[1].input[:3],
                                                            bias_name,
                                                            add_node.output[0])
                remove_nodes.add(node.name)
                remove_nodes.add(parents[0].name)
                remove_nodes.add(parents[1].name)
                remove_nodes.add(add_node.name)
        int8_onnx_model = remove_nodes_by_name(int8_onnx_model, remove_nodes)
        for node in int8_onnx_model.graph.node: # pragma: no cover
            for i in range(len(node.input)):
                if node.input[i] in replace_input:
                    node.input[i] = replace_input[node.input[i]]
    elif quant_format == ortq.QuantFormat.QOperator:
        remove_nodes = set()
        for node in int8_onnx_model.graph.node:
            if node.op_type == 'QLinearMatMul':
                dequantizelinear_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(node)[0]
                add_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(dequantizelinear_node)[0]

                bias_name = None
                for inp in add_node.input:
                    if inp.endswith('.bias'):
                        bias_name = inp
                if not bias_name: # pragma: no cover
                    continue

                int8_onnx_model = sub_graph_with_int32_bias(int8_onnx_model,
                                                            node,
                                                            node.input[:3],
                                                            node.input[3:6],
                                                            bias_name,
                                                            add_node.output[0])
                remove_nodes.add(node.name)
                remove_nodes.add(add_node.name)
                remove_nodes.add(dequantizelinear_node.name)

        int8_onnx_model = remove_nodes_by_name(int8_onnx_model, remove_nodes)
    return int8_onnx_model
+ """ + # For QDQ quantization format, nn.quantized.Linear module will be + # converted to the following format: + # QuantizeLinear + # | + # DequantizeLinear + # | + # MatMul + # | + # Add + # | + # QuantizeLinear + # | + # DequantizeLinear + # + # For QOperator quantization format, nn.quantized.Lienar module will be + # converted to the following format: + # QuantizeLinear + # | + # MatMulIntegerToFloat + # | + # Add + # | + # QuantizeLinear + # | + # DequantizeLinear + if quant_format == ortq.QuantFormat.QDQ: + for node in int8_onnx_model.graph.node: + if node.name in quantize_nodes and node.op_type == 'MatMul': + quantizelinear_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(node)[0] + deqauntizelinear_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(quantizelinear_node)[0] + add_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(deqauntizelinear_node)[0] + deqauntizelinear_node.output[0] = add_node.output[0] + add_node.output[0] = add_node.output[0] + '_add' + for i in range(len(add_node.input)): + if not add_node.input[i].endswith('.bias'): + add_node.input[i] = node.output[0] + quantizelinear_node.input[0] = add_node.output[0] + elif quant_format == ortq.QuantFormat.QOperator: + import copy + remove_nodes = set() + for node in int8_onnx_model.graph.node: + if node.op_type == 'QLinearMatMul': + dequantizelinear_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(node)[0] + add_node = ortq.onnx_model.ONNXModel(int8_onnx_model).get_children(dequantizelinear_node)[0] + a, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point = node.input[:8] + matmulintegertofloat_node = onnx.helper.make_node("MatMulIntegerToFloat", + inputs=[a, b, a_scale, b_scale, a_zero_point, b_zero_point], + outputs=[node.output[0]], + name=node.name + '_matmulintegertofloat', + domain='com.microsoft') + + for idx in range(len(add_node.input)): + if add_node.input[idx] == dequantizelinear_node.output[0]: + 
add_node.input[idx] = node.output[0] + + quantizelinear_node = onnx.helper.make_node("QuantizeLinear", + inputs=[add_node.output[0] +'_add', y_scale, y_zero_point], + outputs=[node.output[0] + '_quantizelinear'], + name=node.name + '_quantizelinear') + + dequantizelinear_node.input[0] = node.output[0] + '_quantizelinear' + dequantizelinear_node.output[0] = copy.deepcopy(add_node.output[0]) + add_node.output[0] = add_node.output[0] +'_add' + + remove_nodes.add(node.name) + int8_onnx_model.graph.node.extend([matmulintegertofloat_node, quantizelinear_node]) + + int8_onnx_model = remove_nodes_by_name(int8_onnx_model, remove_nodes) + return int8_onnx_model + +def torch_to_fp32_onnx( + fp32_model, + save_path, + example_inputs, + opset_version=14, + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + input_names=None, + output_names=None, + do_constant_folding=True, + verbose=True, +): + """Export FP32 PyTorch model into FP32 ONNX model. + + Args: + fp32_model (torch.nn.module): fp32 model. + int8_model (torch.nn.module): int8 model. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + do_constant_folding (bool, optional): do constant folding or not. Defaults to True. + verbose (bool, optional): dump verbose or not. Defaults to True. 
+ """ + if input_names: + example_input_names = input_names + else: + example_input_names = ['input'] + if isinstance(example_inputs, dict) or isinstance(example_inputs, UserDict): + example_input_names = list(example_inputs.keys()) + + torch.onnx.export( + fp32_model, + input2tuple(example_inputs), + save_path, + opset_version=opset_version, + input_names=example_input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + do_constant_folding=do_constant_folding, + ) + if verbose: + info = "The FP32 ONNX Model exported to path: {0}".format(save_path) + logger.info("*"*len(info)) + logger.info(info) + logger.info("*"*len(info)) + + +def torch_to_int8_onnx( + fp32_model, + int8_model, + q_config, + save_path, + example_inputs, + opset_version: int = 14, + dynamic_axes: dict = {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + input_names=None, + output_names=None, + quant_format: str = 'QDQ', + dtype: str = 'U8S8', + recipe: str = 'QDQ_OP_FP32_BIAS', +): + """Export INT8 PyTorch model into INT8 ONNX model. + + Args: + fp32_model (torch.nn.module): fp32 model. + int8_model (torch.nn.module): int8 model. + q_config (dict): containing quantization configuration. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + quant_format (str, optional): quantization format of ONNX model. Defaults to 'QDQ'. + dtype (str, optional): data types of activation and weight of ONNX model. Defaults to 'U8S8'. + recipe (str, optionl): Recipe for processing nn.quantized.Linear module. + 'QDQ_OP_FP32_BIAS': inserting QDQ before quantizable op and using fp32 bias. 
+ 'QDQ_OP_INT32_BIAS': inserting QDQ before quantizable op and using int32 bias. + 'QDQ_OP_FP32_BIAS_QDQ': inserting QDQ before and after quantizable op and using fp32 bias. + Defaults to 'QDQ_OP_FP32_BIAS'. + """ + global op_types_to_quantize + if q_config['approach'] == 'post_training_dynamic_quant': + op_types_to_quantize=['MatMul', 'Gemm', 'Gather'] + else: + op_types_to_quantize=['MatMul', 'Gemm', 'Gather', 'Conv'] + + if quant_format == 'QDQ' and opset_version < 13: # pragma: no cover + opset_version = 13 + logger.warning("QDQ format requires opset_version >= 13, " + + "we reset opset_version={} here".format(opset_version)) + + # pylint: disable=E1101 + fp32_onnx_path = save_path + '.tmp' if save_path else 'int8-model.onnx.tmp' + torch_to_fp32_onnx( + fp32_model, + fp32_onnx_path, + example_inputs, + opset_version=opset_version, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + verbose=False, + ) + + activation_type, weight_type = set_data_type(dtype) + module_node_mapping = get_node_mapping(fp32_model, fp32_onnx_path) + quantize_nodes = get_quantizable_onnx_ops(int8_model, module_node_mapping) + + if q_config['approach'] == 'quant_aware_training': + update_weight_bias(int8_model, fp32_onnx_path) + if q_config['approach'] != 'post_training_dynamic_quant': + int8_scale_info = q_config['scale_info'] + scale_mapping = build_scale_mapping(fp32_onnx_path, module_node_mapping, int8_scale_info) + + quant_format = ortq.QuantFormat.QOperator if quant_format != 'QDQ' else ortq.QuantFormat.QDQ + + extra_options = {'OpTypesToExcludeOutputQuantizatioin': ['MatMul']} \ + if (recipe != 'QDQ_OP_FP32_BIAS_QDQ' and quant_format == ortq.QuantFormat.QDQ) else {} + + if q_config['approach'] == 'post_training_dynamic_quant': + logger.info("Quantization format is not avalible when executing dynamic quantization.") + ortq.quantize_dynamic( + fp32_onnx_path, + save_path, + per_channel=True, + weight_type=weight_type, + 
nodes_to_quantize=quantize_nodes, + nodes_to_exclude=[], + extra_options={} + ) + + else: + from .utils import DummyDataReader + dummy_datareader = DummyDataReader(fp32_onnx_path) + ortq.quantize_static( + fp32_onnx_path, + save_path, + dummy_datareader, + quant_format=quant_format, + per_channel=True, + weight_type=weight_type, + activation_type=activation_type, + nodes_to_quantize=quantize_nodes, + nodes_to_exclude=[], + extra_options=extra_options, + ) + + int8_onnx_model = recalculate_bias(save_path, scale_mapping, quantize_nodes, quant_format) + int8_onnx_model = set_scale_info(int8_onnx_model, scale_mapping, activation_type) + + if recipe == 'QDQ_OP_FP32_BIAS': + int8_onnx_model = qdq_fp32_bias(int8_onnx_model, quant_format) + elif recipe == 'QDQ_OP_INT32_BIAS': + int8_onnx_model = qdq_int32_bias(int8_onnx_model, quantize_nodes, quant_format) + elif recipe == 'QDQ_OP_FP32_BIAS_QDQ': + int8_onnx_model = qdq_fp32_bias_qdq(int8_onnx_model, quantize_nodes, quant_format) + + onnx.save(int8_onnx_model, save_path) + + os.remove(fp32_onnx_path) + info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) + logger.info("*"*len(info)) + logger.info(info) + logger.info("*"*len(info)) diff --git a/neural_compressor/experimental/export/utils.py b/neural_compressor/experimental/export/utils.py new file mode 100644 index 00000000000..08d1b59e9a9 --- /dev/null +++ b/neural_compressor/experimental/export/utils.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and + limitations under the License. + +"""Utility functions to export model from PyTorch/TensorFlow to ONNX.""" + +import numpy as np +from neural_compressor.utils.utility import LazyImport + +ort = LazyImport('onnxruntime') +ortq = LazyImport('onnxruntime.quantization') + + +def ONNX2Numpy_dtype(onnx_node_type): + """Get Numpy data type from onnx data type. + + Args: + onnx_node_type (str): data type description. + + Returns: + dtype: numpy data type + """ + # Only record special data type + ONNX2Numpy_dtype_mapping = { + "tensor(float)": np.float32, + "tensor(double)": np.float64, + } + if onnx_node_type in ONNX2Numpy_dtype_mapping: + dtype = ONNX2Numpy_dtype_mapping[onnx_node_type] + return dtype + else: + tmp = onnx_node_type.lstrip('tensor(').rstrip(')') + dtype = eval(f'np.{tmp}') + return dtype + + +class DummyDataReader(ortq.CalibrationDataReader): + """Build dummy datareader for onnx static quantization.""" + + def __init__(self, fp32_onnx_path): + """Initialize data reader. 
+ + Args: + fp32_onnx_path (str): path to onnx file + """ + session = ort.InferenceSession(fp32_onnx_path, None) + input_tensors = session.get_inputs() + input = {} + for node in input_tensors: + shape = [] + for dim in node.shape: + shape.append(dim if isinstance(dim, int) else 1) + dtype = ONNX2Numpy_dtype(node.type) + input[node.name] = np.ones(shape).astype(dtype) + self.data = [input] + self.data = iter(self.data) + + def get_next(self): + """Generate next data.""" + return next(self.data, None) diff --git a/neural_compressor/experimental/graph_optimization.py b/neural_compressor/experimental/graph_optimization.py index 5940db61d84..70d65084cdf 100644 --- a/neural_compressor/experimental/graph_optimization.py +++ b/neural_compressor/experimental/graph_optimization.py @@ -28,9 +28,10 @@ from ..strategy import STRATEGIES from ..utils import logger from ..utils.create_obj_from_config import create_dataloader -from ..utils.utility import CpuInfo, time_limit, set_backend +from ..utils.utility import CpuInfo, time_limit from .common import Model as NCModel from ..model import BaseModel +from ..model.model import get_model_fwk_name class Graph_Optimization(): """Graph_Optimization class. 
@@ -70,7 +71,6 @@ def __init__(self, conf_fname_or_obj=None): cfg = self.conf.usr_cfg if cfg.model.framework != 'NA': self.framework = cfg.model.framework.lower() - set_backend(self.framework) cfg.tuning.strategy.name = 'automixedprecision' seed = cfg.tuning.random_seed @@ -188,8 +188,8 @@ def __call__(self): def dataset(self, dataset_type, *args, **kwargs): """Get dataset.""" - from .data import DATASETS - return DATASETS(self.framework)[dataset_type](*args, **kwargs) + from .data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) def set_config_by_model(self, model_obj): """Set model config.""" @@ -300,15 +300,23 @@ def model(self, user_model): """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - self._model = NCModel(user_model) + if self.conf.usr_cfg.model.framework == 'NA': + self.framework = get_model_fwk_name(user_model) + if self.framework == "pytorch": + if self.conf.usr_cfg.model.backend == "default": + self.framework = "pytorch_fx" + elif self.conf.usr_cfg.model.backend == "ipex": + self.framework = "pytorch_ipex" + self.conf.usr_cfg.model.framework = self.framework + self._model = NCModel(user_model, framework=self.framework) + self.set_config_by_model(self._model) + else: + self._model = NCModel(user_model, framework=self.framework) else: + assert self.conf.usr_cfg.model.framework != 'NA', \ + "Please pass an original framework model but not neural compressor model!" 
self._model = user_model - if self.conf.usr_cfg.model.framework == 'NA': - self.set_config_by_model(self._model) - self.framework = self.conf.usr_cfg.model.framework.lower() - set_backend(self.framework) - @property def metric(self): """Get metric.""" diff --git a/neural_compressor/experimental/metric/metric.py b/neural_compressor/experimental/metric/metric.py index b02b52cc861..31f0550b071 100644 --- a/neural_compressor/experimental/metric/metric.py +++ b/neural_compressor/experimental/metric/metric.py @@ -113,6 +113,7 @@ def __init__(self) -> None: framework_metrics = {"tensorflow": TensorflowMetrics, "tensorflow_itex": TensorflowMetrics, + "keras": TensorflowMetrics, "mxnet": MXNetMetrics, "pytorch": PyTorchMetrics, "pytorch_ipex": PyTorchMetrics, @@ -132,6 +133,7 @@ def __init__(self) -> None: registry_metrics = {"tensorflow": TENSORFLOW_METRICS, "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "keras": TENSORFLOW_METRICS, "mxnet": MXNET_METRICS, "pytorch": PYTORCH_METRICS, "pytorch_ipex": PYTORCH_METRICS, @@ -156,7 +158,7 @@ def __init__(self, framework: str): Args: framework: The framwork name. 
""" - assert framework in ("tensorflow", "tensorflow_itex", + assert framework in ("tensorflow", "tensorflow_itex","keras", "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", "onnxrt_qoperator"), \ diff --git a/neural_compressor/experimental/mixed_precision.py b/neural_compressor/experimental/mixed_precision.py index 9a57f67ba63..e3abe923d5e 100644 --- a/neural_compressor/experimental/mixed_precision.py +++ b/neural_compressor/experimental/mixed_precision.py @@ -27,7 +27,7 @@ from ..strategy import STRATEGIES from ..utils import logger from ..utils.create_obj_from_config import create_dataloader -from ..utils.utility import CpuInfo, time_limit, set_backend +from ..utils.utility import CpuInfo, time_limit from ..model import BaseModel from .graph_optimization import GraphOptimization @@ -65,7 +65,6 @@ def __init__(self, conf_fname_or_obj=None): cfg = self.conf.usr_cfg if cfg.model.framework != 'NA': self.framework = cfg.model.framework.lower() - set_backend(self.framework) cfg.tuning.strategy.name = 'automixedprecision' seed = cfg.tuning.random_seed @@ -104,12 +103,8 @@ def __call__(self): if 'onnx' in self.framework and 'bf16' in self._precisions: logger.warning("Mixed precision doesn't support bf16 for ONNX models.") sys.exit(0) - elif 'onnx' not in self.framework and 'fp16' in self._precisions: - logger.warning("Mixed precision doesn't support fp16 for {} models.".format( - self.framework)) - sys.exit(0) - - if self._precisions == ['bf16'] and not CpuInfo().bf16: # pragma: no cover + + if 'bf16' in self._precisions and not CpuInfo().bf16: # pragma: no cover if os.getenv('FORCE_BF16') == '1': logger.warning("Mixed precision will generate bf16 graph although " \ "the hardware doesn't support bf16 instruction.") @@ -118,15 +113,6 @@ def __call__(self): "doesn't support bf16 instruction.") sys.exit(0) - if self._precisions == ['fp16'] and self.conf.usr_cfg.device != 'gpu': - if os.getenv('FORCE_FP16') == '1': - 
logger.warning("Mixed precision will generate fp16 graph although " \ - "the hardware doesn't support fp16 instruction.") - else: - logger.warning("Mixed precision exits due to the hardware " \ - "doesn't support fp16 instruction.") - sys.exit(0) - cfg = self.conf.usr_cfg if self.framework == 'tensorflow': self._model.name = cfg.model.name diff --git a/neural_compressor/experimental/model_conversion.py b/neural_compressor/experimental/model_conversion.py index 489128d93e3..6ab1edbd104 100644 --- a/neural_compressor/experimental/model_conversion.py +++ b/neural_compressor/experimental/model_conversion.py @@ -17,20 +17,14 @@ """Helps convert one model format to another.""" -import pickle -import random import tempfile -import sys import datetime -import numpy as np import yaml from neural_compressor.adaptor import FRAMEWORKS from ..conf.config import Conf from ..conf.dotdict import deep_get, deep_set, DotDict -from ..strategy import STRATEGIES from ..utils import logger from ..utils.create_obj_from_config import create_dataloader, create_eval_func -from ..utils.utility import CpuInfo, set_backend from .common import Model as NCModel from ..model import BaseModel @@ -85,8 +79,6 @@ def __init__(self, conf_fname_or_obj=None): else: self.conf = None - set_backend(self.framework) - def __call__(self): """Execute model conversion process. @@ -162,8 +154,8 @@ def dataset(self, dataset_type, *args, **kwargs): Returns: class: dataset class """ - from .data import DATASETS - return DATASETS(self.framework)[dataset_type](*args, **kwargs) + from .data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) @property def source(self): diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 5f061f6006f..3c63e19d27f 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -15,8 +15,8 @@ # limitations under the License. 
import os -import pandas as pd +import pandas as pd from neural_compressor.conf.config import Conf, NASConfig from neural_compressor.utils import logger @@ -37,36 +37,73 @@ class DyNAS(NASBase): def __init__(self, conf_fname_or_obj): """Initialize the attributes.""" - from .dynast.dynas_manager import ParameterManager + from .dynast.dynas_manager import (ParameterManager, + TransformerLTEncoding) from .dynast.dynas_predictor import Predictor - from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager + from .dynast.dynas_search import (ProblemMultiObjective, + SearchAlgoManager) from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, OFARunner) + EvaluationInterfaceResNet50, + EvaluationInterfaceTransformerLT, + OFARunner, TransformerLTRunner) + self.ParameterManager = ParameterManager self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective self.SearchAlgoManager = SearchAlgoManager - self.OFARunner = OFARunner self.SUPERNET_PARAMETERS = { - 'ofa_resnet50': - {'d' : {'count' : 5, 'vars' : [0, 1, 2]}, - 'e' : {'count' : 18, 'vars' : [0.2, 0.25, 0.35]}, - 'w' : {'count' : 6, 'vars' : [0, 1, 2]} }, - 'ofa_mbv3_d234_e346_k357_w1.0': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, - 'ofa_mbv3_d234_e346_k357_w1.2': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} } - } + 'ofa_resnet50': { + 'd': {'count': 5, 'vars': [0, 1, 2]}, + 'e': {'count': 18, 'vars': [0.2, 0.25, 0.35]}, + 'w': {'count': 6, 'vars': [0, 1, 2]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.0': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': [2, 3, 4]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.2': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': 
[2, 3, 4]}, + }, + 'transformer_lt_wmt_en_de': { + 'encoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'decoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'encoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_layer_num': {'count': 1, 'vars': [6, 5, 4, 3, 2, 1]}, + 'encoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_ende_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_arbitrary_ende_attn': {'count': 6, 'vars': [-1, 1, 2]}, + }, + } + self.RUNNERS = { + 'ofa_resnet50': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, + 'transformer_lt_wmt_en_de': TransformerLTRunner, + } + self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, - 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3} + 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3, + 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} + self.LINAS_INNERLOOP_EVALS = {'ofa_resnet50': 5000, 'ofa_mbv3_d234_e346_k357_w1.0': 20000, - 'ofa_mbv3_d234_e346_k357_w1.2': 20000} + 'ofa_mbv3_d234_e346_k357_w1.2': 20000, + 'transformer_lt_wmt_en_de': 10000} + + self.SUPERNET_ENCODING = { + 'ofa_resnet50': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, + 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, + 'transformer_lt_wmt_en_de': TransformerLTEncoding, + } + super().__init__() self.acc_predictor = None self.macs_predictor = None @@ -74,7 +111,6 @@ def __init__(self, conf_fname_or_obj): self.results_csv_path = None self.init_cfg(conf_fname_or_obj) - def estimate(self, individual): """Estimate performance of the model. 
@@ -85,19 +121,19 @@ def estimate(self, individual): def init_for_search(self): """Initialize the search configuration.""" - self.supernet_manager = self.ParameterManager( - param_dict=self.SUPERNET_PARAMETERS[self.supernet], - seed=self.seed + self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( + param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) # Validation High-Fidelity Measurement Runner - self.runner_validate = self.OFARunner( + self.runner_validate = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=None, macs_predictor=None, latency_predictor=None, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path, ) # Setup validation interface @@ -121,16 +157,23 @@ def search(self): # Randomly sample search space for initial population # if number of results in results_csv_path smaller than population. + + if not os.path.exists(self.results_csv_path): + # Clear also creates empty CSV file. 
+ self.validation_interface.clear_csv() + df = pd.read_csv(self.results_csv_path) - latest_population = [self.supernet_manager.random_sample() \ - for _ in range(max(self.population - df.shape[0], 0))] + latest_population = [self.supernet_manager.random_sample() + for _ in range(max(self.population - df.shape[0], 0))] # Start Lightweight Iterative Neural Architecture Search (LINAS) num_loops = round(self.num_evals/self.population) for loop in range(num_loops): - logger.info('[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) - for individual in latest_population: + for i, individual in enumerate(latest_population): + logger.info( + '[DyNAS-T] Starting eval {} of {} in LINAS loop {} of {}.'.format( + i+1, len(latest_population), loop+1, num_loops)) self.validation_interface.eval_subnet(individual) self.create_acc_predictor() @@ -138,13 +181,14 @@ def search(self): self.create_latency_predictor() # Inner-loop Low-Fidelity Predictor Runner, need to re-instantiate every loop - runner_predict = self.OFARunner( + runner_predict = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=self.acc_predictor, macs_predictor=self.macs_predictor, latency_predictor=self.latency_predictor, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path ) # Setup validation interface @@ -153,7 +197,7 @@ def search(self): manager=self.supernet_manager, metrics=self.metrics, csv_path=None, - predictor_mode = True + predictor_mode=True ) problem = self.ProblemMultiObjective( @@ -163,19 +207,22 @@ def search(self): ) if self.search_algo == 'age': - search_manager = self.SearchAlgoManager(algorithm='age', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='age', seed=self.seed) search_manager.configure_age(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) else: - search_manager = 
self.SearchAlgoManager(algorithm='nsga2', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='nsga2', seed=self.seed) search_manager.configure_nsga2(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) results = search_manager.run_search(problem) latest_population = results.pop.get('X') - logger.info("[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) + logger.info( + "[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) output = list() for individual in latest_population: @@ -193,11 +240,11 @@ def select_model_arch(self): # pragma: no cover def create_acc_predictor(self): """Create the accuracy predictor.""" if 'acc' in self.metrics: - logger.info('Building Accuracy Predictor') + logger.info('[DyNAS-T] Building Accuracy Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='acc', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.acc_predictor = self.Predictor() self.acc_predictor.train(features, labels.ravel()) @@ -207,11 +254,11 @@ def create_acc_predictor(self): def create_macs_predictor(self): """Create the MACs predictor.""" if 'macs' in self.metrics: - logger.info('Building MACs Predictor') + logger.info('[DyNAS-T] Building MACs Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='macs', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.macs_predictor = self.Predictor() self.macs_predictor.train(features, labels.ravel()) @@ -221,11 +268,11 @@ def create_macs_predictor(self): def create_latency_predictor(self): """Create the latency predictor.""" 
if 'lat' in self.metrics: - logger.info('Building Latency Predictor') + logger.info('[DyNAS-T] Building Latency Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='lat', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.latency_predictor = self.Predictor() self.latency_predictor.train(features, labels.ravel()) @@ -240,11 +287,11 @@ def init_cfg(self, conf_fname_or_obj): elif isinstance(conf_fname_or_obj, NASConfig): conf_fname_or_obj.validate() self.conf = conf_fname_or_obj.usr_cfg - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file or an object of NASConfig." ) - #self.init_search_cfg(self.conf.nas) + # self.init_search_cfg(self.conf.nas) assert 'dynas' in self.conf.nas, "Must specify dynas section." dynas_config = self.conf.nas.dynas self.search_algo = self.conf.nas.search.search_algorithm @@ -253,8 +300,9 @@ def init_cfg(self, conf_fname_or_obj): self.num_evals = dynas_config.num_evals self.results_csv_path = dynas_config.results_csv_path self.dataset_path = dynas_config.dataset_path + self.supernet_ckpt_path = dynas_config.supernet_ckpt_path self.batch_size = dynas_config.batch_size - if dynas_config.population < 10: # pragma: no cover + if dynas_config.population < 10: # pragma: no cover raise NotImplementedError( "Please specify a population size >= 10" ) diff --git a/neural_compressor/experimental/nas/dynast/__init__.py b/neural_compressor/experimental/nas/dynast/__init__.py index eeb58f6f0a7..c93d1e9d016 100644 --- a/neural_compressor/experimental/nas/dynast/__init__.py +++ b/neural_compressor/experimental/nas/dynast/__init__.py @@ -15,4 +15,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 3a4868d41b5..4ba28d087be 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -23,9 +23,8 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split - from neural_compressor.utils import logger +from sklearn.model_selection import train_test_split class ParameterManager: @@ -67,21 +66,25 @@ def process_param_dict(self) -> Tuple[list, list, int]: for i in range(options['count']): parameter_upperbound.append(len(options['vars']) - 1) index_simple = [x for x in range(len(options['vars']))] - parameter_mapper.append(dict(zip(index_simple, options['vars']))) + parameter_mapper.append( + dict(zip(index_simple, options['vars']))) - if self.verbose: # pragma: no cover + if self.verbose: # pragma: no cover logger.info( - '[DyNAS-T] Problem definition variables: {}'.format(parameter_count) + '[DyNAS-T] Problem definition variables: {}'.format( + parameter_count) ) logger.info( - '[DyNAS-T] Variable Upper Bound array: {}'.format(parameter_upperbound) + '[DyNAS-T] Variable Upper Bound array: {}'.format( + parameter_upperbound) ) logger.info( '[DyNAS-T] Mapping dictionary created of length: {}'.format( len(parameter_mapper) ) ) - logger.info('[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) + logger.info( + '[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) return parameter_mapper, parameter_upperbound, parameter_count @@ -142,7 +145,8 @@ def random_samples(self, size: int = 100, trial_limit: int = 100000) -> List[lis trials += 1 if trials >= trial_limit: - logger.warning('[DyNAS-T] Unable to create unique list of samples.') + logger.warning( + 
'[DyNAS-T] Unable to create unique list of samples.') return pymoo_vector_list @@ -172,7 +176,8 @@ def translate2pymoo(self, parameters: dict) -> list: param_counter = 0 for i in range(value['count']): output.append( - self.inv_mapper[mapper_counter][parameters[key][param_counter]] + self.inv_mapper[mapper_counter][parameters[key] + [param_counter]] ) mapper_counter += 1 param_counter += 1 @@ -277,3 +282,193 @@ def create_training_set( ) ) return features_train, features_test, labels_train, labels_test + + +class TransformerLTEncoding(ParameterManager): #noqa: D101 + def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): #noqa: D107 + super().__init__(param_dict, verbose, seed) + + def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 + + features = [] + features.extend(subnet_cfg['encoder_embed_dim']) + + encode_layer_num_int = 6 + + # Encoder FFN Embed Dim + encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] + + if encode_layer_num_int < 6: + encoder_ffn_embed_dim.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_ffn_embed_dim) + + # Encoder Self-Attn Heads + + encoder_self_attention_heads = subnet_cfg['encoder_self_attention_heads'][:encode_layer_num_int] + + if encode_layer_num_int < 6: + encoder_self_attention_heads.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_self_attention_heads) + + features.extend(subnet_cfg['decoder_embed_dim']) + + decoder_layer_num = subnet_cfg['decoder_layer_num'] + decoder_layer_num_int = decoder_layer_num[0] + features.extend(decoder_layer_num) + + # Decoder FFN Embed Dim + decoder_ffn_embed_dim = subnet_cfg['decoder_ffn_embed_dim'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ffn_embed_dim.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_ffn_embed_dim) + + # Decoder Attn Heads + decoder_self_attention_heads = subnet_cfg['decoder_self_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + 
decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_self_attention_heads) + + # Decoder ENDE HEADS + + decoder_ende_attention_heads = subnet_cfg['decoder_ende_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) + + features.extend(decoder_ende_attention_heads) + + arbitrary_ende_attn_trans = [] + for i in range(decoder_layer_num_int): + if subnet_cfg['decoder_arbitrary_ende_attn'][i] == -1: + arbitrary_ende_attn_trans.append(1) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 1: + arbitrary_ende_attn_trans.append(2) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 2: + arbitrary_ende_attn_trans.append(3) + + if decoder_layer_num_int < 6: + arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) + features.extend(arbitrary_ende_attn_trans) + + if provide_onehot == True: + examples = np.array([features]) + one_hot_count = 0 + unique_values = self.unique_values + + for unique in unique_values: + one_hot_count += len(unique.tolist()) + + one_hot_examples = np.zeros((examples.shape[0], one_hot_count)) + for e, example in enumerate(examples): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[ + 0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + return one_hot_examples + + else: + return features + + def import_csv( + self, + filepath: str, + config: str, + objective: str, + column_names: List[str] = None, + drop_duplicates: bool = True, + ) -> pd.DataFrame: + """Import a csv file generated from a supernetwork search for the purpose of training a predictor. + + filepath - path of the csv to be imported. + config - the subnetwork configuration + objective - target/label for the subnet configuration (e.g. 
accuracy, latency) + column_names - a list of column names for the dataframe + df - the output dataframe that contains the original config dict, pymoo, and 1-hot + equivalent vector for training. + """ + if column_names == None: + df = pd.read_csv(filepath) + else: + df = pd.read_csv(filepath) + df.columns = column_names + df = df[[config, objective]] + # Old corner case coverage + df[config] = df[config].replace({'null': 'None'}, regex=True) + + if drop_duplicates: + df.drop_duplicates(subset=[config], inplace=True) + df.reset_index(drop=True, inplace=True) + + convert_to_dict = list() + convert_to_pymoo = list() + convert_to_onehot = list() + for i in range(len(df)): + # Elastic Param Config format + config_as_dict = ast.literal_eval(df[config].iloc[i]) + convert_to_dict.append(config_as_dict) + # PyMoo 1-D vector format + config_as_pymoo = self.translate2pymoo(config_as_dict) + convert_to_pymoo.append(config_as_pymoo) + # Onehot predictor format + config_as_onehot = self.onehot_custom( + config_as_dict, provide_onehot=False) + convert_to_onehot.append(config_as_onehot) + df[config] = convert_to_dict + df['config_pymoo'] = convert_to_pymoo + df['config_onehot'] = convert_to_onehot + + return df + + # @staticmethod + def create_training_set( + self, + dataframe: pd.DataFrame, + train_with_all: bool = True, + split: float = 0.33, + seed: bool = None, + ) -> Tuple[list, list, list, list]: + """Create a sklearn compatible test/train. + + The set is created from an imported results csv after "import_csv" method is run. 
+ """ + collect_rows = list() + for i in range(len(dataframe)): + collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) + features = np.asarray(collect_rows) + labels = dataframe.drop( + columns=['config', 'config_pymoo', 'config_onehot']).values + + assert len(features) == len(labels) + one_hot_count = 0 + unique_values = [] + + for c in range(features.shape[1]): + unique_values.append(np.unique(features[:, c])) + one_hot_count += len(unique_values[-1]) + one_hot_examples = np.zeros((features.shape[0], one_hot_count)) + for e, example in enumerate(features): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + + features = one_hot_examples + self.unique_values = unique_values + if train_with_all: + logger.info('[DyNAS-T] Training set size={}'.format(len(labels))) + return features, labels + else: + features_train, features_test, labels_train, labels_test = train_test_split( + features, labels, test_size=split, random_state=seed + ) + logger.info( + '[DyNAS-T] Test ({}) Train ({}) ratio is {}.'.format( + len(labels_train), len(labels_test), split + ) + ) + return features_train, features_test, labels_train, labels_test diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index fd1d80bf2b6..15b167bb86d 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -48,15 +48,25 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, self.best_index = 0 # Create lists of regressors and associated hyper-parameters - regressors = [linear_model.Ridge(max_iter=max_iterations), - svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations)] + regressors = [ + linear_model.Ridge(max_iter=max_iterations), + svm.SVR(kernel='rbf', 
gamma='auto', + epsilon=0.0, max_iter=max_iterations), + ] hyper_parameters = [{'alpha': alphas}, {'C': cost_factors}] # Create list of hyper-parameter searchers self.searchers = [] for regressor, parameters in zip(regressors, hyper_parameters): - self.searchers.append(GridSearchCV(estimator=regressor, param_grid=parameters, n_jobs=-1, - scoring='neg_mean_absolute_percentage_error', verbose=SEARCHER_VERBOSITY if (verbose) else 0)) + self.searchers.append( + GridSearchCV( + estimator=regressor, + param_grid=parameters, + n_jobs=-1, + scoring='neg_mean_absolute_percentage_error', + verbose=SEARCHER_VERBOSITY if (verbose) else 0, + ) + ) def train(self, examples, labels): """Train the predictor on the specified examples and labels using the underlying regressor. @@ -65,8 +75,14 @@ def train(self, examples, labels): examples: Examples to be used for training. labels: Labels to be used for training. """ + # Compute normalization factor + max_label = np.amax(np.abs(labels)) + if max_label > 0.0: + self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) + else: + self.normalization_factor = 1.0 + # Compute normalized labels - self.normalization_factor = 10 ** (np.floor(np.log10(np.amax(labels))) - 1.0) normalized_labels = labels / self.normalization_factor # Train regressors with optimal parameters @@ -101,7 +117,7 @@ def get_parameters(self): Optimal parameter values of the underlying regressor. 
""" # Retrieve optimal parameters - parameters = {} + parameters = {'best_index': self.best_index} for searcher in self.searchers: regressor_name = searcher.best_estimator_.__class__.__name__ for key in searcher.best_params_: diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index d1a0996eb7e..e8c1c15224c 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -22,15 +22,15 @@ import autograd.numpy as anp import numpy as np import pymoo +from neural_compressor.experimental.nas.dynast.dynas_utils import \ + EvaluationInterface +from neural_compressor.utils import logger from pymoo.algorithms.moo.age import AGEMOEA from pymoo.algorithms.moo.nsga2 import NSGA2 from pymoo.core.problem import Problem from pymoo.factory import get_crossover, get_mutation, get_sampling from pymoo.optimize import minimize -from neural_compressor.experimental.nas.dynast.dynas_utils import EvaluationInterface -from neural_compressor.utils import logger - class SearchAlgoManager: """Manage the search parameters for the DyNAS-T single/multi-objective search. 
@@ -60,9 +60,10 @@ def __init__( elif self.algorithm == 'age': self.configure_age() self.engine = 'pymoo' - else: # pragma: no cover + else: # pragma: no cover logger.error( - '[DyNAS-T] algorithm "{}" not implemented.'.format(self.algorithm) + '[DyNAS-T] algorithm "{}" not implemented.'.format( + self.algorithm) ) raise NotImplementedError @@ -88,8 +89,10 @@ def configure_nsga2( self.algorithm_def = NSGA2( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -116,8 +119,10 @@ def configure_age( self.algorithm_def = AGEMOEA( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -140,7 +145,7 @@ def run_search( save_history=save_history, verbose=self.verbose, ) - else: # pragma: no cover + else: # pragma: no cover logger.error('[DyNAS-T] Invalid algorithm engine configuration!') raise NotImplementedError @@ -194,12 +199,11 @@ def _evaluate( # Measure new individuals for i in range(len(x)): - _, objective_x, objective_y = self.evaluation_interface.eval_subnet(x[i]) + _, objective_x, objective_y = self.evaluation_interface.eval_subnet( + x[i]) objective_x_arr.append(objective_x) objective_y_arr.append(objective_y) - print('.', end='', flush=True) - # Update PyMoo with evaluation data out["F"] = anp.column_stack([objective_x_arr, objective_y_arr]) diff --git 
a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 133010ef9dc..9c9e4b1f4ce 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -27,16 +27,23 @@ import numpy as np import ofa from fvcore.nn import FlopCountAnalysis -from ofa.imagenet_classification.data_providers.imagenet import ImagenetDataProvider -from ofa.imagenet_classification.run_manager import ImagenetRunConfig, RunManager -from ofa.tutorial.flops_table import rm_bn_from_net - -from neural_compressor.experimental.nas.dynast.dynas_manager import ParameterManager +from neural_compressor.experimental.nas.dynast.dynas_manager import \ + ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor +# from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( +# compute_bleu, compute_latency, compute_macs) from neural_compressor.utils.utility import LazyImport, logger +from ofa.imagenet_classification.data_providers.imagenet import \ + ImagenetDataProvider +from ofa.imagenet_classification.run_manager import (ImagenetRunConfig, + RunManager) +from ofa.tutorial.flops_table import rm_bn_from_net torch = LazyImport('torch') torchvision = LazyImport('torchvision') +transformer_interface = LazyImport( + 'neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface' +) def get_macs( @@ -171,8 +178,9 @@ def __init__( acc_predictor: Predictor, macs_predictor: Predictor, latency_predictor: Predictor, - imagenetpath: str, + datasetpath: str, batch_size: int, + **kwargs, ) -> None: """Initialize the attributes.""" self.supernet = supernet @@ -181,7 +189,7 @@ def __init__( self.latency_predictor = latency_predictor self.device = 'cpu' self.test_size = None - ImagenetDataProvider.DEFAULT_PATH = imagenetpath + ImagenetDataProvider.DEFAULT_PATH = 
class TransformerLTRunner(Runner):
    """Runner for the Transformer-LT (machine translation) super-network.

    Prediction-mode calls are delegated to the supplied predictor objects,
    while actual measurements (BLEU / MACs / latency) are delegated to the
    lazily imported ``transformer_interface`` module.
    """

    def __init__(
        self,
        supernet: str,
        acc_predictor: Predictor,
        macs_predictor: Predictor,
        latency_predictor: Predictor,
        datasetpath: str,
        batch_size: int,
        checkpoint_path: str,
        **kwargs,
    ) -> None:
        """Store predictors, dataset location and checkpoint location."""
        self.supernet = supernet
        self.acc_predictor = acc_predictor
        self.macs_predictor = macs_predictor
        self.latency_predictor = latency_predictor
        self.device = 'cpu'
        self.test_size = None
        self.batch_size = batch_size
        self.dataset_path = datasetpath
        self.checkpoint_path = checkpoint_path

    def estimate_accuracy_bleu(self, subnet_cfg: dict) -> float:
        """Predict the BLEU score of a sub-network configuration."""
        return self.acc_predictor.predict(subnet_cfg)

    def estimate_macs(self, subnet_cfg: dict) -> int:
        """Predict the MACs of a sub-network configuration."""
        return self.macs_predictor.predict(subnet_cfg)

    def estimate_latency(self, subnet_cfg: dict) -> float:
        """Predict the latency of a sub-network configuration."""
        return self.latency_predictor.predict(subnet_cfg)

    def validate_bleu(self, subnet_cfg: dict) -> float:
        """Measure the BLEU score of a sub-network on the validation data."""
        return transformer_interface.compute_bleu(subnet_cfg, self.dataset_path,
                                                  self.checkpoint_path)

    def validate_macs(self, subnet_cfg: dict) -> float:
        """Measure Torch model's FLOPs/MACs as per FVCore calculation.

        Args:
            subnet_cfg: sub-network Torch model
        Returns:
            `macs`
        """
        macs = transformer_interface.compute_macs(subnet_cfg, self.dataset_path)
        logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs))
        return macs

    @torch.no_grad()
    def measure_latency(self, subnet_cfg: dict) -> Tuple[float, float]:
        """Measure model's latency.

        Args:
            subnet_cfg: sub-network Torch model
        Returns:
            mean latency; std latency
        """
        latency_mean, latency_std = transformer_interface.compute_latency(
            subnet_cfg, self.dataset_path, self.batch_size)
        logger.info(
            '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std))
        return latency_mean, latency_std
class EvaluationInterfaceTransformerLT(EvaluationInterface):
    """Evaluation interface for the Transformer-LT super-network.

    Translates a PyMoo design vector into an elastic-parameter dictionary,
    evaluates (or predicts) BLEU / MACs / latency for the resulting
    sub-network, and optionally logs each evaluation to a CSV file.
    """

    def __init__(
        self,
        evaluator: Runner,
        manager: ParameterManager,
        metrics=None,
        predictor_mode=False,
        csv_path=None,
    ) -> None:
        """Initialize the interface.

        ``metrics`` defaults to ``['acc', 'macs']``; a ``None`` sentinel is
        used instead of a mutable default argument so the list is not shared
        across instances.
        """
        if metrics is None:
            metrics = ['acc', 'macs']
        super().__init__(evaluator, manager, metrics, predictor_mode, csv_path)

    def eval_subnet(
        self,
        x: list,
    ) -> Tuple[dict, float, float]:
        """Evaluate one sub-network encoding.

        Returns (sample dict, objective_x, objective_y). PyMoo only
        minimizes objectives, so BLEU is returned negated.
        """
        # PyMoo vector to Elastic Parameter Mapping
        param_dict = self.manager.translate2param(x)

        sample = {
            'encoder': {
                'encoder_embed_dim': param_dict['encoder_embed_dim'][0],
                'encoder_layer_num': 6,  # param_dict['encoder_layer_num'][0],
                'encoder_ffn_embed_dim': param_dict['encoder_ffn_embed_dim'],
                'encoder_self_attention_heads': param_dict['encoder_self_attention_heads'],
            },
            'decoder': {
                'decoder_embed_dim': param_dict['decoder_embed_dim'][0],
                'decoder_layer_num': param_dict['decoder_layer_num'][0],
                'decoder_ffn_embed_dim': param_dict['decoder_ffn_embed_dim'],
                'decoder_self_attention_heads': param_dict['decoder_self_attention_heads'],
                'decoder_ende_attention_heads': param_dict['decoder_ende_attention_heads'],
                'decoder_arbitrary_ende_attn': param_dict['decoder_arbitrary_ende_attn']
            }
        }

        subnet_sample = copy.deepcopy(sample)

        # Always evaluate/predict BLEU; MACs/latency only when requested.
        lat, macs = 0, 0
        if self.predictor_mode:
            # The one-hot encoding is identical for all predictors — compute once.
            features = self.manager.onehot_custom(param_dict).reshape(1, -1)
            bleu = self.evaluator.estimate_accuracy_bleu(features)[0]
            if 'macs' in self.metrics:
                macs = self.evaluator.estimate_macs(features)[0]
            if 'lat' in self.metrics:
                lat = self.evaluator.estimate_latency(features)[0]
        else:
            bleu = self.evaluator.validate_bleu(subnet_sample)
            macs = self.evaluator.validate_macs(subnet_sample)
            if 'lat' in self.metrics:
                lat, _ = self.evaluator.measure_latency(subnet_sample)

        if self.csv_path:
            with open(self.csv_path, 'a') as f:
                writer = csv.writer(f)
                date = str(datetime.now())
                result = [param_dict, date, lat, macs, bleu, ]
                writer.writerow(result)

        # PyMoo only minimizes objectives, thus accuracy needs to be negative
        # Requires format: subnetwork, objective x, objective y
        if 'lat' in self.metrics:
            return sample, lat, -bleu
        return sample, macs, -bleu

    def clear_csv(self) -> None:
        """Truncate the CSV log and write the Transformer-LT header row."""
        if self.csv_path:
            # Context manager guarantees the file handle is closed on error.
            with open(self.csv_path, "w") as f:
                writer = csv.writer(f)
                result = ['Sub-network', 'Date',
                          'Latency (ms)', 'MACs', 'BLEU']
                writer.writerow(result)
AttributeError as ae: # pragma: no cover + except AttributeError as ae: # pragma: no cover logger.error( 'Model {model_name} not available. This can be due to either a typo or the model is not ' 'available in torchvision=={torchvision_version}. \nAvailable models: {available_models}'.format( model_name=model_name, torchvision_version=torchvision.__version__, available_models=', '.join( - [m for m in dir(torchvision.models) if not m.startswith('_')] + [m for m in dir(torchvision.models) + if not m.startswith('_')] ), ) ) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py new file mode 100644 index 00000000000..451e864f2c7 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This module contains all code related to the supernets.""" \ No newline at end of file diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py new file mode 100644 index 00000000000..9003687dcb2 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains all code related to the machine translation (Transformer LT) supernet.""" diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py new file mode 100644 index 00000000000..1a5c9739372 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -0,0 +1,638 @@ +#noqa: D100 +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import defaultdict + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import Parameter +from torch.nn.modules.module import _addindent + +from neural_compressor.utils.utility import LazyImport + +fairseq = LazyImport("fairseq") + +INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) + + +def _get_full_incremental_state_key(module_instance, key): + module_name = module_instance.__class__.__name__ + + # assign a unique ID to each module instance, so that incremental state is + # not shared across module instances + if not hasattr(module_instance, '_fairseq_instance_id'): + INCREMENTAL_STATE_INSTANCE_ID[module_name] += 1 + module_instance._fairseq_instance_id = INCREMENTAL_STATE_INSTANCE_ID[module_name] + + return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key) + + +def get_incremental_state(module, incremental_state, key): #noqa: D102 + """Helper for getting incremental state for an nn.Module.""" + full_key = _get_full_incremental_state_key(module, key) + if incremental_state is None or full_key not in incremental_state: + return None + return incremental_state[full_key] + + +def set_incremental_state(module, incremental_state, key, value): #noqa: D102 + """Helper for setting incremental state for an nn.Module.""" + if incremental_state is not None: + full_key = _get_full_incremental_state_key(module, key) + incremental_state[full_key] = value + + +class EmbeddingSuper(nn.Embedding): #noqa: D101 + def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, 
**kwargs): #noqa: D107 + super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) + + # the largest embed dim + self.super_embed_dim = { + 'encoder': super_embed_dim, 'decoder': super_embed_dim} + + # the current sampled embed dim + self.sample_embed_dim = {'encoder': None, 'decoder': None} + + self.samples = {'encoder': {}, 'decoder': {}} + self.profiling = False + self.reset_parameters() + + def profile(self, mode=True): #noqa: D102 + self.profiling = mode + + def reset_parameters(self): #noqa: D102 + super().reset_parameters() + nn.init.normal_(self.weight, mean=0, std=self.embedding_dim ** -0.5) + nn.init.constant_(self.weight[self.padding_idx], 0) + + def set_sample_config(self, sample_embed_dim, part): #noqa: D102 + self.sample_embed_dim[part] = sample_embed_dim + self._sample_parameters(part) + + def _sample_parameters(self, part): + weight = self.weight[..., :self.sample_embed_dim[part]] + self.samples[part]['weight'] = weight + + return self.samples + + def sample_parameters(self, part, resample=False): #noqa: D102 + return self._sample_parameters(part) if self.profiling or resample else self.samples + + def sampled_weight(self, part): #noqa: D102 + return self.sample_parameters(part)[part]['weight'] + + def forward(self, input, part='encoder'): #noqa: D102 + return F.embedding( + input, + self.sampled_weight(part), + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) + + +class LinearSuper(nn.Linear): #noqa: D101 + def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): #noqa: D107 + super().__init__(super_in_dim, super_out_dim, bias=bias) + + # super_in_dim and super_out_dim indicate the largest network! 
+ self.super_in_dim = super_in_dim + self.super_out_dim = super_out_dim + + # input_dim and output_dim indicate the current sampled size + self.sample_in_dim = None + self.sample_out_dim = None + + self.samples = {} + + self._reset_parameters(bias, uniform_, non_linear) + self.profiling = False + + def profile(self, mode=True): #noqa: D102 + self.profiling = mode + + def sample_parameters(self, resample=False): #noqa: D102 + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _reset_parameters(self, bias, uniform_, non_linear): + nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( + self.weight, non_linear=non_linear) + if bias: + nn.init.constant_(self.bias, 0.) + + def set_sample_config(self, sample_in_dim, sample_out_dim): #noqa: D102 + self.sample_in_dim = sample_in_dim + self.sample_out_dim = sample_out_dim + + self._sample_parameters() + + def _sample_parameters(self): + self.samples['weight'] = sample_weight( + self.weight, self.sample_in_dim, self.sample_out_dim) + self.samples['bias'] = self.bias + if self.bias is not None: + self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) + return self.samples + + def forward(self, x): #noqa: D102 + self.sample_parameters() + return F.linear(x, self.samples['weight'], self.samples['bias']) + + def calc_sampled_param_num(self): #noqa: D102 + assert 'weight' in self.samples.keys() + weight_numel = self.samples['weight'].numel() + + if self.samples['bias'] is not None: + bias_numel = self.samples['bias'].numel() + else: + bias_numel = 0 + + return weight_numel + bias_numel + + +def sample_weight(weight, sample_in_dim, sample_out_dim): #noqa: D103 + sample_weight = weight[:, :sample_in_dim] + sample_weight = sample_weight[:sample_out_dim, :] + + return sample_weight + + +def sample_bias(bias, sample_out_dim): #noqa: D103 + sample_bias = bias[:sample_out_dim] + + return sample_bias + + +def LayerNorm(normalized_shape, eps=1e-5, 
elementwise_affine=True, export=False): #noqa: D103 + if not export and torch.cuda.is_available(): + try: + from apex.normalization import FusedLayerNorm + return FusedLayerNorm(normalized_shape, eps, elementwise_affine) + except ImportError: + pass + return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) + + +class LayerNormSuper(torch.nn.LayerNorm): #noqa: D101 + def __init__(self, super_embed_dim): #noqa: D107 + super().__init__(super_embed_dim) + + # the largest embed dim + self.super_embed_dim = super_embed_dim + + # the current sampled embed dim + self.sample_embed_dim = None + + self.samples = {} + self.profiling = False + + def profile(self, mode=True): #noqa: D102 + self.profiling = mode + + def sample_parameters(self, resample=False): #noqa: D102 + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _sample_parameters(self): + self.samples['weight'] = self.weight[:self.sample_embed_dim] + self.samples['bias'] = self.bias[:self.sample_embed_dim] + return self.samples + + def set_sample_config(self, sample_embed_dim): # noqa: D102 + self.sample_embed_dim = sample_embed_dim + self._sample_parameters() + + def forward(self, x): # noqa: D102 + self.sample_parameters() + return F.layer_norm( + x, + (self.sample_embed_dim,), + weight=self.samples['weight'], + bias=self.samples['bias'], + eps=self.eps, + ) + + def calc_sampled_param_num(self): # noqa: D102 + assert 'weight' in self.samples.keys() + assert 'bias' in self.samples.keys() + return self.samples['weight'].numel() + self.samples['bias'].numel() + + +class MultiheadAttentionSuper(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, super_vdim=None, dropout=0., bias=True, + add_bias_kv=False, add_zero_attn=False, self_attention=False, + encoder_decoder_attention=False, out_dim=None, qkv_dim=None): # noqa: D107 + super().__init__() + + # the configs of super arch + self.super_q_embed_dim = super_embed_dim + self.super_kv_embed_dim = None + + # the configs of current sampled arch + self.sample_q_embed_dim = None + self.sample_kv_embed_dim = None + + if super_kdim is not None: + assert super_kdim == super_vdim + self.super_kv_embed_dim = super_kdim + else: + self.super_kv_embed_dim = self.super_q_embed_dim + + if qkv_dim is None: + self.qkv_dim = self.super_q_embed_dim + else: + self.qkv_dim = qkv_dim + + # this qkv same dim means the input dim for qkv are the same, not the output dim + # self.qkv_same_dim = self.kdim == self.super_embed_dim and self.vdim == self.super_embed_dim + self.qkv_same_dim = self.super_kv_embed_dim == self.super_q_embed_dim + self.encoder = is_encoder + + # Caution! 
these actually are the sampled num_heads, head_dim and scaling + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = self.qkv_dim // num_heads + assert self.head_dim * num_heads == self.qkv_dim, "qkv must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, 'Self-attention requires query, key and ' \ + 'value to be of the same size' + + if self.qkv_same_dim: + self.in_proj_weight = Parameter(torch.Tensor( + 3 * self.qkv_dim, self.super_q_embed_dim)) + else: + self.k_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.q_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_q_embed_dim)) + + if bias: + self.in_proj_bias = Parameter(torch.Tensor(3 * self.qkv_dim)) + else: + self.register_parameter('in_proj_bias', None) + + if out_dim is None: + out_dim = self.super_q_embed_dim + self.out_proj = LinearSuper( + super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self.reset_parameters() + + self.onnx_trace = False + + self.enable_torch_version = False + if hasattr(F, "multi_head_attention_forward"): + self.enable_torch_version = True + else: + self.enable_torch_version = False + self.enable_torch_version = False + + def calc_sampled_param_num(self): # noqa: D102 + assert self.in_proj_weight is not None and self.in_proj_bias is not None + in_proj_q_weight_numel = self.sample_q_embed_dim * self.qkv_dim + in_proj_v_weight_numel = in_proj_k_weight_numel = self.sample_kv_embed_dim * self.qkv_dim + 
in_proj_bias_numel = self.in_proj_bias.numel() + + # does not count in the output proj because it will be counted in LinearSuper layer + # out_proj_weight_numel = self.qkv_dim * self.sample_q_embed_dim + # out_proj_bias_numel = self. + + return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel + + def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): # noqa: D102 + self.sample_q_embed_dim = sample_q_embed_dim + if sample_kv_embed_dim is None: + self.sample_kv_embed_dim = sample_q_embed_dim + else: + self.sample_kv_embed_dim = sample_kv_embed_dim + + self.num_heads = sample_attention_heads + self.head_dim = self.qkv_dim // self.num_heads + assert self.head_dim * \ + self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" + self.scaling = self.head_dim ** -0.5 + + self.out_proj.set_sample_config( + sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) + + def prepare_for_onnx_export_(self): # noqa: D102 + self.onnx_trace = True + + def reset_parameters(self): # noqa: D102 + if self.qkv_same_dim: + nn.init.xavier_uniform_(self.in_proj_weight) + else: + nn.init.xavier_uniform_(self.k_proj_weight) + nn.init.xavier_uniform_(self.v_proj_weight) + nn.init.xavier_uniform_(self.q_proj_weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.in_proj_bias is not None: + nn.init.constant_(self.in_proj_bias, 0.) + nn.init.constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, + need_weights=True, static_kv=False, attn_mask=None): + """Input shape: Time x Batch x Channel. + + Timesteps can be masked by supplying a T x T mask in the + `attn_mask` argument. 
Padding elements can be excluded from + the key by passing a binary ByteTensor (`key_padding_mask`) with shape: + batch x src_len, where padding elements are indicated by 1s. + """ + tgt_len, bsz, embed_dim = query.size() + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if 'prev_key' in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + # self-attention + q, k, v = self.in_proj_qkv(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.in_proj_q(query) + if key is None: + assert value is None + k = v = None + else: + k = self.in_proj_k(key) + v = self.in_proj_v(key) + + else: + q = self.in_proj_q(query) + k = self.in_proj_k(key) + v = self.in_proj_v(value) + + q = q * self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if k is not None: + k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if 'prev_key' in saved_state: + prev_key = saved_state['prev_key'].view( + bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + k = 
torch.cat((prev_key, k), dim=1) + if 'prev_value' in saved_state: + prev_value = saved_state['prev_value'].view( + bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + v = torch.cat((prev_value, v), dim=1) + saved_state['prev_key'] = k.view( + bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_value'] = v.view( + bsz, self.num_heads, -1, self.head_dim) + + self._set_input_buffer(incremental_state, saved_state) + + src_len = k.size(1) + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]): + key_padding_mask = None + + if key_padding_mask is not None: + fil = key_padding_mask.new_ones( + key_padding_mask.size(0), src_len-key_padding_mask.size(1)) + key_padding_mask = torch.cat((key_padding_mask, fil), dim=1) + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + src_len += 1 + k = torch.cat( + [k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat( + [v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask( + attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [ + bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) + if self.onnx_trace: + attn_weights = 
torch.where( + key_padding_mask.unsqueeze(1).unsqueeze(2), + torch.Tensor([float("-Inf")]), + attn_weights.float() + ).type_as(attn_weights) + else: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_weights = attn_weights.view( + bsz * self.num_heads, tgt_len, src_len) + + attn_weights = fairseq.utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace, + ).type_as(attn_weights) + attn_weights = F.dropout( + attn_weights, p=self.dropout, training=self.training) + + attn = torch.bmm(attn_weights, v) + + assert list(attn.size()) == [ + bsz * self.num_heads, tgt_len, self.head_dim] + + if (self.onnx_trace and attn.size(1) == 1): + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, self.qkv_dim) + else: + attn = attn.transpose(0, 1).contiguous().view( + tgt_len, bsz, self.qkv_dim) + attn = self.out_proj(attn) + + if need_weights: + # average attention weights over heads + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) + + attn_weights = attn_weights.sum(dim=1) / self.num_heads + else: + attn_weights = None + + return attn, attn_weights + + def in_proj_qkv(self, query): # noqa: D102 + return self._in_proj(query, sample_dim=self.sample_q_embed_dim).chunk(3, dim=-1) + + def in_proj_q(self, query): # noqa: D102 + if self.qkv_same_dim: + return self._in_proj(query, end=self.qkv_dim, sample_dim=self.sample_q_embed_dim) + else: + bias = self.in_proj_bias + if bias is not None: + bias = bias[:self.qkv_dim] + return F.linear(query, self.q_proj_weight[..., :self.sample_q_embed_dim], bias) + + def in_proj_k(self, key): # noqa: D102 + if self.qkv_same_dim: + return self._in_proj(key, start=self.qkv_dim, end=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) + else: + weight = self.k_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = 
bias[self.qkv_dim:2 * self.qkv_dim] + return F.linear(key, weight[..., :self.sample_kv_embed_dim], bias) + + def in_proj_v(self, value): # noqa: D102 + if self.qkv_same_dim: + return self._in_proj(value, start=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) + else: + weight = self.v_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[2 * self.qkv_dim:] + return F.linear(value, weight[..., :self.sample_kv_embed_dim], bias) + + def _in_proj(self, input, sample_dim, start=0, end=None): + weight = self.in_proj_weight + bias = self.in_proj_bias + weight = weight[start:end, :sample_dim] + if bias is not None: + bias = bias[start:end] + return F.linear(input, weight, bias) + + def reorder_incremental_state(self, incremental_state, new_order): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer[k] = input_buffer[k].index_select(0, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + def _get_input_buffer(self, incremental_state): + return get_incremental_state( + self, + incremental_state, + 'attn_state', + ) or {} + + def _set_input_buffer(self, incremental_state, buffer): + set_incremental_state( + self, + incremental_state, + 'attn_state', + buffer, + ) + + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): # noqa: D102 + return attn_weights + + def __repr__(self): # noqa: D105 + # We treat the extra repr like the sub-module, one item per line + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + + main_str = self._get_name() + '\tnum_heads:' + 
str(self.num_heads) + \ + '\t qkv_dim:' + str(self.qkv_dim) + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + + main_str += ')' + return main_str diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py new file mode 100644 index 00000000000..0b76b052b21 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -0,0 +1,347 @@ +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Translate pre-processed data with a trained model.""" +import time +import warnings + +import numpy as np + +from neural_compressor.utils.utility import logger, LazyImport + +from .transformer_supernetwork import TransformerSuperNetwork + +torch = LazyImport('torch') +torchprofile = LazyImport('torchprofile') +fairseq = LazyImport('fairseq') + +warnings.filterwarnings("ignore") + + +def compute_bleu(config, dataset_path, checkpoint_path): + """Measure BLEU score of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + MosesTokenizer = fairseq.data.encoders.moses_tokenizer.MosesTokenizer + StopwatchMeter = fairseq.meters.StopwatchMeter + progress_bar = fairseq.progress_bar + + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser, [dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + args.eval_bleu_remove_bpe = '@@ ' + args.eval_bleu_detok = 'moses' + + utils.import_user_module(args) + + use_cuda = torch.cuda.is_available() and not args.cpu + + # when running on CPU, use fp32 as default + if not use_cuda: + args.fp16 = False + + torch.manual_seed(args.seed) + + # Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + + tokenizer = MosesTokenizer(args) + task.tokenizer=tokenizer + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + # Load ensemble + model = TransformerSuperNetwork(task) + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + + model.load_state_dict(state['model'], + strict=True) + + if use_cuda: + model.cuda() + model.set_sample_config(config) + model.make_generation_fast_( + 
beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(args.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_sentences=128, + max_positions=utils.resolve_max_positions( + task.max_positions(), + *[model.max_positions()] + ), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + ).next_epoch_itr(shuffle=False) + + # Initialize generator + gen_timer = StopwatchMeter() + generator = task.build_generator([model], args) + + num_sentences = 0 + bleu_list = [] + with progress_bar.build_progress_bar(args, itr) as t: + for sample in t: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if 'net_input' not in sample: + continue + + bleu = task._inference_with_bleu(generator,sample,model) + bleu_list.append(bleu.score) + + num_sentences += sample['nsentences'] + + bleu_score = np.mean(np.array(bleu_list)) + return bleu_score + + +def compute_latency(config, dataset_path, batch_size, get_model_parameters=False): + """Measure latency of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser, [dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = batch_size + utils.import_user_module(args) + args.latgpu = False + 
args.latcpu = True + args.latiter = 100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + # Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + + # Load ensemble + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + src_tokens_test = torch.tensor( + [dummy_src_tokens], dtype=torch.long) + src_lengths_test = torch.tensor([dummy_sentence_length]) + prev_output_tokens_test_with_beam = torch.tensor( + [dummy_prev] * args.beam, dtype=torch.long) + bsz = 1 + new_order = torch.arange(bsz).view(-1, 1).repeat(1, + args.beam).view(-1).long() + if args.latcpu: + model.cpu() + logger.info('Measuring model latency on CPU for dataset generation...') + elif args.latgpu: + model.cuda() + src_tokens_test = src_tokens_test + src_lengths_test = src_lengths_test + prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam + logger.info('Measuring model latency on GPU for dataset generation...') + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + model.set_sample_config(config) + + model.eval() + + with torch.no_grad(): + + # dry runs + for _ in range(15): + encoder_out_test = model.encoder( + src_tokens=src_tokens_test, src_lengths=src_lengths_test) + + encoder_latencies = [] + logger.info('[DyNAS-T] Measuring encoder for dataset generation...') + for _ in range(args.latiter): + if args.latgpu: + start = time.time() + elif args.latcpu: + start = time.time() + + 
model.encoder(src_tokens=src_tokens_test, +                              src_lengths=src_lengths_test) + +            if args.latgpu: +                end = time.time() +                encoder_latencies.append((end - start) * 1000) +            elif args.latcpu: +                end = time.time() +                encoder_latencies.append((end - start) * 1000) + +        encoder_latencies.sort() +        encoder_latencies = encoder_latencies[int( +            args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] +        logger.info( +            f'[DyNAS-T] Encoder latency for dataset generation: Mean: ' +            f'{np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms' +        ) + +        encoder_out_test_with_beam = model.encoder.reorder_encoder_out( +            encoder_out_test, new_order) + +        # dry runs +        for _ in range(15): +            model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam, +                          encoder_out=encoder_out_test_with_beam) + +        # decoder is more complicated because we need to deal with incremental states and auto regressive things +        decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} + +        decoder_iterations = decoder_iterations_dict['wmt'] +        decoder_latencies = [] + +        logger.info('[DyNAS-T] Measuring decoder for dataset generation...') +        for _ in range(args.latiter): +            if args.latgpu: +                start = time.time() +            elif args.latcpu: +                start = time.time() +            incre_states = {} +            for k_regressive in range(decoder_iterations): +                model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam[:, :k_regressive + 1], +                              encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) +            if args.latgpu: +                end = time.time() +                decoder_latencies.append((end - start) * 1000) + +            elif args.latcpu: +                end = time.time() +                decoder_latencies.append((end - start) * 1000) + +        # only use the 10% to 90% latencies to avoid outliers +        decoder_latencies.sort() +        decoder_latencies = decoder_latencies[int( +            args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + +    logger.info( +        f'[DyNAS-T] Decoder latency for dataset generation: Mean: ' +        f'{np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms' +    ) + +    lat_mean =
np.mean(encoder_latencies)+np.mean(decoder_latencies) + lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) + return lat_mean, lat_std + + +def compute_macs(config, dataset_path): + """Calculate MACs for Transformer-based models.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + args.latgpu=False + args.latcpu=True + args.latiter=100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + #Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + + # Load model + logger.info('[DyNAS-T] loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + + dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + model.eval() + model.profile(mode=True) + model.set_sample_config(config) + macs = torchprofile.profile_macs(model, args=(torch.tensor([dummy_src_tokens], dtype=torch.long), + torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) + + model.profile(mode=False) + + return macs diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py 
b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py new file mode 100644 index 00000000000..1034c6519a8 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -0,0 +1,1088 @@ +#noqa: D100 +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.nn.functional as F +from torch import nn + +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, + MultiheadAttentionSuper) + +fairseq = LazyImport("fairseq") + +DEFAULT_MAX_SOURCE_POSITIONS = 1024 +DEFAULT_MAX_TARGET_POSITIONS = 1024 + + +class TransformerSuperNetwork(fairseq.models.BaseFairseqModel): + """Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017)`. + + + + Args: + encoder (TransformerEncoder): the encoder + decoder (TransformerDecoder): the decoder + + The Transformer model provides the following named architectures and + command-line arguments: + + .. 
argparse:: + :ref: fairseq.models.transformer_parser + :prog: + """ + + def __init__(self, task): #noqa: D107 + super().__init__() + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + encoder_config = {'encoder_embed_dim': 640, + 'encoder_layers': 6, + 'encoder_attention_heads': 8, + 'encoder_ffn_embed_dim': 3072, + 'encoder_embed_path': None} + + decoder_config = {'decoder_embed_dim': 640, + 'decoder_layers': 6, + 'decoder_attention_heads': 8, + 'decoder_ffn_embed_dim': 3072} + + encoder_embed_tokens = self.build_embedding( + src_dict, encoder_config['encoder_embed_dim'], encoder_config['encoder_embed_path'] + ) + decoder_embed_tokens = encoder_embed_tokens + self.share_decoder_input_output_embed = True + + self.encoder = TransformerEncoder( + encoder_config, src_dict, encoder_embed_tokens) + self.decoder = TransformerDecoder( + decoder_config, tgt_dict, decoder_embed_tokens) + + def build_embedding(self, dictionary, embed_dim, path=None): #noqa: D102 + utils = fairseq.utils + + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + return emb + + def profile(self, mode=True): #noqa: D102 + for module in self.modules(): + if hasattr(module, 'profile') and self != module: + module.profile(mode) + + def get_sampled_params_numel(self, config): #noqa: D102 + self.set_sample_config(config) + numels = [] + for name, module in self.named_modules(): + if hasattr(module, 'calc_sampled_param_num'): + # a hacky way to skip the layers that exceed encoder-layer-num or decoder-layer-num + if ( + name.split('.')[0] == 'encoder' + and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num'] + ): + continue + if ( + name.split('.')[0] == 'decoder' + and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num'] + ): + 
continue + + numels.append(module.calc_sampled_param_num()) + return sum(numels) + + def set_sample_config(self, config): #noqa: D102 + logger.info('[DyNAS-T] Setting active configuration to {}'.format(config)) + self.encoder.set_sample_config(config) + self.decoder.set_sample_config(config) + + def forward(self,src_tokens,src_lengths,prev_output_token): #noqa: D102 + encoder_output = self.encoder.forward(src_tokens,src_lengths) + output = self.decoder(prev_output_token,encoder_output) + return output + + +class TransformerEncoder(fairseq.models.FairseqEncoder): + """Transformer encoder consisting of *args.encoder_layers* layers. + + Each layer is a :class:`TransformerEncoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, encoder_config, dictionary, embed_tokens): #noqa: D107 + super().__init__(dictionary) + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim = [ + encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] + self.super_layer_num = encoder_config['encoder_layers'] + self.super_self_attention_heads = [ + encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + self.register_buffer('version', torch.Tensor([3])) + + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + + self.embed_tokens = embed_tokens + + 
self.embed_positions = fairseq.modules.PositionalEmbedding( + self.max_source_positions, self.super_embed_dim, self.padding_idx, + learned=False, + ) + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerEncoderLayer(encoder_config, layer_idx=i) + for i in range(self.super_layer_num) + ]) + + if False: + self.layer_norm = LayerNormSuper(self.super_embed_dim) + else: + self.layer_norm = None + + self.vocab_original_scaling = False + + def set_sample_config(self, config: dict): #noqa: D102 + + self.sample_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['encoder']['encoder_ffn_embed_dim'] + + self.sample_layer_num = config['encoder']['encoder_layer_num'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['encoder']['encoder_self_attention_heads'] + + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='encoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + 
layer.set_sample_config(is_identity_layer=True) + + def forward(self, src_tokens, src_lengths): + """Forward function. + + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + """ + # embed tokens and positions + x = self.sample_embed_scale * \ + self.embed_tokens(src_tokens, part='encoder') + if self.embed_positions is not None: + positions = self.embed_positions(src_tokens) + + # sample the positional embedding and add + x += positions[..., :self.sample_embed_dim] + x = F.dropout(x, p=self.sample_dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + all_x = [] + # encoder layers + for layer in self.layers: + x = layer(x, encoder_padding_mask) + all_x.append(x) + + if self.layer_norm: + x = self.layer_norm(x) + + return { + 'encoder_out': x, + 'encoder_out_all': all_x, + 'encoder_padding_mask': encoder_padding_mask, + } + + def reorder_encoder_out(self, encoder_out, new_order): + """Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if encoder_out['encoder_out'] is not None: + encoder_out['encoder_out'] = \ + encoder_out['encoder_out'].index_select(1, new_order) + if encoder_out['encoder_padding_mask'] is not None: + encoder_out['encoder_padding_mask'] = \ + encoder_out['encoder_padding_mask'].index_select(0, new_order) + # need to reorder each layer of output + if 'encoder_out_all' in encoder_out.keys(): + new_encoder_out_all = [] + for encoder_out_one_layer in encoder_out['encoder_out_all']: + new_encoder_out_all.append( + encoder_out_one_layer.index_select(1, new_order)) + encoder_out['encoder_out_all'] = new_encoder_out_all + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions()) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) + for i in range(len(self.layers)): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i)) + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class 
TransformerDecoder(fairseq.models.FairseqIncrementalDecoder): + """Transformer decoder consisting of *args.decoder_layers* layers. + + Each layer is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): #noqa: D107 + super().__init__(dictionary) + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * \ + decoder_config['decoder_layers'] + self.super_layer_num = decoder_config['decoder_layers'] + self.super_self_attention_heads = 8 * \ + [decoder_config['decoder_attention_heads']] * \ + decoder_config['decoder_layers'] + self.super_ende_attention_heads = [ + decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_arbitrary_ende_attn = [-1] * \ + decoder_config['decoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0.0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + self.sample_ende_attention_heads = None + self.sample_arbitrary_ende_attn = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + # the configs of current sampled arch + self.register_buffer('version', torch.Tensor([3])) + + self.share_input_output_embed = True + + self.output_embed_dim = decoder_config['decoder_embed_dim'] + + padding_idx = embed_tokens.padding_idx + self.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + + 
self.embed_tokens = embed_tokens + + self.embed_positions = fairseq.modules.PositionalEmbedding( + self.max_target_positions, self.super_embed_dim, padding_idx, + learned=False, + ) if not False else None + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerDecoderLayer( + decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) + for i in range(self.super_layer_num) + ]) + + self.adaptive_softmax = None + + self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ + if self.super_embed_dim != self.output_embed_dim else None + + if not self.share_input_output_embed: + self.embed_out = nn.Parameter(torch.Tensor( + len(dictionary), self.output_embed_dim)) + nn.init.normal_(self.embed_out, mean=0, + std=self.output_embed_dim ** -0.5) + + self.layer_norm = None + self.get_attn = False + + self.vocab_original_scaling = False + + def set_sample_config(self, config: dict): #noqa: D102 + + self.sample_embed_dim = config['decoder']['decoder_embed_dim'] + self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['decoder']['decoder_ffn_embed_dim'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['decoder']['decoder_self_attention_heads'] + + # Caution: this is a list for all layers + self.sample_ende_attention_heads = config['decoder']['decoder_ende_attention_heads'] + + self.sample_arbitrary_ende_attn = config['decoder']['decoder_arbitrary_ende_attn'] + + self.sample_layer_num = config['decoder']['decoder_layer_num'] + + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + 
self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='decoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_encoder_embed_dim=self.sample_encoder_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], + sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[ + i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + layer.set_sample_config(is_identity_layer=True) + + def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """Forward pass. + + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + x, extra = self.extract_features( + prev_output_tokens, encoder_out, incremental_state) + x = self.output_layer(x) + return x, extra + + def extract_features(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """Similar to *forward* but only return features. 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + # embed positions + positions = self.embed_positions( + prev_output_tokens, + incremental_state=incremental_state, + ) if self.embed_positions is not None else None + + if positions is not None: + positions = positions[..., :self.sample_embed_dim] + + if incremental_state is not None: + # only take the last token in to the decoder + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.sample_embed_scale * \ + self.embed_tokens(prev_output_tokens, part='decoder') + + if positions is not None: + x += positions + x = F.dropout(x, p=self.sample_dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + attn = None + attns = [] + inner_states = [x] + + # decoder layers + for i, layer in enumerate(self.layers): + encoder_out_feed = None + encoder_padding_mask_feed = None + + if encoder_out is not None: + # only use the last layer + if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1: + encoder_out_feed = encoder_out['encoder_out'] + # concat one second last output layer + elif self.sample_arbitrary_ende_attn[i] == 1: + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) + elif self.sample_arbitrary_ende_attn[i] == 2: + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], + encoder_out['encoder_out_all'][-2], + encoder_out['encoder_out_all'][-3]], + dim=0) + else: + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") + + if encoder_out['encoder_padding_mask'] is not None: + if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1: + encoder_padding_mask_feed = encoder_out['encoder_padding_mask'] + # concat one more + elif self.sample_arbitrary_ende_attn[i] == 1: + 
encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) + # concat two more + elif self.sample_arbitrary_ende_attn[i] == 2: + encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask']], + dim=1) + else: + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") + + x, attn = layer( + x, + encoder_out_feed, + encoder_padding_mask_feed, + incremental_state, + self_attn_mask=self.buffered_future_mask( + x) if incremental_state is None else None, + ) + inner_states.append(x) + attns.append(attn) + + if self.layer_norm: + x = self.layer_norm(x) # pylint: disable=not-callable + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + if not self.get_attn: + attns = attns[-1] + return x, {'attn': attns, 'inner_states': inner_states} + + def output_layer(self, features, **kwargs): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + if self.share_input_output_embed: + return F.linear(features, self.embed_tokens.sampled_weight('decoder')) + else: + return F.linear(features, self.embed_out[:, :self.sample_embed_dim]) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions()) + + def buffered_future_mask(self, tensor): #noqa: D102 + utils = fairseq.utils + + dim = tensor.size(0) + if ( + not hasattr(self, '_future_mask') + or self._future_mask is None # pylint: disable=access-member-before-definition + or self._future_mask.device != tensor.device # pylint: disable=access-member-before-definition + or self._future_mask.size(0) < dim # pylint: 
disable=access-member-before-definition + ): + self._future_mask = torch.triu( # pylint: disable=access-member-before-definition + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) + return self._future_mask[:dim, :dim] # pylint: disable=access-member-before-definition + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) + + for i in range(len(self.layers)): + # update layer norms + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'encoder_attn_layer_norm', + '2': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layers.{}.layer_norms.{}.{}'.format( + name, i, old, m) + if k in state_dict: + state_dict['{}.layers.{}.{}.{}'.format( + name, i, new, m)] = state_dict[k] + del state_dict[k] + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class TransformerEncoderLayer(nn.Module): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.encoder_normalize_before* to ``True``. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, encoder_config, layer_idx): #noqa: D107 + super().__init__() + + utils = fairseq.utils + + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = encoder_config['encoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = encoder_config['encoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.is_identity_layer = None + + self.qkv_dim = 512 + + self.self_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, + is_encoder=True, dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim, + ) + + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + self.dropout = 0.1 + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') # init.uniform_ + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, + super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + + def set_sample_config( + self, + is_identity_layer, + sample_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + 
self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) + + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + def upgrade_state_dict_named(self, state_dict, name): + """Renames keys in state dict. + + Rename layer norm states from `...layer_norms.0.weight` to + `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to + `...final_layer_norm.weight` + """ + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layer_norms.{}.{}'.format(name, old, m) + if k in state_dict: + state_dict[ + '{}.{}.{}'.format(name, new, m) + ] = state_dict[k] + del state_dict[k] + + def forward(self, x, encoder_padding_mask, attn_mask=None): + """Forward pass. + + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are indicated by ``1``. 
+ attn_mask (ByteTensor): binary tensor of shape (T_tgt, T_src), where + T_tgt is the length of query, while T_src is the length of key, + though here both query and key is x here, + attn_mask[t_tgt, t_src] = 1 means when calculating embedding + for t_tgt, t_src is excluded (or masked out), =0 means it is + included in attention + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if attn_mask is not None: + attn_mask = attn_mask.masked_fill(attn_mask.byte(), -1e8) + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + # TODO: to formally solve this problem, we need to change fairseq's + # MultiheadAttention. We will do this later on. + x, _ = self.self_attn(query=x, key=x, value=x, + key_padding_mask=encoder_padding_mask) + x = F.dropout(x, p=self.dropout, training=self.training) + x[:residual.size(0), :, :] = residual + x[:residual.size(0), :, :] + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + return x + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + +class TransformerDecoderLayer(nn.Module): + """Decoder layer block. 
+ + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.decoder_normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + decoder_config, + layer_idx, + no_encoder_attn=False, + add_bias_kv=False, + add_zero_attn=False, + ): #noqa: D107 + super().__init__() + + utils = fairseq.utils + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_encoder_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = decoder_config['decoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + self.super_ende_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_encoder_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + self.sample_ende_attention_heads_this_layer = None + self.sample_dropout = None + self.sample_activation_dropout = None + self.is_identity_layer = None + self.qkv_dim = 512 + self.layer_idx = layer_idx + + self.self_attn = MultiheadAttentionSuper( + is_encoder=False, + super_embed_dim=self.super_embed_dim, + num_heads=self.super_self_attention_heads_this_layer, + dropout=0.1, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=True, + 
qkv_dim=self.qkv_dim + ) + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + # use layerNorm rather than FusedLayerNorm for exporting. + # char_inputs can be used to determint this. + # TODO remove this once we update apex with the fix + + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, + num_heads=self.super_ende_attention_heads_this_layer, + is_encoder=False, + super_kdim=self.super_encoder_embed_dim, + super_vdim=self.super_encoder_embed_dim, + dropout=0.1, + encoder_decoder_attention=True, + qkv_dim=self.qkv_dim + ) + self.encoder_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, + uniform_=None, non_linear='linear') + + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + self.need_attn = True + + self.onnx_trace = False + + def set_sample_config(self, + is_identity_layer, + sample_embed_dim=None, + sample_encoder_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_ende_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + self.sample_encoder_embed_dim = sample_encoder_embed_dim + self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + self.sample_ende_attention_heads_this_layer 
= sample_ende_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + self.encoder_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.encoder_attn.set_sample_config( + sample_q_embed_dim=self.sample_embed_dim, + sample_kv_embed_dim=self.sample_encoder_embed_dim, + sample_attention_heads=self.sample_ende_attention_heads_this_layer, + ) + + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + + def prepare_for_onnx_export_(self): #noqa: D102 + self.onnx_trace = True + + def forward( + self, + x, + encoder_out=None, + encoder_padding_mask=None, + incremental_state=None, + prev_self_attn_state=None, + prev_attn_state=None, + self_attn_mask=None, + self_attn_padding_mask=None, + ): + """Forward pass. + + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are indicated by ``1``. 
+ + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x, None + + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if prev_self_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_self_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.self_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + if self.encoder_attn is not None: + residual = x + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, before=True) + if prev_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.encoder_attn._set_input_buffer( + incremental_state, saved_state) + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=(not self.training and self.need_attn), + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, 
after=True) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + self_attn_state = saved_state["prev_key"], saved_state["prev_value"] + return x, attn, self_attn_state + return x, attn + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + def make_generation_fast_(self, need_attn=False, **kwargs): #noqa: D102 + self.need_attn = need_attn + + +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): #noqa: D103 + return dropout * 1.0 * sample_embed_dim / super_embed_dim + + +def Embedding(num_embeddings, embedding_dim, padding_idx): #noqa: D103 + return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + + +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): #noqa: D103 + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( #noqa: D103 + m.weight, non_linear=non_linear) + if bias: + nn.init.constant_(m.bias, 0.) + return m diff --git a/neural_compressor/experimental/nas/nas.py b/neural_compressor/experimental/nas/nas.py index f6580f52a72..21e88aa3563 100644 --- a/neural_compressor/experimental/nas/nas.py +++ b/neural_compressor/experimental/nas/nas.py @@ -56,13 +56,13 @@ def __new__(self, conf_fname_or_obj, *args, **kwargs): elif isinstance(conf_fname_or_obj, Config): self.conf = NASConfig() self.conf.map_pyconfig_to_cfg(conf_fname_or_obj) - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file." 
) assert self.conf.usr_cfg.nas is not None, "nas section must be set" if isinstance(self.conf.usr_cfg.nas.approach, str) and \ - self.conf.usr_cfg.nas.approach.lower() in NASMethods: + self.conf.usr_cfg.nas.approach.lower() in NASMethods: method = self.conf.usr_cfg.nas.approach.lower() else: logger.warning( @@ -127,29 +127,38 @@ def search(self, res_save_path=None): ) ) model_arch_paras = self.select_model_arch() - logger.info("Model architecture {} proposed.".format(model_arch_paras)) + logger.info( + "Model architecture {} proposed.".format(model_arch_paras)) model = self._model_builder(model_arch_paras) model_paras = self.count_model_parameters(model) logger.info( - "***** Number of model parameters: {:.2f}M *****".format(model_paras / 10**6) + "***** Number of model parameters: {:.2f}M *****".format( + model_paras / 10**6) ) - self.model_paras_num[tuple(model_arch_paras.values())] = model_paras + self.model_paras_num[tuple( + model_arch_paras.values())] = model_paras if tuple(model_arch_paras.values()) in self.search_results: - logger.info("Skip evaluated model architecture {}.".format(model_arch_paras)) + logger.info( + "Skip evaluated model architecture {}.".format(model_arch_paras)) continue if tuple(model_arch_paras.values()) in self.resumed_search_results: logger.info( - "Find previous results of model architecture: {}.".format(model_arch_paras) + "Find previous results of model architecture: {}.".format( + model_arch_paras) ) - metrics = self.resumed_search_results[tuple(model_arch_paras.values())] + metrics = self.resumed_search_results[tuple( + model_arch_paras.values())] else: - logger.info("Assessing model architecture: {}.".format(model_arch_paras)) + logger.info( + "Assessing model architecture: {}.".format(model_arch_paras)) metrics = self.estimate(model) logger.info( - "Metrics of model architecture {} is {}.".format(model_arch_paras, metrics) + "Metrics of model architecture {} is {}.".format( + model_arch_paras, metrics) ) 
self.search_results[tuple(model_arch_paras.values())] = metrics - self._search_algorithm.get_feedback(sum(self.metrics_conversion(metrics))) + self._search_algorithm.get_feedback( + sum(self.metrics_conversion(metrics))) self.dump_search_results( os.path.join(save_path, 'Trial_{}_results.txt'.format(i+1)) ) @@ -158,9 +167,12 @@ def search(self, res_save_path=None): if model_arch_vec not in self.search_results: self.search_results[model_arch_vec] = \ self.resumed_search_results[model_arch_vec] - model = self._model_builder(self.params_vec2params_dict(model_arch_vec)) - self.model_paras_num[model_arch_vec] = self.count_model_parameters(model) - self.dump_search_results(os.path.join(save_path, 'Final_results.txt'.format(i+1))) + model = self._model_builder( + self.params_vec2params_dict(model_arch_vec)) + self.model_paras_num[model_arch_vec] = self.count_model_parameters( + model) + self.dump_search_results(os.path.join( + save_path, 'Final_results.txt'.format(i+1))) self.find_best_model_archs() logger.info( "{fix} Found {n} best model architectures {fix}".format( @@ -168,10 +180,11 @@ def search(self, res_save_path=None): ) ) for i, model_arch in enumerate(self.best_model_archs): - logger.info("Best model architecture {}: {}".format(i+1, model_arch)) + logger.info( + "Best model architecture {}: {}".format(i+1, model_arch)) return self.best_model_archs - def estimate(self, model): # pragma: no cover + def estimate(self, model): # pragma: no cover """Estimate performance of the model. Depends on specific NAS algorithm. 
Returns: @@ -188,7 +201,8 @@ def count_model_parameters(self, model): if isinstance(model, torch.nn.Module): return sum(p.numel() for p in model.parameters()) else: - raise NotImplementedError("Only support torch model now.") # pragma: no cover + raise NotImplementedError( + "Only support torch model now.") # pragma: no cover def load_search_results(self, path): """Load previous search results if exist.""" @@ -196,11 +210,13 @@ def load_search_results(self, path): lastest_results_record = os.path.join(path, 'lastest_results.npy') if not os.path.exists(path) or not os.path.exists(lastest_results_record): return - self.resumed_search_results = np.load(lastest_results_record, allow_pickle=True).item() + self.resumed_search_results = np.load( + lastest_results_record, allow_pickle=True).item() os.makedirs(os.path.join(path, 'previous_results'), exist_ok=True) for f in os.listdir(path): if os.path.isfile(os.path.join(path, f)): - shutil.move(os.path.join(path, f), os.path.join(path, 'previous_results', f)) + shutil.move(os.path.join(path, f), os.path.join( + path, 'previous_results', f)) logger.info("Loaded previous results.") def dump_search_results(self, path): @@ -209,23 +225,24 @@ def dump_search_results(self, path): np.save(lastest_results_record, self.search_results, allow_pickle=True) write_contents = '=' * 30 + ' All Search Results ' + '=' * 30 + '\n\n' for model_arch_vec in self.search_results: - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += '{}: {} Paras: {}M\n'.format( tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 ) - write_contents += '\n\n\n' + '=' * 30 + ' Best Search Results ' + '=' * 30 + '\n\n' + write_contents += '\n\n\n' + '=' * 30 + \ + ' Best Search Results ' + '=' * 30 + '\n\n' self.find_best_model_archs() for i, model_arch in 
enumerate(self.best_model_archs): model_arch_vec = tuple(model_arch.values()) - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += \ '{}. {}: {} Paras: {}M\n'.format( i+1, tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 - ) + ) with open(path, mode='w') as f: f.write(write_contents) @@ -239,7 +256,7 @@ def params_vec2params_dict(self, paras_vec): """ assert len(paras_vec) == len(self.search_space_keys), \ "Length of paras_vec and search_space_keys should be the same." - return {k:v for k, v in zip(self.search_space_keys, paras_vec)} + return {k: v for k, v in zip(self.search_space_keys, paras_vec)} def find_best_model_archs(self): """Find the best model architectures. @@ -248,10 +265,11 @@ def find_best_model_archs(self): """ assert len(self.search_results) > 0, "Zero result in search_results." model_arches = list(self.search_results.keys()) - metrics = [self.metrics_conversion(self.search_results[ma]) for ma in model_arches] + metrics = [self.metrics_conversion( + self.search_results[ma]) for ma in model_arches] pareto_front_indices = find_pareto_front(metrics) - self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) \ - for i in pareto_front_indices] + self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) + for i in pareto_front_indices] def metrics_conversion(self, metrics): """Convert the metrics to specific format. @@ -268,11 +286,11 @@ def metrics_conversion(self, metrics): "Keys of metrics not match with metrics in the configuration." 
metrics = list(metrics.values()) if self.higher_is_better is None: - self.higher_is_better = [True,] * len(metrics) - logger.warning("higher_is_better not set in the configuration, " + \ - "set it to all True for every metric entry by default.") - converted_metrics = [metric if higher_is_better else -metric \ - for metric, higher_is_better in zip(metrics, self.higher_is_better)] + self.higher_is_better = [True, ] * len(metrics) + logger.warning("higher_is_better not set in the configuration, " + + "set it to all True for every metric entry by default.") + converted_metrics = [metric if higher_is_better else -metric + for metric, higher_is_better in zip(metrics, self.higher_is_better)] return converted_metrics def init_search_cfg(self, config): @@ -301,18 +319,21 @@ def init_search_cfg(self, config): if self.search_cfg.higher_is_better else None self.seed = self.search_cfg.seed self.max_trials = self.search_cfg.max_trials \ - if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials + if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials self.search_algorithm_type = self.search_cfg.search_algorithm \ if self.search_cfg.search_algorithm else None if not self.search_algorithm_type: - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'grid': self._search_algorithm = GridSearcher(self.search_space) elif self.search_algorithm_type.lower() == 'random': - self._search_algorithm = RandomSearcher(self.search_space, self.seed) + self._search_algorithm = RandomSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'bo': - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) - else: # pragma: no cover + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) + else: # pragma: 
no cover logger.warning( 'Please be aware that \'{}\' is not a built-in search algorithm.'.format( self.search_algorithm_type @@ -322,7 +343,7 @@ def init_search_cfg(self, config): @property def search_space(self): """Getter of the search space. - + Returns: The search space. """ @@ -336,7 +357,7 @@ def search_space(self, search_space): @property def search_algorithm(self): """Getter of the search algorithm. - + Returns: The search algorithm. """ @@ -350,7 +371,7 @@ def search_algorithm(self, search_algorithm): @property def model_builder(self): """Getter of the model builder. - + Returns: The model builder. """ @@ -363,4 +384,4 @@ def model_builder(self, model_builder): def __repr__(self): """Class representation.""" - return 'Base Class of NAS' # pragma: no cover \ No newline at end of file + return 'Base Class of NAS' # pragma: no cover diff --git a/neural_compressor/experimental/nas/nas_utils.py b/neural_compressor/experimental/nas/nas_utils.py index d68556cafcd..72fe884c38b 100644 --- a/neural_compressor/experimental/nas/nas_utils.py +++ b/neural_compressor/experimental/nas/nas_utils.py @@ -35,6 +35,7 @@ def nas_registry(nas_method): cls: The class of register. """ assert isinstance(nas_method, str), "Expect nas_method to be a string." 
+ def decorator(cls): NASMethods[nas_method.lower()] = cls return cls @@ -82,4 +83,4 @@ def find_pareto_front(metrics): pareto_front_point_indices = pareto_front_point_indices[nondominated_points] metrics = metrics[nondominated_points] next_point_idx = np.sum(nondominated_points[:next_point_idx+1]) - return pareto_front_point_indices \ No newline at end of file + return pareto_front_point_indices diff --git a/neural_compressor/experimental/nas/search_algorithms.py b/neural_compressor/experimental/nas/search_algorithms.py index bf1c804c289..72ef8a0c9c2 100644 --- a/neural_compressor/experimental/nas/search_algorithms.py +++ b/neural_compressor/experimental/nas/search_algorithms.py @@ -38,7 +38,8 @@ def __init__(self, search_space) -> None: self.search_space_keys = sorted(search_space.keys()) for k in self.search_space_keys: assert isinstance(self.search_space[k], (list, tuple)), \ - "Value of key \'{}\' must be a list or tuple to specify choices".format(k) + "Value of key \'{}\' must be a list or tuple to specify choices".format( + k) def suggest(self): """Suggest the model architecture.""" @@ -129,8 +130,10 @@ class BayesianOptimizationSearcher(Searcher): def __init__(self, search_space, seed=42) -> None: """Initialize the attributes.""" super(BayesianOptimizationSearcher, self).__init__(search_space) - idx_search_space = {k: (0, len(search_space[k])-1) for k in self.search_space_keys} - self.bo_agent = BayesianOptimization(idx_search_space, random_seed=seed) + idx_search_space = { + k: (0, len(search_space[k])-1) for k in self.search_space_keys} + self.bo_agent = BayesianOptimization( + idx_search_space, random_seed=seed) self.last_param_indices = None def suggest(self): @@ -149,7 +152,7 @@ def get_feedback(self, metric): "to get parameters and the input metric is corresponding to this parameters." 
try: self.bo_agent._space.register(self.last_param_indices, metric) - except KeyError: # pragma: no cover + except KeyError: # pragma: no cover logger.debug("Find registered params, skip it.") pass self.last_param_indices = None @@ -161,4 +164,4 @@ def indices2params_vec(self, indices): # keep ind within the index range of self.search_space[key] ind = int(min(max(round(ind), 0), len(self.search_space[key])-1)) res.append(self.search_space[key][ind]) - return res \ No newline at end of file + return res diff --git a/neural_compressor/experimental/pruning.py b/neural_compressor/experimental/pruning.py index 7c318e38bf9..727b437f644 100644 --- a/neural_compressor/experimental/pruning.py +++ b/neural_compressor/experimental/pruning.py @@ -17,7 +17,7 @@ # limitations under the License. from .component import Component -from ..pruners import PRUNERS +from ..pruner.pruner_legacy import PRUNERS from ..utils import logger from ..utils.utility import GLOBAL_STATE, MODE from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func @@ -126,7 +126,9 @@ def pre_process(self): framework_specific_info = {'device': self.cfg.device, 'random_seed': self.cfg.tuning.random_seed, 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + 'q_dataloader': None, + 'format': 'default', + 'backend': 'default'} if self.framework == 'tensorflow': framework_specific_info.update( diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 3d7b7811ea2..82a04f7ccec 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -28,6 +28,8 @@ from ..utils.utility import time_limit from ..utils.create_obj_from_config import create_dataloader from ..model import BaseModel +from ..model.tensorflow_model import TensorflowQATModel +from ..model.model import get_model_fwk_name from ..conf.config import QuantConf from ..conf.pythonic_config import 
Config from deprecated import deprecated @@ -133,6 +135,9 @@ def pre_process(self): self._create_eval_dataloader(cfg) self._create_calib_dataloader(cfg) strategy = cfg.tuning.strategy.name.lower() + if cfg.quantization.optimization_level == 0: + strategy = "conservative" + logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) _resume = None @@ -229,8 +234,8 @@ def __call__(self): def dataset(self, dataset_type, *args, **kwargs): """Get dataset according to dataset_type.""" - from ..data import DATASETS - return DATASETS(self.framework)[dataset_type](*args, **kwargs) + from ..data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) @property def calib_dataloader(self): @@ -405,6 +410,39 @@ def q_func(self, user_q_func): calib_func = q_func + @property + def model(self): + """Override model getter method to handle quantization aware training case.""" + return self._model + + @model.setter + def model(self, user_model): + """Override model setter method to handle quantization aware training case. + + Args: + user_model: user are supported to set model from original framework model format + (eg, tensorflow frozen_pb or path to a saved model), + but not recommended. Best practice is to set from a initialized + neural_compressor.experimental.common.Model. + If tensorflow model is used, model's inputs/outputs will be + auto inferenced, but sometimes auto inferenced + inputs/outputs will not meet your requests, + set them manually in config yaml file. + Another corner case is slim model of tensorflow, + be careful of the name of model configured in yaml file, + make sure the name is in supported slim model list. 
+ """ + approach_cfg = deep_get(self.cfg, 'quantization.approach') + if not self.framework: + self.framework = get_model_fwk_name(user_model) + if self.framework == 'tensorflow' and approach_cfg == 'quant_aware_training': + if type(user_model) == str: + self._model = TensorflowQATModel(user_model) + else: + self._model = TensorflowQATModel(user_model._model) + else: + Component.model.__set__(self, user_model) + def __repr__(self): """Return the class string.""" return 'Quantization' diff --git a/neural_compressor/experimental/scheduler.py b/neural_compressor/experimental/scheduler.py index 25b1b13cf96..d0ef980caa7 100644 --- a/neural_compressor/experimental/scheduler.py +++ b/neural_compressor/experimental/scheduler.py @@ -18,24 +18,20 @@ # limitations under the License. import os -from itertools import permutations -from ..conf.config import Conf from ..utils import logger from .common import Model as NCModel from ..model import BaseModel -from .common import Metric, Postprocess -from ..strategy import STRATEGIES +from ..model.model import get_model_fwk_name from .quantization import Quantization from .pruning import Pruning from .distillation import Distillation from .model_conversion import ModelConversion from .graph_optimization import Graph_Optimization -from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func from .benchmark import Benchmark from .component import Component -from ..conf.dotdict import DotDict, deep_set, deep_get +from ..conf.dotdict import DotDict, deep_set SUPPORTED_COMPONENTS = [ Quantization, diff --git a/neural_compressor/metric/__init__.py b/neural_compressor/metric/__init__.py index 66090298362..2866945f1c3 100644 --- a/neural_compressor/metric/__init__.py +++ b/neural_compressor/metric/__init__.py @@ -15,6 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..experimental.metric import METRICS, BaseMetric, metric_registry -__all__ = ["METRICS", "BaseMetric", "metric_registry"] +"""Intel Neural Compressor Metric.""" + +from .metric import METRICS, Metric, BaseMetric, metric_registry +from os.path import dirname, basename, isfile, join +import glob + +modules = glob.glob(join(dirname(__file__), "*.py")) + +for f in modules: + if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + __import__(basename(f)[:-3], globals(), locals(), level=1) + + +__all__ = ["METRICS", "Metric", "BaseMetric", "metric_registry"] diff --git a/neural_compressor/metric/bleu.py b/neural_compressor/metric/bleu.py new file mode 100644 index 00000000000..9a5e09df572 --- /dev/null +++ b/neural_compressor/metric/bleu.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script for BLEU metric.""" + +import re +import six +import sys +import unicodedata +from typing import List, Sequence + +from .bleu_util import compute_bleu +from .metric import metric_registry + + +class UnicodeRegex(object): + """Ad-hoc hack to recognize all punctuation and symbols. + + Attributes: + nondigit_punct_re: The compiled regular expressions to recognize + punctuation preceded with a digit. + punct_nondigit_re: The compiled regular expressions to recognize + punctuation followed by a digit. 
+ symbol_re: The compiled regular expressions to recognize symbols. + """ + + def __init__(self) -> None: + """Initialize the regular expressions.""" + punctuation = self.property_chars("P") + self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])") + self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])") + self.symbol_re = re.compile("([" + self.property_chars("S") + "])") + + def property_chars(self, prefix: str) -> str: + """Collect all Unicode strings starting with a specific prefix. + + Args: + prefix: The specific prefix. + + Returns: + punctuation: The join result of all Unicode strings starting + with a specific prefix. + """ + punctuation = "".join(six.unichr(x) for x in range(sys.maxunicode) \ + if unicodedata.category(six.unichr(x)).startswith(prefix)) + return punctuation + + +uregex = UnicodeRegex() + + +def bleu_tokenize(string: str) -> List[str]: + """Tokenize a string following the official BLEU implementation. + + See https://github.com/moses-smt/mosesdecoder/" + "blob/master/scripts/generic/mteval-v14.pl#L954-L983 + + Args: + string: The string to be tokenized. + + Returns: + tokens: A list of tokens. + """ + string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string) + string = uregex.punct_nondigit_re.sub(r" \1 \2", string) + string = uregex.symbol_re.sub(r" \1 ", string) + tokens = string.split() + return tokens + + +@metric_registry('BLEU', 'tensorflow, tensorflow_itex') +class BLEU(object): + """Computes the BLEU (Bilingual Evaluation Understudy) score. + + BLEU is an algorithm for evaluating the quality of text which has + been machine-translated from one natural language to another. + This implementent approximate the BLEU score since we do not + glue word pieces or decode the ids and tokenize the output. + By default, we use ngram order of 4 and use brevity penalty. + Also, this does not have beam search. + + Attributes: + predictions: List of translations to score. 
+ labels: List of the reference corresponding to the prediction result. + """ + + def __init__(self) -> None: + """Initialize predictions and labels.""" + self.predictions = [] + self.labels = [] + + def reset(self) -> None: + """Clear the predictions and labels in the cache.""" + self.predictions = [] + self.labels = [] + + def update(self, prediction: Sequence[str], label: Sequence[str]) -> None: + """Add the prediction and label. + + Args: + prediction: The prediction result. + label: The reference corresponding to the prediction result. + + Raises: + ValueError: An error occurred when the length of the prediction + and label are different. + """ + if len(label) != len(prediction): + raise ValueError("Reference and prediction files have different number " + "of lines. If training only a few steps (100-200), the " + "translation may be empty.") + label = [x.lower() for x in label] + prediction = [x.lower() for x in prediction] + label = [bleu_tokenize(x) for x in label] + prediction = [bleu_tokenize(x) for x in prediction] + self.labels.extend(label) + self.predictions.extend(prediction) + + def result(self) -> float: + """Compute the BLEU score. + + Returns: + bleu_score: The approximate BLEU score. + """ + bleu_score = compute_bleu(self.labels, self.predictions) * 100 + return bleu_score diff --git a/neural_compressor/metric/bleu_util.py b/neural_compressor/metric/bleu_util.py new file mode 100644 index 00000000000..875321f4dd3 --- /dev/null +++ b/neural_compressor/metric/bleu_util.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Script to compute BLEU score. + +Source: +https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math + +import numpy as np + +from neural_compressor.utils.utility import LazyImport +from six.moves import xrange # pylint: disable=redefined-builtin +from typing import List, Sequence, Union + +tf = LazyImport('tensorflow') + +def _get_ngrams_with_counter(segment: Sequence[str], + max_order: List[int]) -> collections.Counter: + """Extract all n-grams up to a given maximum order from an input segment. + + Args: + segment: The text segment from which n-grams will be extracted. + max_order: The maximum length in tokens of the n-grams returned + by this methods. 
+ + Returns: + ngram_counts: The Counter containing all n-grams up to max_order + in segment with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in xrange(1, max_order + 1): + for i in xrange(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts + + +def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]], + translation_corpus: Sequence[str], + max_order: int = 4, + use_bp: bool = True) -> float: + """Compute the BLEU score of translated segments against its references. + + Args: + reference_corpus: List of references for each translation. + Each reference should be tokenized into a list of tokens. + translation_corpus: List of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + use_bp: The flag to decide whether to apply brevity penalty. + + Returns: + bleu_score: The approximate BLEU score. 
+ """ + reference_length = 0 + translation_length = 0 + bp = 1.0 + geo_mean = 0 + + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + precisions = [] + + for (references, translations) in zip(reference_corpus, translation_corpus): + reference_length += len(references) + translation_length += len(translations) + ref_ngram_counts = _get_ngrams_with_counter(references, max_order) + translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) + + overlap = dict((ngram, + min(count, translation_ngram_counts[ngram])) + for ngram, count in ref_ngram_counts.items()) + + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for ngram in translation_ngram_counts: + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ + ngram] + + precisions = [0] * max_order + smooth = 1.0 + + for i in xrange(0, max_order): + if possible_matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ + i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if max(precisions) > 0: + p_log_sum = sum(math.log(p) for p in precisions if p) + geo_mean = math.exp(p_log_sum / max_order) + + if use_bp: + ratio = translation_length / reference_length + bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bleu_score = np.float32(geo_mean * bp) + return bleu_score diff --git a/neural_compressor/metric/coco_label_map.py b/neural_compressor/metric/coco_label_map.py new file mode 100644 index 00000000000..82327cb6ce1 --- /dev/null +++ b/neural_compressor/metric/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} diff --git a/neural_compressor/metric/coco_tools.py b/neural_compressor/metric/coco_tools.py new file mode 100644 index 00000000000..93d7a34b231 --- /dev/null 
+++ b/neural_compressor/metric/coco_tools.py @@ -0,0 +1,713 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. + +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from 
neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. 
+ Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. 
+ + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass 
+ scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. + + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no 
supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) 
ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. 
+ """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: 
np.array) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. 
Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list diff --git a/neural_compressor/metric/evaluate_squad.py b/neural_compressor/metric/evaluate_squad.py new file mode 100644 index 00000000000..20fedd74538 --- /dev/null +++ b/neural_compressor/metric/evaluate_squad.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Official evaluation script for v1.1 of the SQuAD dataset. + +From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py +""" + +from __future__ import print_function +import sys +from collections import Counter +from .f1 import normalize_answer + + +def f1_score(prediction, ground_truth): + """Calculate the F1 score of the prediction and the ground_truth. + + Args: + prediction: The predicted result. + ground_truth: The ground truth. + + Returns: + The F1 score of prediction. Float point number. 
+ """ + prediction_tokens = normalize_answer(prediction).split() + ground_truth_tokens = normalize_answer(ground_truth).split() + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): + """Calculate the max metric for each ground truth. + + For each answer in ground_truths, evaluate the metric of prediction with + this answer, and return the max metric. + + Args: + metric_fn: The function to calculate the metric. + prediction: The prediction result. + ground_truths: A list of correct answers. + + Returns: + The max metric. Float point number. + """ + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def exact_match_score(prediction, ground_truth): + """Compute the exact match score between prediction and ground truth. + + Args: + prediction: The result of predictions to be evaluated. + ground_truth: The ground truth. + + Returns: + The exact match score. + """ + return (normalize_answer(prediction) == normalize_answer(ground_truth)) + + +def evaluate(dataset, predictions): + """Evaluate the average F1 score and the exact match score for Question-Answering results. + + Args: + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, + and a list of correct answers. For example: + predictions: The result of predictions to be evaluated. A dict mapping the id of + a question to the predicted answer of the question. 
+ + Returns: + The F1 score and the exact match score. + + """ + f1 = exact_match = total = 0 + for article in dataset: + for paragraph in article['paragraphs']: + for qa in paragraph['qas']: + total += 1 + if qa['id'] not in predictions: + message = 'Unanswered question ' + qa['id'] + \ + ' will receive score 0.' + print(message, file=sys.stderr) + continue + ground_truths = list(map(lambda x: x['text'], qa['answers'])) + prediction = predictions[qa['id']] + exact_match += metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths( + f1_score, prediction, ground_truths) + + exact_match = 100.0 * exact_match / total + f1 = 100.0 * f1 / total + + return {'exact_match': exact_match, 'f1': f1} \ No newline at end of file diff --git a/neural_compressor/metric/f1.py b/neural_compressor/metric/f1.py new file mode 100644 index 00000000000..d6b0811ae3c --- /dev/null +++ b/neural_compressor/metric/f1.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Official evaluation script for v1.1 of the SQuAD dataset. 
+ +From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py +""" + +from collections import Counter, abc +import string +import re +from typing import Any, Callable, Dict, List, TypeVar +from neural_compressor.utils import logger + +def normalize_answer(text: str) -> str: + """Normalize the answer text. + + Lower text, remove punctuation, articles and extra whitespace, + and replace other whitespace (newline, tab, etc.) to space. + + Args: + s: The text to be normalized. + + Returns: + The normalized text. + """ + + def _remove_articles(text): + return re.sub(r'\b(a|an|the)\b', ' ', text) + + def _white_space_fix(text): + return ' '.join(text.split()) + + def _remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + + def _lower(text): + return text.lower() + + return _white_space_fix(_remove_articles(_remove_punc(_lower(text)))) + + +def f1_score(prediction: abc.Sequence, ground_truth: abc.Sequence): + """Calculate the F1 score of the prediction and the ground_truth. + + Args: + prediction: the predicted answer. + ground_truth: the correct answer. + + Returns: + The F1 score of prediction. Float point number. + """ + assert isinstance(prediction, abc.Sequence) and isinstance(ground_truth, abc.Sequence),\ + 'prediction and ground_truth should be Sequence' + common = Counter(prediction) & Counter(ground_truth) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction) + recall = 1.0 * num_same / len(ground_truth) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + +T = TypeVar('T') +def metric_max_over_ground_truths(metric_fn: Callable[[T, T], float], + prediction: str, ground_truths: List[str]) -> float: + """Calculate the max metric for each ground truth. + + For each answer in ground_truths, evaluate the metric of prediction with + this answer, and return the max metric. 
+ + Args: + metric_fn: the function to calculate the metric. + prediction: the prediction result. + ground_truths: the list of correct answers. + + Returns: + The max metric. Float point number. + """ + scores_for_ground_truths = [] + for ground_truth in ground_truths: + prediction_tokens = normalize_answer(prediction).split() + ground_truth_tokens = normalize_answer(ground_truth).split() + score = metric_fn(prediction_tokens, ground_truth_tokens) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + +def evaluate(predictions: Dict[str, str], dataset: List[Dict[str, Any]]) -> float: + """Evaluate the average F1 score of Question-Answering results. + + The F1 score is the harmonic mean of the precision and recall. It can be computed + with the equation: F1 = 2 * (precision * recall) / (precision + recall). + For all question-and-answers in dataset, it evaluates the f1-score + + Args: + predictions: The result of predictions to be evaluated. A dict mapping the id of + a question to the predicted answer of the question. + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, + and a list of correct answers. For example: + + [{'paragraphs': + [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, ...], + 'question': 'Which NFL team represented the AFC at Super Bowl 50?', + 'id': '56be4db0acb8001400a502ec'}]}]}] + + Returns: + The F1 score of this prediction. Float point number in forms of a percentage. + """ + f1 = total = 0 + for article in dataset: + for paragraph in article['paragraphs']: + for qa in paragraph['qas']: + total += 1 + if qa['id'] not in predictions: + message = 'Unanswered question ' + qa['id'] + \ + ' will receive score 0.' 
+ logger.warning(message) + continue + + ground_truths = list(map(lambda x: x['text'], qa['answers'])) + prediction = predictions[qa['id']] + + f1 += metric_max_over_ground_truths( + f1_score, prediction, ground_truths) + + f1 = 100.0 * f1 / total + return f1 diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py new file mode 100644 index 00000000000..a4786e63ab8 --- /dev/null +++ b/neural_compressor/metric/metric.py @@ -0,0 +1,1613 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Neural Compressor metrics.""" + + + +import numpy as np +from abc import abstractmethod +from ctypes import Union +from neural_compressor.utils.utility import LazyImport, singleton +from neural_compressor.utils import logger +from sklearn.metrics import accuracy_score + +torch = LazyImport('torch') +tf = LazyImport('tensorflow') +mx = LazyImport('mxnet') +transformers = LazyImport('transformers') + +class Metric(object): + """A wrapper of the information needed to construct a Metric. + + The metric class should take the outputs of the model as the metric's inputs, + neural_compressor built-in metric always take (predictions, labels) as inputs, it's + recommended to design metric_cls to take (predictions, labels) as inputs. + """ + + def __init__(self, metric_cls, name='user_metric', **kwargs): + """Initialize a Metric with needed information. 
+ + Args: + metric_cls (cls): Should be a sub_class of neural_compressor.metric.BaseMetric, + which takes (predictions, labels) as inputs + name (str, optional): Name for metric. Defaults to 'user_metric'. + """ + self.metric_cls = metric_cls + self.name = name + self.kwargs = kwargs + +@singleton +class TensorflowMetrics(object): + """Tensorflow metrics collection. + + Attributes: + metrics: A dict to maintain all metrics for Tensorflow model. + """ + + def __init__(self) -> None: + """Initialize the metrics collection.""" + self.metrics = {} + self.metrics.update(TENSORFLOW_METRICS) + + +@singleton +class PyTorchMetrics(object): + """PyTorch metrics collection. + + Attributes: + metrics: A dict to maintain all metrics for PyTorch model. + """ + + def __init__(self) -> None: + """Initialize the metrics collection.""" + self.metrics = {} + self.metrics.update(PYTORCH_METRICS) + + +@singleton +class MXNetMetrics(object): + """MXNet metrics collection. + + Attributes: + metrics: A dict to maintain all metrics for MXNet model. + """ + + def __init__(self) -> None: + """Initialize the metrics collection.""" + from neural_compressor.adaptor.mxnet_utils.util import check_mx_version + if check_mx_version('2.0.0'): + import mxnet.gluon.metric as mx_metrics + else: + import mxnet.metric as mx_metrics + self.metrics = { + "Accuracy": WrapMXNetMetric(mx_metrics.Accuracy), + "MAE": WrapMXNetMetric(mx_metrics.MAE), + "MSE": WrapMXNetMetric(mx_metrics.MSE), + "Loss": WrapMXNetMetric(mx_metrics.Loss), + } + self.metrics.update(MXNET_METRICS) + + +@singleton +class ONNXRTQLMetrics(object): + """ONNXRT QLinear metrics collection. + + Attributes: + metrics: A dict to maintain all metrics for ONNXRT QLinear model. + """ + + def __init__(self) -> None: + """Initialize the metrics collection.""" + self.metrics = {} + self.metrics.update(ONNXRT_QL_METRICS) + + +@singleton +class ONNXRTITMetrics(object): + """ONNXRT Integer metrics collection. 
+ + Attributes: + metrics: A dict to maintain all metrics for ONNXRT Integer model. + """ + + def __init__(self) -> None: + """Initialize the metrics collection.""" + self.metrics = {} + self.metrics.update(ONNXRT_IT_METRICS) + + +framework_metrics = {"tensorflow": TensorflowMetrics, + "tensorflow_itex": TensorflowMetrics, + "mxnet": MXNetMetrics, + "pytorch": PyTorchMetrics, + "pytorch_ipex": PyTorchMetrics, + "pytorch_fx": PyTorchMetrics, + "onnxrt_qlinearops": ONNXRTQLMetrics, + "onnxrt_integerops": ONNXRTITMetrics, + "onnxrt_qdq": ONNXRTQLMetrics, + "onnxrt_qoperator": ONNXRTQLMetrics} + +# user/model specific metrics will be registered here +TENSORFLOW_METRICS = {} +TENSORFLOW_ITEX_METRICS = {} +MXNET_METRICS = {} +PYTORCH_METRICS = {} +ONNXRT_QL_METRICS = {} +ONNXRT_IT_METRICS = {} + +registry_metrics = {"tensorflow": TENSORFLOW_METRICS, + "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "mxnet": MXNET_METRICS, + "pytorch": PYTORCH_METRICS, + "pytorch_ipex": PYTORCH_METRICS, + "pytorch_fx": PYTORCH_METRICS, + "onnxrt_qlinearops": ONNXRT_QL_METRICS, + "onnxrt_qdq": ONNXRT_QL_METRICS, + "onnxrt_integerops": ONNXRT_IT_METRICS, + "onnxrt_qoperator": ONNXRT_QL_METRICS, + } + + +class METRICS(object): + """Intel Neural Compressor Metrics. + + Attributes: + metrics: The collection of registered metrics for the specified framework. + """ + + def __init__(self, framework: str): + """Initialize the metrics collection based on the framework name. + + Args: + framework: The framwork name. + """ + assert framework in ("tensorflow", "tensorflow_itex", + "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", + "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", + "onnxrt_qoperator"), \ + "framework support tensorflow pytorch mxnet onnxrt" + self.metrics = framework_metrics[framework]().metrics + + def __getitem__(self, metric_type: str): + """Get the metric based on the specified type. + + Args: + metric_type: The metric type. + + Returns: + The metric with the specified type. 
+ """ + assert metric_type in self.metrics.keys(), "only support metrics in {}".\ + format(self.metrics.keys()) + + return self.metrics[metric_type] + + def register(self, name, metric_cls) -> None: + """Register a metric. + + Args: + name: The name of metric. + metric_cls: The metric class. + """ + assert name not in self.metrics.keys(), 'registered metric name already exists.' + self.metrics.update({name: metric_cls}) + +def metric_registry(metric_type: str, framework: str): + """Decorate for registering all Metric subclasses. + + The cross-framework metric is supported by specifying the framework param + as one of tensorflow, pytorch, mxnet, onnxrt. + + Args: + metric_type: The metric type. + framework: The framework name. + + Returns: + decorator_metric: The function to register metric class. + """ + + def decorator_metric(cls): + for single_framework in [fwk.strip() for fwk in framework.split(',')]: + assert single_framework in [ + "tensorflow", + "tensorflow_itex", + "mxnet", + "onnxrt_qlinearops", + "onnxrt_integerops", + "onnxrt_qdq", + "onnxrt_qoperator", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + ], "The framework support tensorflow mxnet pytorch onnxrt" + + if metric_type in registry_metrics[single_framework].keys(): + raise ValueError('Cannot have two metrics with the same name') + registry_metrics[single_framework][metric_type] = cls + return cls + return decorator_metric + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output = False, hvd = None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed trainig, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. 
+ """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed trainig. + """ + self._hvd = hvd + + +class WrapPyTorchMetric(BaseMetric): + """The wrapper of Metric class for PyTorch.""" + + def update(self, preds, labels=None, sample_weight=None): + """Convert the prediction to torch. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. 
+ """ + if self._single_output: + output = torch.as_tensor(preds) + else: + output = (torch.as_tensor(preds), torch.as_tensor(labels)) + self._metric.update(output) + + def reset(self): + """Clear the predictions and labels.""" + self._metric.reset() + + def result(self): + """Evaluate the difference between predictions and labels.""" + return self._metric.compute() + + +class WrapMXNetMetric(BaseMetric): + """The wrapper of Metric class for MXNet.""" + + def update(self, preds, labels=None, sample_weight=None): + """Convert the prediction to MXNet array. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + """ + preds = mx.nd.array(preds) + labels = mx.nd.array(labels) + self._metric.update(labels=labels, preds=preds) + + def reset(self): + """Clear the predictions and labels.""" + self._metric.reset() + + def result(self): + """Evaluate the difference between predictions and labels. + + Returns: + acc: The evaluated result. + """ + acc_name, acc = self._metric.get() + return acc + +class WrapONNXRTMetric(BaseMetric): + """The wrapper of Metric class for ONNXRT.""" + + def update(self, preds, labels=None, sample_weight=None): + """Convert the prediction to NumPy array. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + """ + preds = np.array(preds) + labels = np.array(labels) + self._metric.update(labels=labels, preds=preds) + + def reset(self): + """Clear the predictions and labels.""" + self._metric.reset() + + def result(self): + """Evaluate the difference between predictions and labels. + + Returns: + acc: The evaluated result. 
+ """ + acc_name, acc = self._metric.get() + return acc + +def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, 'labels batch size should same with preds' + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + +def _shape_validate(preds, labels): + assert type(preds) in [int, list, np.ndarray], 'preds must be in int or list, ndarray' + assert type(labels) in [int, list, np.ndarray], 'labels must be in int or list, ndarray' + if isinstance(preds, int): + preds = [np.array([preds])] + elif isinstance(preds[0], 
@metric_registry('F1', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops')
class F1(BaseMetric):
    """F1 score of a binary classification problem.

    The F1 score is the harmonic mean of precision and recall:
    F1 = 2 * (precision * recall) / (precision + recall).
    """

    def __init__(self):
        """Initialize the list that accumulates per-update F1 scores."""
        self._score_list = []

    def update(self, preds, labels):
        """Score one batch of predictions against its labels.

        Args:
            preds: The predictions.
            labels: The labels corresponding to the predictions.
        """
        from .f1 import f1_score
        if getattr(self, '_hvd', None) is not None:
            # Distributed run: gather every worker's batch before scoring.
            all_preds = self._hvd.allgather_object(preds)
            all_labels = self._hvd.allgather_object(labels)
            preds, labels = [], []
            for rank in range(self._hvd.size()):
                preds += all_preds[rank]
                labels += all_labels[rank]
        self._score_list.append(f1_score(preds, labels))

    def reset(self):
        """Clear the accumulated scores."""
        self._score_list = []

    def result(self):
        """Return the mean of the accumulated F1 scores."""
        return np.array(self._score_list).mean()
+ + Returns: + preds: The predictions in the format of NumPy array. + labels: The labels in the format of NumPy array. + """ + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + if len(labels.shape) != len(preds.shape) and len(labels.shape)+1 != len(preds.shape): + raise ValueError( + 'labels must have shape of (batch_size, ..) and preds must have' + 'shape of (batch_size, num_classes, ...) or (batch_size, ..),' + 'but given {} and {}.'.format(labels.shape, preds.shape)) + return preds, labels + +def _accuracy_type_check(preds, labels): + """Determine the type of prediction. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + + Returns: + update_type: The type of predictions. + """ + if len(preds.shape) == len(labels.shape)+1: + num_classes = preds.shape[1] + if num_classes == 1: + update_type = 'binary' + else: + update_type = 'multiclass' + elif len(preds.shape) == len(labels.shape): + if len(preds.shape) == 1 or preds.shape[1] ==1: + update_type = 'binary' + else: + update_type = 'multilabel' + return update_type + + +@metric_registry('Accuracy', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +class Accuracy(BaseMetric): + """The Accuracy for the classification tasks. + + The accuracy score is the proportion of the total number of predictions + that were correct classified. + + Attributes: + pred_list: List of prediction to score. + label_list: List of labels to score. + sample: The total number of samples. + """ + + def __init__(self): + """Initialize predictions, labels and sample.""" + self.pred_list = [] + self.label_list = [] + self.sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. 
+ """ + preds, labels = _accuracy_shape_check(preds, labels) + update_type = _accuracy_type_check(preds, labels) + if update_type == 'binary': + self.pred_list.extend(preds) + self.label_list.extend(labels) + self.sample += labels.shape[0] + elif update_type == 'multiclass': + self.pred_list.extend(np.argmax(preds, axis=1).astype('int32')) + self.label_list.extend(labels) + self.sample += labels.shape[0] + elif update_type == 'multilabel': + #(N, C, ...) -> (N*..., C) + num_label = preds.shape[1] + last_dim = len(preds.shape) + if last_dim-1 != 1: + trans_list = [0] + trans_list.extend(list(range(2, len(preds.shape)))) + trans_list.extend([1]) + preds = preds.transpose(trans_list).reshape(-1, num_label) + labels = labels.transpose(trans_list).reshape(-1, num_label) + self.sample += preds.shape[0]*preds.shape[1] + self.pred_list.append(preds) + self.label_list.append(labels) + + def reset(self): + """Clear the predictions and labels.""" + self.pred_list = [] + self.label_list = [] + self.sample = 0 + + def result(self): + """Compute the accuracy.""" + correct_num = np.sum( + np.array(self.pred_list) == np.array(self.label_list)) + if getattr(self, '_hvd', None) is not None: + allghter_correct_num = sum(self._hvd.allgather_object(correct_num)) + allgather_sample = sum(self._hvd.allgather_object(self.sample)) + return allghter_correct_num / allgather_sample + return correct_num / self.sample + + +class PyTorchLoss(): + """A dummy PyTorch Metric. + + A dummy metric that computes the average of predictions and prints it directly. + """ + + def __init__(self): + """Initialize the number of examples, sum of prediction. and device.""" + self._num_examples = 0 + self._device = torch.device('cpu') + self._sum = torch.tensor(0.0, device=self._device) + + def reset(self): + """Reset the number of samples and total cases to zero.""" + self._num_examples = 0 + self._sum = torch.tensor(0.0, device=self._device) + + def update(self, output): + """Add the predictions. 
+ + Args: + output: The predictions. + """ + y_pred, y = output[0].detach(), output[1].detach() + loss = torch.sum(y_pred) + self._sum += loss.to(self._device) + self._num_examples += y.shape[0] + + def compute(self): + """Compute the average of predictions. + + Raises: + ValueError: There must have at least one example. + + Returns: + The dummy loss. + """ + if self._num_examples == 0: + raise ValueError("Loss must have at least one example \ + before it can be computed.") + return self._sum.item() / self._num_examples + + +@metric_registry('Loss', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +class Loss(BaseMetric): + """A dummy Metric. + + A dummy metric that computes the average of predictions and prints it directly. + + Attributes: + sample: The number of samples. + sum: The sum of prediction. + """ + + def __init__(self): + """Initialize the number of samples, sum of prediction.""" + self.sample = 0 + self.sum = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = _shape_validate(preds, labels) + self.sample += labels[0].shape[0] + self.sum += sum([np.sum(pred) for pred in preds]) + + def reset(self): + """Reset the number of samples and total cases to zero.""" + self.sample = 0 + self.sum = 0 + + def result(self): + """Compute the average of predictions. + + Returns: + The dummy loss. + """ + if getattr(self, '_hvd', None) is not None: + allgather_sum = sum(self._hvd.allgather_object(self.sum)) + allgather_sample = sum(self._hvd.allgather_object(self.sample)) + return allgather_sum / allgather_sample + return self.sum / self.sample + + +@metric_registry('MAE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +class MAE(BaseMetric): + """Computes Mean Absolute Error (MAE) loss. 
@metric_registry('MAE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops')
class MAE(BaseMetric):
    """Computes Mean Absolute Error (MAE) loss.

    Mean Absolute Error (MAE) is the mean of the magnitude of
    difference between the predicted and actual numeric values.

    Attributes:
        pred_list: List of prediction to score.
        label_list: List of references corresponding to the prediction result.
        compare_label (bool): Whether to compare label. False if there are no
            labels and will use FP32 preds as labels.
    """

    def __init__(self, compare_label=True):
        """Initialize the list of prediction and labels.

        Args:
            compare_label: Whether to compare label. False if there are no
                labels and will use FP32 preds as labels.
        """
        self.label_list = []
        self.pred_list = []
        self.compare_label = compare_label

    def update(self, preds, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            preds: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight (unused).
        """
        preds, labels = _shape_validate(preds, labels)
        self.label_list.extend(labels)
        self.pred_list.extend(preds)

    def reset(self):
        """Clear the predictions and labels."""
        self.label_list = []
        self.pred_list = []

    def result(self):
        """Compute the MAE score.

        Returns:
            The MAE score.
        """
        aes = [abs(a - b) for (a, b) in zip(self.label_list, self.pred_list)]
        aes_sum = sum([np.sum(ae) for ae in aes])
        aes_size = sum([ae.size for ae in aes])
        # Fixed message typo ("shouldn't be none"); an empty prediction list
        # would otherwise divide by zero below.
        assert aes_size, "predictions shouldn't be None"
        if getattr(self, '_hvd', None) is not None:
            aes_sum = sum(self._hvd.allgather_object(aes_sum))
            aes_size = sum(self._hvd.allgather_object(aes_size))
        return aes_sum / aes_size


@metric_registry('RMSE', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops')
class RMSE(BaseMetric):
    """Computes Root Mean Squared Error (RMSE) loss.

    Attributes:
        mse: The instance of MSE Metric.
    """

    def __init__(self, compare_label=True):
        """Initialize the mse.

        Args:
            compare_label (bool): Whether to compare label. False if there are
                no labels and will use FP32 preds as labels.
        """
        self.mse = MSE(compare_label)

    def update(self, preds, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            preds: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight (unused).
        """
        self.mse.update(preds, labels, sample_weight)

    def reset(self):
        """Clear the predictions and labels."""
        self.mse.reset()

    def result(self):
        """Compute the RMSE score.

        Returns:
            The RMSE score, i.e. the square root of the underlying MSE.
        """
        if getattr(self, '_hvd', None) is not None:
            self.mse._hvd = self._hvd
        return np.sqrt(self.mse.result())


@metric_registry('MSE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops')
class MSE(BaseMetric):
    """Computes Mean Squared Error (MSE) loss.

    Mean Squared Error (MSE) represents the average of the squares of errors.
    For example, the average squared difference between the estimated values
    and the actual values.

    Attributes:
        pred_list: List of prediction to score.
        label_list: List of references corresponding to the prediction result.
        compare_label (bool): Whether to compare label. False if there are no
            labels and will use FP32 preds as labels.
    """

    def __init__(self, compare_label=True):
        """Initialize the list of prediction and labels.

        Args:
            compare_label: Whether to compare label. False if there are no
                labels and will use FP32 preds as labels.
        """
        self.label_list = []
        self.pred_list = []
        self.compare_label = compare_label

    def update(self, preds, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            preds: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight (unused).
        """
        preds, labels = _shape_validate(preds, labels)
        self.pred_list.extend(preds)
        self.label_list.extend(labels)

    def reset(self):
        """Clear the predictions and labels."""
        self.label_list = []
        self.pred_list = []

    def result(self):
        """Compute the MSE score.

        Returns:
            The MSE score.
        """
        squares = [(a - b) ** 2.0 for (a, b) in zip(self.label_list, self.pred_list)]
        squares_sum = sum([np.sum(square) for square in squares])
        squares_size = sum([square.size for square in squares])
        # Fixed message typo ("should't"); an empty prediction list would
        # otherwise divide by zero below.
        assert squares_size, "predictions shouldn't be None"
        if getattr(self, '_hvd', None) is not None:
            squares_sum = sum(self._hvd.allgather_object(squares_sum))
            squares_size = sum(self._hvd.allgather_object(squares_size))
        return squares_sum / squares_size
+ """ + preds, labels = _topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k(predictions=tf.constant(preds, dtype=tf.float32), + targets=tf.constant(labels, dtype=tf.int32), k=self.k) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, '_hvd', None) is not None: + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + +@metric_registry('topk', 'pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +class GeneralTopK(BaseMetric): + """Compute Top-k Accuracy classification score. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. 
+ + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = _topk_shape_validate(preds, labels) + preds = preds.argsort()[..., -self.k:] + if self.k == 1: + correct = accuracy_score(preds, labels, normalize=False) + self.num_correct += correct + + else: + for p, l in zip(preds, labels): + # get top-k labels with np.argpartition + # p = np.argpartition(p, -self.k)[-self.k:] + l = l.astype('int32') + if l in p: + self.num_correct += 1 + + self.num_sample += len(labels) + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, '_hvd', None) is not None: + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + +@metric_registry('COCOmAPv2', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
@metric_registry('COCOmAPv2', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops')
class COCOmAPv2(BaseMetric):
    """Compute mean average precision of the detection task."""

    def __init__(self,
                 anno_path=None,
                 iou_thrs='0.5:0.05:0.95',
                 map_points=101,
                 map_key='DetectionBoxes_Precision/mAP',
                 output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}):
        """Initialize the metric.

        Args:
            anno_path: The path of annotation file.
            iou_thrs: Minimal value for intersection over union that allows to make decision
                that prediction bounding box is true positive. You can specify one float value
                between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
            map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
                11-point interpolated AP, 0 for area under PR curve.
            map_key: The key that maps to pycocotools COCOeval.
                Defaults to 'DetectionBoxes_Precision/mAP'.
            output_index_mapping: The output index mapping. The default dict is
                never mutated, only read, so the shared mutable default is safe.
                Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}.
        """
        self.output_index_mapping = output_index_mapping
        from .coco_label_map import category_map
        if anno_path:
            import os
            import yaml
            assert os.path.exists(anno_path), 'Annotation path does not exists!'
            with open(anno_path, 'r') as f:
                label_map = yaml.safe_load(f.read())
            self.category_map_reverse = {k: v for k, v in label_map.items()}
        else:
            # label: index
            self.category_map_reverse = {v: k for k, v in category_map.items()}
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1
        self.category_map = category_map
        self.category_id_set = set(
            [cat for cat in self.category_map])  # index
        self.iou_thrs = iou_thrs
        self.map_points = map_points
        self.map_key = map_key

    def update(self, predicts, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            predicts: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight. Defaults to None (unused).
        """
        from .coco_tools import ExportSingleImageGroundtruthToCoco,\
            ExportSingleImageDetectionBoxesToCoco
        detections = []
        if 'num_detections' in self.output_index_mapping and \
            self.output_index_mapping['num_detections'] > -1:
            for item in zip(*predicts):
                detection = {}
                num = int(item[self.output_index_mapping['num_detections']])
                detection['boxes'] = np.asarray(
                    item[self.output_index_mapping['boxes']])[0:num]
                detection['scores'] = np.asarray(
                    item[self.output_index_mapping['scores']])[0:num]
                detection['classes'] = np.asarray(
                    item[self.output_index_mapping['classes']])[0:num]
                detections.append(detection)
        else:
            for item in zip(*predicts):
                detection = {}
                detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']])
                detection['scores'] = np.asarray(item[self.output_index_mapping['scores']])
                detection['classes'] = np.asarray(item[self.output_index_mapping['classes']])
                detections.append(detection)

        bboxes, str_labels, int_labels, image_ids = labels
        labels = []
        if len(int_labels[0]) == 0:
            for str_label in str_labels:
                # BUGFIX: was `type(x) == 'str'`, which compares a type object
                # to a string and is always False, so .decode() was attempted
                # even on genuine str inputs and crashed.
                str_label = [
                    x if isinstance(x, str) else x.decode('utf-8')
                    for x in str_label
                ]
                labels.append([self.category_map_reverse[x] for x in str_label])
        elif len(str_labels[0]) == 0:
            for int_label in int_labels:
                labels.append([x for x in int_label])

        for idx, image_id in enumerate(image_ids):
            # Same isinstance fix as above for bytes vs str image ids.
            image_id = image_id if isinstance(image_id, str) else image_id.decode('utf-8')
            if image_id in self.image_ids:
                continue
            self.image_ids.append(image_id)

            ground_truth = {}
            ground_truth['boxes'] = np.asarray(bboxes[idx])
            ground_truth['classes'] = np.asarray(labels[idx])

            self.ground_truth_list.extend(
                ExportSingleImageGroundtruthToCoco(
                    image_id=image_id,
                    next_annotation_id=self.annotation_id,
                    category_id_set=self.category_id_set,
                    groundtruth_boxes=ground_truth['boxes'],
                    groundtruth_classes=ground_truth['classes']))
            self.annotation_id += ground_truth['boxes'].shape[0]

            self.detection_list.extend(
                ExportSingleImageDetectionBoxesToCoco(
                    image_id=image_id,
                    category_id_set=self.category_id_set,
                    detection_boxes=detections[idx]['boxes'],
                    detection_scores=detections[idx]['scores'],
                    detection_classes=detections[idx]['classes']))

    def reset(self):
        """Reset the prediction and labels."""
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1

    def result(self):
        """Compute mean average precision.

        Returns:
            The mean average precision score.
        """
        from .coco_tools import COCOWrapper, COCOEvalWrapper
        if len(self.ground_truth_list) == 0:
            logger.warning("Sample num during evaluation is 0.")
            return 0
        else:
            groundtruth_dict = {
                'annotations':
                    self.ground_truth_list,
                'images': [{
                    'id': image_id
                } for image_id in self.image_ids],
                'categories': [{
                    'id': k,
                    'name': v
                } for k, v in self.category_map.items()]
            }
            coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict)
            coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(
                self.detection_list)
            box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth,
                                            coco_wrapped_detections,
                                            agnostic_mode=False,
                                            iou_thrs=self.iou_thrs,
                                            map_points=self.map_points)
            box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
                include_metrics_per_category=False, all_metrics_per_category=False)
            box_metrics.update(box_per_category_ap)
            box_metrics = {
                'DetectionBoxes_' + key: value
                for key, value in iter(box_metrics.items())
            }

            return box_metrics[self.map_key]
@metric_registry('mAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops')
class TensorflowMAP(BaseMetric):
    """Computes mean average precision."""

    def __init__(self,
                 anno_path=None,
                 iou_thrs=0.5,
                 map_points=0,
                 map_key='DetectionBoxes_Precision/mAP'):
        """Initialize the metric.

        Args:
            anno_path: The path of annotation file.
            iou_thrs: Minimal value for intersection over union that allows to make decision
                that prediction bounding box is true positive. You can specify one float value
                between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
            map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
                11-point interpolated AP, 0 for area under PR curve.
            map_key: The key that maps to pycocotools COCOeval.
                Defaults to 'DetectionBoxes_Precision/mAP'.
        """
        from .coco_label_map import category_map
        if anno_path:
            import os
            import yaml
            assert os.path.exists(anno_path), 'Annotation path does not exists!'
            with open(anno_path, 'r') as f:
                label_map = yaml.safe_load(f.read())
            self.category_map_reverse = {k: v for k, v in label_map.items()}
        else:
            # label: index
            self.category_map_reverse = {v: k for k, v in category_map.items()}
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1
        self.category_map = category_map
        self.category_id_set = set(
            [cat for cat in self.category_map])  # index
        self.iou_thrs = iou_thrs
        self.map_points = map_points
        self.map_key = map_key

    def update(self, predicts, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            predicts: The predictions, either (boxes, scores, classes) or
                (num_detections, boxes, scores, classes).
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight (unused).

        Raises:
            NotImplementedError: When run under distributed inference.
            ValueError: When predicts is neither a 3- nor 4-tuple.
        """
        if getattr(self, '_hvd', None) is not None:
            # Typo fixed in message ("distribued" -> "distributed").
            raise NotImplementedError(
                "Metric TensorflowMAP currently does not support distributed inference.")

        from .coco_tools import ExportSingleImageGroundtruthToCoco,\
            ExportSingleImageDetectionBoxesToCoco
        detections = []
        if len(predicts) == 3:
            for bbox, score, cls in zip(*predicts):
                detection = {}
                detection['boxes'] = np.asarray(bbox)
                detection['scores'] = np.asarray(score)
                detection['classes'] = np.asarray(cls)
                detections.append(detection)
        elif len(predicts) == 4:
            for num, bbox, score, cls in zip(*predicts):
                detection = {}
                num = int(num)
                detection['boxes'] = np.asarray(bbox)[0:num]
                detection['scores'] = np.asarray(score)[0:num]
                detection['classes'] = np.asarray(cls)[0:num]
                detections.append(detection)
        else:
            raise ValueError("Unsupported prediction format!")

        bboxes, str_labels, int_labels, image_ids = labels
        labels = []
        if len(int_labels[0]) == 0:
            for str_label in str_labels:
                # BUGFIX: was `type(x) == 'str'`, which compares a type object
                # to a string and is always False, so .decode() was attempted
                # even on genuine str inputs and crashed.
                str_label = [
                    x if isinstance(x, str) else x.decode('utf-8')
                    for x in str_label
                ]
                labels.append([self.category_map_reverse[x] for x in str_label])
        elif len(str_labels[0]) == 0:
            for int_label in int_labels:
                labels.append([x for x in int_label])

        for idx, image_id in enumerate(image_ids):
            # Same isinstance fix as above for bytes vs str image ids.
            image_id = image_id if isinstance(image_id, str) else image_id.decode('utf-8')
            if image_id in self.image_ids:
                continue
            self.image_ids.append(image_id)

            ground_truth = {}
            ground_truth['boxes'] = np.asarray(bboxes[idx])
            ground_truth['classes'] = np.asarray(labels[idx])

            self.ground_truth_list.extend(
                ExportSingleImageGroundtruthToCoco(
                    image_id=image_id,
                    next_annotation_id=self.annotation_id,
                    category_id_set=self.category_id_set,
                    groundtruth_boxes=ground_truth['boxes'],
                    groundtruth_classes=ground_truth['classes']))
            self.annotation_id += ground_truth['boxes'].shape[0]

            self.detection_list.extend(
                ExportSingleImageDetectionBoxesToCoco(
                    image_id=image_id,
                    category_id_set=self.category_id_set,
                    detection_boxes=detections[idx]['boxes'],
                    detection_scores=detections[idx]['scores'],
                    detection_classes=detections[idx]['classes']))

    def reset(self):
        """Reset the prediction and labels."""
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1

    def result(self):
        """Compute mean average precision.

        Returns:
            The mean average precision score.
        """
        from .coco_tools import COCOWrapper, COCOEvalWrapper
        if len(self.ground_truth_list) == 0:
            logger.warning("Sample num during evaluation is 0.")
            return 0
        else:
            groundtruth_dict = {
                'annotations':
                    self.ground_truth_list,
                'images': [{
                    'id': image_id
                } for image_id in self.image_ids],
                'categories': [{
                    'id': k,
                    'name': v
                } for k, v in self.category_map.items()]
            }
            coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict)
            coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(
                self.detection_list)
            box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth,
                                            coco_wrapped_detections,
                                            agnostic_mode=False,
                                            iou_thrs=self.iou_thrs,
                                            map_points=self.map_points)
            box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
                include_metrics_per_category=False, all_metrics_per_category=False)
            box_metrics.update(box_per_category_ap)
            box_metrics = {
                'DetectionBoxes_' + key: value
                for key, value in iter(box_metrics.items())
            }

            return box_metrics[self.map_key]
@metric_registry('COCOmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops')
class TensorflowCOCOMAP(TensorflowMAP):
    """Computes mean average precision using algorithm in COCO."""

    def __init__(self,
                 anno_path=None,
                 iou_thrs=None,
                 map_points=None,
                 map_key='DetectionBoxes_Precision/mAP'):
        """Initialize with the fixed COCO IoU thresholds and 101 map points.

        Args:
            anno_path: The path of annotation file.
            iou_thrs: Ignored; the standard COCO sweep '0.5:0.05:0.95' is
                always used.
            map_points: Ignored; 101-point interpolated AP is always used.
            map_key: The key that maps to pycocotools COCOeval.
                Defaults to 'DetectionBoxes_Precision/mAP'.
        """
        super().__init__(anno_path, iou_thrs, map_points, map_key)
        # Force the standard COCO evaluation protocol regardless of the
        # constructor arguments.
        self.iou_thrs = '0.5:0.05:0.95'
        self.map_points = 101


@metric_registry('VOCmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops')
class TensorflowVOCMAP(TensorflowMAP):
    """Computes mean average precision using algorithm in VOC."""

    def __init__(self,
                 anno_path=None,
                 iou_thrs=None,
                 map_points=None,
                 map_key='DetectionBoxes_Precision/mAP'):
        """Initialize with the fixed VOC IoU threshold and area-under-curve AP.

        Args:
            anno_path: The path of annotation file.
            iou_thrs: Ignored; a single 0.5 threshold is always used.
            map_points: Ignored; area under the PR curve is always used.
            map_key: The key that maps to pycocotools COCOeval.
                Defaults to 'DetectionBoxes_Precision/mAP'.
        """
        super().__init__(anno_path, iou_thrs, map_points, map_key)
        # Force the standard VOC evaluation protocol regardless of the
        # constructor arguments.
        self.iou_thrs = 0.5
        self.map_points = 0
+ """ + super(TensorflowVOCMAP, self).__init__(anno_path, iou_thrs, map_points, map_key) + self.iou_thrs = 0.5 + self.map_points = 0 + + +@metric_registry('SquadF1', 'tensorflow, tensorflow_itex') +class SquadF1(BaseMetric): + """Evaluate for v1.1 of the SQuAD dataset.""" + + def __init__(self): + """Initialize the score list.""" + self._score_list = [] # squad metric only work when all data preds collected + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + if preds: + from .evaluate_squad import evaluate + if getattr(self, '_hvd', None) is not None: + gathered_preds_list = self._hvd.allgather_object(preds) + gathered_labels_list = self._hvd.allgather_object(labels) + temp_preds_list, temp_labels_list = [], [] + for i in range(0, self._hvd.size()): + temp_preds_list += gathered_preds_list[i] + temp_labels_list += gathered_labels_list[i] + preds = temp_preds_list + labels = temp_labels_list + result = evaluate(labels, preds) + self._score_list.append(result["f1"]) + + def reset(self): + """Reset the score list.""" + self._score_list = [] + + def result(self): + """Compute F1 score.""" + if len(self._score_list) == 0: + return 0. + return np.array(self._score_list).mean() + +@metric_registry('mIOU', 'tensorflow, tensorflow_itex') +class mIOU(BaseMetric): + """Compute the mean IOU(Intersection over Union) score.""" + + def __init__(self, num_classes=21): + """Initialize the number of classes. + + Args: + num_classes: The number of classes. + """ + self.num_classes = num_classes + self.hist = np.zeros((num_classes, num_classes)) + + def update(self, preds, labels): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. 
+ """ + preds = preds.flatten() + labels = labels.flatten() + p_dtype = preds.dtype + l_dtype = labels.dtype + if getattr(self, '_hvd', None) is not None: + preds = self._hvd.allgather_object(preds) + labels = self._hvd.allgather_object(labels) + preds_list, labels_list = np.array([], dtype = p_dtype), np.array([], dtype = l_dtype) + for i in range(self._hvd.size()): + preds_list = np.append(preds_list, preds[i]) + labels_list = np.append(labels_list, labels[i]) + preds, labels = preds_list, labels_list + mask = (labels >= 0) & (labels < self.num_classes) + self.hist += np.bincount( + self.num_classes * labels[mask].astype(int) + + preds[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, + self.num_classes) + + def reset(self): + """Reset the hist.""" + self.hist = np.zeros((self.num_classes, self.num_classes)) + + def result(self): + """Compute mean IOU. + + Returns: + The mean IOU score. + """ + iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - + np.diag(self.hist)) + mean_iu = np.nanmean(iu) + return mean_iu + +@metric_registry('GLUE', 'onnxrt_qlinearops, onnxrt_integerops') +class ONNXRTGLUE(BaseMetric): + """Compute the GLUE score.""" + + def __init__(self, task='mrpc'): + """Initialize the metric. + + Args: + task:The name of the task (Choices: mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli.). + """ + assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ + 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. 
+ """ + if getattr(self, '_hvd', None) is not None: + raise NotImplementedError("Metric ONNXRTGLUE currently do not support distribued inference.") + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """Reset the prediction and labels.""" + self.pred_list = None + self.label_list = None + + def result(self): + """Compute the GLUE score.""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +@metric_registry('ROC', 'pytorch') +class ROC(BaseMetric): + """Computes ROC score.""" + + def __init__(self, task='dlrm'): + """Initialize the metric. + + Args: + task:The name of the task (Choices: dlrm, dien, wide_deep.). + """ + assert task in ['dlrm', 'dien', 'wide_deep'], 'Unsupported task type' + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "dlrm": "acc", + "dien": "acc", + "wide_deep": "acc", + } + + def update(self, preds, labels): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. 
+ """ + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """Reset the prediction and labels.""" + self.pred_list = None + self.label_list = None + + def result(self): + """Compute the ROC score.""" + import sklearn.metrics + scores = np.squeeze(self.pred_list) + targets = np.squeeze(self.label_list) + roc_auc = sklearn.metrics.roc_auc_score(targets, scores) + acc = sklearn.metrics.accuracy_score(targets, np.round(scores)) + return acc diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index f89686887b7..7742f49c102 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -24,7 +24,9 @@ def fit(model, config=None, eval_func=None, eval_dataloader=None, eval_metric=No assert isinstance(config, MixedPrecisionConfig), "Please provide MixedPrecisionConfig!" conf = Config(quantization=config) converter = MixedPrecision(conf) - converter.precisions = config.extra_precisions + precisions = ["bf16", "fp32"] + precisions = list(set(precisions) - set(config.excluded_precisions)) + converter.precisions = precisions converter.model = model if eval_func is not None: converter.eval_func = eval_func diff --git a/neural_compressor/model/__init__.py b/neural_compressor/model/__init__.py index e0793e43f96..5f84dec2e4c 100644 --- a/neural_compressor/model/__init__.py +++ b/neural_compressor/model/__init__.py @@ -15,7 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .model import MODELS, BaseModel +from .model import MODELS, Model +from .base_model import BaseModel + +__all__ = ["MODELS", "Model", "BaseModel"] -__all__ = ["MODELS", "BaseModel"] diff --git a/neural_compressor/model/keras_model.py b/neural_compressor/model/keras_model.py new file mode 100644 index 00000000000..f0995ceed59 --- /dev/null +++ b/neural_compressor/model/keras_model.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from abc import abstractmethod +from neural_compressor.model.base_model import BaseModel +from neural_compressor.utils.utility import LazyImport +tf = LazyImport('tensorflow') + +class KerasModel(BaseModel): + """Build KerasModel object + + Args: + model (string or keras model object): model path or model object + kwargs (dict): other required parameters + + """ + + def __init__(self, model, **kwargs): + self.component = None + self._model = model + if not isinstance(model, tf.keras.Model): + self._model_object = tf.keras.models.load_model(self._model) + else: + self._model_object = self._model + self._q_config = None + + @property + def q_config(self): + return self._q_config + + @q_config.setter + def q_config(self, q_config): + self._q_config = q_config + + @property + def model(self): + return self._model_object + + @property + def graph_info(self): + ''' return {Node: Node_type} like {'conv0': 'conv2d'} ''' + #(TODO) get the graph info + return None + + @abstractmethod + def save(self, root, *args, **kwargs): + self._model_object.save(root) + + @abstractmethod + def export( + self, + save_path: str, + conf, + ): + pass + + @abstractmethod + def framework(self): + return 'keras' diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 59a87d51a29..b723fa57a7c 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -17,17 +17,20 @@ import copy import os -import shutil import importlib -from abc import abstractmethod -import tempfile import sys -from neural_compressor.utils.utility import LazyImport, compute_sparsity, get_backend -from neural_compressor.utils.utility import version1_lt_version2, version1_gt_version2, version1_gte_version2 +from neural_compressor.utils.utility import LazyImport from neural_compressor.utils import logger from neural_compressor.conf import config as cfg from neural_compressor.model.base_model import BaseModel from neural_compressor.model.onnx_model import 
ONNXModel +from neural_compressor.model.mxnet_model import MXNetModel +from neural_compressor.model.keras_model import KerasModel +from neural_compressor.model.tensorflow_model import ( + TensorflowBaseModel, + TensorflowModel, + get_model_type + ) TORCH = False if importlib.util.find_spec('torch'): @@ -43,83 +46,21 @@ json = LazyImport('json') np = LazyImport('numpy') -tensor_to_node = lambda s: list(set([x.split(':')[0] for x in s])) - -def get_model_type(model): - """Get mode type - - Args: - model (string or model object): model path or model object - - Returns: - type (string): model type - """ - - from neural_compressor.adaptor.tf_utils.util import is_saved_model_format, is_ckpt_format - if isinstance(model, tf.Graph): - return 'graph' - elif isinstance(model, tf.compat.v1.GraphDef): - return 'graph_def' - elif isinstance(model, tf.keras.Model): - return 'keras' - elif isinstance(model, tf.compat.v1.estimator.Estimator): - return 'estimator' - elif isinstance(model, str): - model = os.path.abspath(os.path.expanduser(model)) - if (model.endswith('.h5') and os.path.isfile(model)): - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - if (model.endswith('.pb') and os.path.isfile(model)): - if is_saved_model_format(os.path.dirname(model)): - # Warning: TF compatibility issue to load saved model. TF 2.3 keras.load - # can load saved model from TF backend, but TF 2.4 cannot. 
- try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' - else: - return 'frozen_pb' - elif model.endswith('.ckpt') and os.path.isfile(model): - return 'slim' - elif os.path.isdir(model): - if is_ckpt_format(model): - return 'checkpoint' - elif is_saved_model_format(model): - # it's very ugly tf version issue, in tf2.3 keras.load can - #batch_size_(batch_size), load saved model from tf backend, but tf2.4 it will crash - try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' - elif os.path.isfile(model + '.pb'): - return 'frozen_pb' - - raise ValueError('model {} has not recognized model type....'.format(model)) - +MODELS = {'tensorflow': TensorflowModel, + 'tensorflow_itex': TensorflowModel, + 'keras': KerasModel, + 'mxnet': MXNetModel, + 'pytorch': PyTorchModel if TORCH else None, + 'pytorch_ipex': IPEXModel if TORCH else None, + 'pytorch_fx': PyTorchFXModel if TORCH else None, + 'onnxruntime': ONNXModel, + 'onnxrt_qlinearops': ONNXModel, + 'onnxrt_qdq': ONNXModel, + 'onnxrt_integerops': ONNXModel + } def get_model_fwk_name(model): """Detect the input model belongs to which framework - Args: model (string): framework name that supported by Neural Compressor, if there's no available fwk info, then return 'NA'. 
@@ -133,12 +74,12 @@ def _is_onnxruntime(model): from onnxruntime_extensions import get_library_path so.register_custom_ops_library(get_library_path()) if isinstance(model, str): - ort.InferenceSession(model, so) + ort.InferenceSession(model, so, providers=['CPUExecutionProvider']) else: - ort.InferenceSession(model.SerializeToString(), so) + ort.InferenceSession(model.SerializeToString(), so, providers=['CPUExecutionProvider']) except Exception as e: # pragma: no cover if 'Message onnx.ModelProto exceeds maximum protobuf size of 2GB' in str(e): - logger.warning('Please use model path instead of onnx model object to quantize') + logger.warning('Please use model path instead of onnx model object to quantize') else: logger.warning("If you use an onnx model with custom_ops to do quantiztaion, " "please ensure onnxruntime-extensions is installed") @@ -148,7 +89,12 @@ def _is_onnxruntime(model): def _is_pytorch(model): try: - return 'pytorch' if isinstance(model, torch.nn.Module) else 'NA' + if isinstance(model, torch.nn.Module) or isinstance( + model, torch.fx.GraphModule) or isinstance( + model, torch.jit._script.RecursiveScriptModule): + return 'pytorch' + else: + return 'NA' except: return 'NA' @@ -196,975 +142,35 @@ def _is_mxnet(model): return fwk_name -def validate_graph_node(graph_def, node_names): - """Validate nodes exist in the graph_def - - Args: - graph_def (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object - node_names (list of string): node names to be validated - """ - - if len(node_names) == 0: - return False - all_node_name = [node.name for node in graph_def.node] - for user_name in node_names: - if user_name not in all_node_name: - logger.warn( - str("Node name {} specified in yaml doesn't exist in the model."). 
- format(user_name)) - return False - return True - -def validate_and_inference_input_output(graph_def, \ - input_tensor_names, output_tensor_names): - """validate and inference the input and output tensor names of graph_def - - Args: - graph_def (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object - input_tensor_names (list of string): input_tensor_names of graph_def - output_tensor_names (list of string): output_tensor_names of graph_def - - Returns: - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names - temp_output_tensor_names = [] - if validate_graph_node(graph_def, tensor_to_node(input_tensor_names)): - input_tensor_names = input_tensor_names - else: - input_tensor_names, temp_output_tensor_names = get_input_output_node_names(graph_def) - - if validate_graph_node(graph_def, tensor_to_node(output_tensor_names)): - output_tensor_names = output_tensor_names - elif temp_output_tensor_names: - output_tensor_names = temp_output_tensor_names - else: - _, output_tensor_names = get_input_output_node_names(graph_def) - - return input_tensor_names, output_tensor_names - -def graph_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with tf.compat.v1.Graph - - Args: - model (tf.compat.v1.Graph): tf.compat.v1.Graph object - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - config = tf.compat.v1.ConfigProto() - config.use_per_session_threads = 1 - config.inter_op_parallelism_threads = 1 - if get_backend() == 'tensorflow_itex': - from tensorflow.core.protobuf import 
rewriter_config_pb2 - config.graph_options.rewrite_options.constant_folding = \ - rewriter_config_pb2.RewriterConfig.OFF - sess = tf.compat.v1.Session(graph=model, config=config) - - input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ - model.as_graph_def(), input_tensor_names, output_tensor_names) - - return sess, input_tensor_names, output_tensor_names - -def graph_def_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with tf.compat.v1.GraphDef - - Args: - model (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - graph = tf.Graph() - try: - with graph.as_default(): - tf.import_graph_def(model, name='') - except: - input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ - model, input_tensor_names, output_tensor_names) - from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def - from neural_compressor.adaptor.tf_utils.util import strip_unused_nodes - model = fix_ref_type_of_graph_def(model) - input_node_names = tensor_to_node(input_tensor_names) - output_node_names = tensor_to_node(output_tensor_names) - model = strip_unused_nodes(model, input_node_names, output_node_names) - with graph.as_default(): - tf.import_graph_def(model, name='') - - return graph_session(graph, input_tensor_names, output_tensor_names, **kwargs) - -def frozen_pb_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with frozen pb - - Args: - model (string): model path - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model 
- - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - graph_def = tf.compat.v1.GraphDef() - model = model if model.endswith('.pb') else model + '.pb' - with open(model, 'rb') as f: - graph_def.ParseFromString(f.read()) - return graph_def_session(graph_def, input_tensor_names, \ - output_tensor_names, **kwargs) - -def _contains_function_with_implements_attr(saved_model_proto): - meta_graph = saved_model_proto.meta_graphs[0] - for function in meta_graph.graph_def.library.function: - if function.attr.get("_implements", None) or function.attr.get( - "api_implements", None): - return True - return False - -def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_names): - """Load graph_def from saved model with the default serving signature key. - - Args: - saved_model_dir: Directory of the SavedModel. - saved_model_tags: Set of tags identifying the MetaGraphDef within the - SavedModel to analyze. - - Returns: - graph_def: The loaded GraphDef. - input_tensors: List of input tensors. - output_tensors: List of output tensors. 
- """ - config = tf.compat.v1.ConfigProto() - config.use_per_session_threads = 1 - config.inter_op_parallelism_threads = 1 - if get_backend() == 'tensorflow_itex': - from tensorflow.core.protobuf import rewriter_config_pb2 - config.graph_options.rewrite_options.constant_folding = \ - rewriter_config_pb2.RewriterConfig.OFF - if not os.listdir(os.path.join(model,'variables')): - sess = tf.compat.v1.Session(graph=tf.Graph(), config=config) - loader = tf.compat.v1.saved_model.loader.load(sess, ["serve"], model) - if len(input_tensor_names) == 0: - input_tensor_names = [i.name for _, i in \ - loader.signature_def['serving_default'].inputs.items()] - else: - assert validate_graph_node(\ - sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)), \ - 'tensor names {} not in the graph'.format(input_tensor_names) - - if len(output_tensor_names) == 0: - output_tensor_names = [i.name for _, i in \ - loader.signature_def['serving_default'].outputs.items()] - else: - assert validate_graph_node(\ - sess.graph.as_graph_def(), tensor_to_node(output_tensor_names)), \ - 'tensor names {} not in the graph'.format(output_tensor_names) - - return sess.graph.as_graph_def(), input_tensor_names, output_tensor_names - else: - from tensorflow.python.eager import context - from tensorflow.python.saved_model import load - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.saved_model import signature_constants - from tensorflow.python.framework.convert_to_constants import \ - convert_variables_to_constants_v2 - from tensorflow.python.training import saver - from tensorflow.core.protobuf import config_pb2 - from tensorflow.python.grappler import tf_optimizer - from tensorflow.core.protobuf import meta_graph_pb2 - _saved_model = load.load(model, [tag_constants.SERVING]) - func = _saved_model.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - frozen_func = convert_variables_to_constants_v2(func) - grappler_meta_graph_def = 
saver.export_meta_graph( - graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph) - if len(input_tensor_names) == 0: - input_tensor_names = [i.name.split(':')[0] for i in frozen_func.inputs] - if len(output_tensor_names) == 0: - output_tensor_names = [i.name.split(':')[0] for i in frozen_func.outputs] - # Add a collection 'train_op' so that Grappler knows the outputs. - fetch_collection = meta_graph_pb2.CollectionDef() - for array in frozen_func.inputs + frozen_func.outputs: - fetch_collection.node_list.value.append(array.name) - grappler_meta_graph_def.collection_def["train_op"].CopyFrom( - fetch_collection) - from tensorflow.python.eager import context - grappler_session_config = config_pb2.ConfigProto() - rewrite_options = grappler_session_config.graph_options.rewrite_options - rewrite_options.min_graph_nodes = -1 - opt = tf_optimizer.OptimizeGraph(grappler_session_config, - grappler_meta_graph_def, graph_id=b"tf_graph") - return opt, input_tensor_names, output_tensor_names - -def check_keras_format(model, saved_model_dir): - from tensorflow.python import saved_model - from tensorflow.python.saved_model.load import load - from tensorflow.python.saved_model import save_options - from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info - version = 'saved_model_v2' - try: - saved_model.save( - model, - saved_model_dir, - options=save_options.SaveOptions(save_debug_info=True)) - except: - return 'trackable_object' - saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) - saved_model_version = saved_model_proto.saved_model_schema_version - if saved_model_version == 0: - return 'saved_model_v1' - if saved_model_version not in [1, 2]: - raise ValueError("SavedModel file format({0}) is not supported".format( - saved_model_version)) - return version - -def get_graph_from_saved_model_v2(saved_model_dir, - input_tensor_names, output_tensor_names): - from tensorflow.python.saved_model import tag_constants - 
from tensorflow.python.saved_model import signature_constants - saved_model_exported_names = [ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - ] - saved_model_tags = set([tag_constants.SERVING]) - return load_saved_model(saved_model_dir, saved_model_tags, - input_tensor_names, output_tensor_names) - -def get_graph_from_original_keras_v2(model, output_dir): - from tensorflow.python.eager import def_function - from tensorflow.lite.python.util import trace_model_call - from tensorflow.lite.python.util import model_input_signature - from tensorflow.python.framework import convert_to_constants - from tensorflow.python.framework import dtypes - from tensorflow.lite.python.util import run_graph_optimizations - from tensorflow.lite.python.convert import OpsSet - from tensorflow.lite.python.util import get_grappler_config - input_signature = None - # If the model's call is not a `tf.function`, then we need to first get its - # input signature from `model_input_signature` method. - if not isinstance(model.call, def_function.Function): - input_signature = model_input_signature(model, keep_original_batch_size=False) - - func = trace_model_call(model, input_signature) - concrete_func = func.get_concrete_function() - funcs = [concrete_func] - - frozen_func, graph_def = ( - convert_to_constants.convert_variables_to_constants_v2_as_graph( - funcs[0], lower_control_flow=False)) - - input_tensors = [ - tensor for tensor in frozen_func.inputs - if tensor.dtype != dtypes.resource - ] - output_tensors = frozen_func.outputs - # Grappler will also try to lower while loop into switch merge - # representation which is undesired for Ophints, so we simply remove - # those attributes to prevent Grappler from doing so. - graph = convert_to_constants.disable_lower_using_switch_merge(graph_def) - # Run function inlining optimization to ensure any models generated - # through the from_frozen_graph path have been inlined. 
- # grappler_config = get_grappler_config(['function']) - # graph_def = run_graph_optimizations( - # graph, - # input_tensors, - # output_tensors, - # config=grappler_config) - input_names = [tensor.name.split(':')[0] for tensor in input_tensors] - output_names = [tensor.name.split(':')[0] for tensor in output_tensors] - return graph_def, input_names, output_names - -def get_graph_from_saved_model_v1(model): - from tensorflow.python.framework import ops - from tensorflow.python.saved_model import constants - from tensorflow.python.client import session - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.saved_model import signature_constants - from tensorflow.lite.python.convert_saved_model import get_meta_graph_def - from tensorflow.lite.python.convert_saved_model import get_signature_def - from tensorflow.lite.python.convert_saved_model import get_inputs_outputs - saved_model_tags = set([tag_constants.SERVING]) - signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - - meta_graph = get_meta_graph_def(model, saved_model_tags) - signature_def = get_signature_def(meta_graph, signature_key) - inputs, outputs = get_inputs_outputs(signature_def) - # Check SavedModel for assets directory. 
- collection_def = meta_graph.collection_def - if constants.ASSETS_KEY in collection_def: - raise ValueError("SavedModels with assets/ directory are not supported.") - - from tensorflow.python.saved_model import loader - from tensorflow.python.framework import graph_util as tf_graph_util - graph = ops.Graph() - import tensorflow as tf - with session.Session(graph=graph) as sess: - loader.load(sess, meta_graph.meta_info_def.tags, model) - sess.run(tf.compat.v1.global_variables_initializer()) - sess.run(tf.compat.v1.tables_initializer()) - output_nodes = list(set([output.split(':')[0] for output in outputs])) - node_ops = [node.op for node in graph.as_graph_def().node] - if 'MakeIterator' in node_ops: - output_nodes.append('MakeIterator') - table_ops = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) - # For table initialization - for table_op in table_ops: - output_nodes.append(table_op.name) - if len(table_ops) > 0: - output_nodes.append('init_all_tables') - graph_def = tf_graph_util.convert_variables_to_constants( - sess, graph.as_graph_def(), output_nodes) - return graph_def, inputs, outputs - -def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with keras model - - Args: - model (string or tf.keras.Model): model path or tf.keras.Model object - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - temp_dir = tempfile.mkdtemp() - if tf.version.VERSION > '2.1.0': - if not isinstance(model, tf.keras.Model): - model = tf.keras.models.load_model(model) - keras_format = check_keras_format(model, temp_dir) - if keras_format == 'saved_model_v2': - try: - graph_def, input_names, output_names = 
get_graph_from_saved_model_v2( - temp_dir, input_tensor_names, output_tensor_names) - if '_FusedBatchNormEx' in [node.op for node in graph_def.node]: - keras_format = 'trackable_object' - except: - keras_format = 'trackable_object' - if keras_format == 'trackable_object': - try: - graph_def, input_names, output_names = get_graph_from_original_keras_v2( - model, temp_dir) - except: - keras_format = 'saved_model_v1' - if keras_format == 'saved_model_v1': - try: - tf.keras.backend.set_learning_phase(0) - graph_def, input_names, output_names = get_graph_from_saved_model_v1(model) - except: - raise ValueError('Not supported keras model type...') - - # tensorflow 1.x use v1 convert method - else: - tf.keras.backend.set_learning_phase(0) - graph_def, input_names, output_names = get_graph_from_saved_model_v1(model) - shutil.rmtree(temp_dir, True) - return graph_def_session(graph_def, input_names, output_names, **kwargs) - -def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with slim model - - Args: - model (string): model path - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - assert version1_lt_version2(tf.version.VERSION, '2.0.0'), 'slim model only used in tensorflow 1.x' - from .nets_factory import TFSlimNetsFactory - factory = TFSlimNetsFactory() - assert 'name' in kwargs, 'model name should be set in slim checkpoint....' 
- assert kwargs['name'] in factory.default_slim_models, \ - 'only support topology {}'.format(factory.default_slim_models) - net = copy.deepcopy(factory.networks_map[kwargs['name']]) - model_func = net.pop('model') - arg_scope = net.pop('arg_scope')() - inputs_shape = net.pop('input_shape') - kwargs = net - import tf_slim as slim - with tf.Graph().as_default(): - images = tf.compat.v1.placeholder(name='input', dtype=tf.float32, \ - shape=inputs_shape) - with tf.compat.v1.Session() as sess: - with slim.arg_scope(arg_scope) as scope: # pylint: disable=not-context-manager - model_func(images, is_training=False, **kwargs) - graph_def = sess.graph.as_graph_def() - output_tensor_names = output_tensor_names if len(output_tensor_names) > 0 \ - else [graph_def.node[-1].name] - - from tensorflow.python.tools.freeze_graph import freeze_graph_with_def_protos - graph_def = freeze_graph_with_def_protos( - input_graph_def=graph_def, - input_saver_def=None, - input_checkpoint=model, - output_node_names=','.join(output_tensor_names), - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - output_graph='', - clear_devices=True, - initializer_nodes='') - - return graph_def_session(graph_def, ['input'], output_tensor_names) - -def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with ckpt model - - Args: - model (string): model path - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): validated output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - assert output_tensor_names is not None and len(output_tensor_names) > 0, \ - 'outputs should not be None of checkpoint....' 
- - ckpt_prefix = [os.path.splitext(i)[0] for i in os.listdir(model) \ - if i.endswith(".meta")][0] - - config = tf.compat.v1.ConfigProto() - config.use_per_session_threads = 1 - config.inter_op_parallelism_threads = 1 - if get_backend() == 'tensorflow_itex': - from tensorflow.core.protobuf import rewriter_config_pb2 - config.graph_options.rewrite_options.constant_folding = \ - rewriter_config_pb2.RewriterConfig.OFF - graph = tf.Graph() - sess = tf.compat.v1.Session(graph=graph, config=config) - with graph.as_default(): - saver = tf.compat.v1.train.import_meta_graph(\ - os.path.join(model, ckpt_prefix + '.meta'), clear_devices=True) - - sess.run(tf.compat.v1.global_variables_initializer()) - saver.restore(sess, os.path.join(model, ckpt_prefix)) - - from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names - if validate_graph_node(sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)): - input_tensor_names = input_tensor_names - else: - input_tensor_names, _ = get_input_output_node_names(sess.graph.as_graph_def()) - return sess, input_tensor_names, output_tensor_names - -def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with estimator model - - Args: - model (tf.estimator.Estimator): tf.estimator.Estimator object - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - kwargs (dict): other required parameters, like input_fn - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - - assert 'input_fn' in kwargs, 'input func should be supplied for estimator session....' 
- with tf.Graph().as_default() as g: - features, input_hooks = model._get_features_from_input_fn( - kwargs['input_fn'], tf.estimator.ModeKeys.PREDICT) - estimator_spec = model._call_model_fn(features, None, - tf.estimator.ModeKeys.PREDICT, model.config) - - if len(output_tensor_names) == 0: - outputs = [tensor.name for tensor in estimator_spec.predictions.values()] if\ - isinstance(estimator_spec.predictions, dict) else \ - [estimator_spec.predictions.name] - else: - outputs = output_tensor_names - - logger.info("Estimator output tensor names are {}.".format(outputs)) - with tf.compat.v1.Session(graph=g) as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - # Freezing a graph requires output_node_names, which can be found in - # estimator_spec.predictions that contains prediction tensors as a - # dictionary - # When a model uses Iterator, we need to have 'MakeIterator' (default - # name used by TF) in the output_node_names as well. - output_nodes = list(set([output.split(':')[0] for output in outputs])) - if 'MakeIterator' in [node.op for node in g.as_graph_def().node]: - output_nodes.append('MakeIterator') - - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, - g.as_graph_def(), output_nodes) - - return graph_def_session(graph_def, input_tensor_names, outputs) - -def saved_model_session(model, input_tensor_names, output_tensor_names, **kwargs): - """Build session with saved model - - Args: - model (string): model path - input_tensor_names (list of string): input_tensor_names of model - output_tensor_names (list of string): output_tensor_names of model - - Returns: - sess (tf.compat.v1.Session): tf.compat.v1.Session object - input_tensor_names (list of string): validated input_tensor_names - output_tensor_names (list of string): validated output_tensor_names - """ - try: - graph_def, input_names, output_names = get_graph_from_saved_model_v2( - model, input_tensor_names, output_tensor_names) - except: - graph_def, input_names, 
output_names = get_graph_from_saved_model_v1(model) - assert graph_def is not None, 'Can not parse the saved model...' - return graph_def_session(graph_def, input_names, output_names, **kwargs) - -# it's necessary that a session with input output tensors to run the model -SESSIONS = {'frozen_pb': frozen_pb_session, - 'graph_def': graph_def_session, - 'graph': graph_session, - 'saved_model': saved_model_session, - 'keras': keras_session, - 'checkpoint': checkpoint_session, - 'estimator': estimator_session, - 'slim': slim_session,} - - -class TensorflowBaseModel(BaseModel): - """Build TensorflowBaseModel object - - Args: - model (string or tensorflow model object): model path or model object - kwargs (dict): other required parameters, like input_fn - - """ - - def __init__(self, model, **kwargs): - - self._model = model - self._name = '' - self._weights = None - self.kwargs = kwargs - self._graph_info = {} - self._input_tensor_names = [] - self._output_tensor_names = [] - self._model_type = '' - self._sess = None - self._iter_op = None - self._workspace_path = '' - self._q_config = None - - def framework(self): - return 'tensorflow' - - @property - def name(self): - return self._name - - @name.setter - def name(self, name): - self.kwargs.update({'name': name}) - self._name = name - - @property - def weights(self): - """ Getter to weights """ - return self._weights - - @weights.setter - def weights(self, new_weights): - """ Setter to weights """ - self._weights = new_weights - - @property - def q_config(self): - return self._q_config - - @q_config.setter - def q_config(self, q_config): - self._q_config = q_config - - @property - def workspace_path(self): - return self._workspace_path - - @workspace_path.setter - def workspace_path(self, path): - self._workspace_path = path - - @property - def model_type(self): - return self._model_type - - @model_type.setter - def model_type(self, model_type): - assert model_type in SESSIONS, 'model type not supported....' 
- self._model_type = model_type - - @property - def model(self): - return self.graph - - @property - def graph_def(self): - return self.graph.as_graph_def() - - @property - def graph_info(self): - self._graph_info = {} - for node in self.graph_def.node: - self._graph_info[node.name] = node.op - return self._graph_info - - @property - def sess(self): - if self._sess is None: - self._load_sess(self._model, **self.kwargs) - return self._sess - - @property - def graph(self): - return self.sess.graph - - @graph_def.setter - def graph_def(self, graph_def): - if self._sess is not None: - self._sess.close() - output_sess = SESSIONS['graph_def'](graph_def,\ - self._input_tensor_names, \ - self._output_tensor_names) - - self._sess = output_sess[0] - self._input_tensor_names = output_sess[1] - self._output_tensor_names = output_sess[2] - self.model_type = 'graph_def' - - def _load_sess(self, model, **kwargs): - if self.name: - kwargs.update({'name': self.name}) - # assert self.model_type, 'model type not set....' 
- output_sess = SESSIONS[self.model_type](model, - self._input_tensor_names, \ - self._output_tensor_names, - **kwargs) - self._sess = output_sess[0] - self._input_tensor_names = output_sess[1] - self._output_tensor_names = output_sess[2] - - tf.compat.v1.get_variable_scope().reuse_variables() - return self._sess - - @property - def iter_op(self): - self._iter_op = [] - if self._sess is None: - self._load_sess(self._model, **self.kwargs) - op_list = [node.op for node in self._sess.graph.as_graph_def().node] - if 'MakeIterator' in op_list: - self._iter_op.append(self._sess.graph.get_operation_by_name('MakeIterator')) - return self._iter_op - - @property - def input_tensor_names(self): - if self._sess is None: - self._load_sess(self._model, **self.kwargs) - return copy.deepcopy(self._input_tensor_names) - - @input_tensor_names.setter - def input_tensor_names(self, tensor_names): - if len(tensor_names) == 0: - logger.warn("Input tensor names is empty.") - return - if self._sess is not None: - assert validate_graph_node(\ - self.graph_def, tensor_to_node(tensor_names)), \ - 'tensor names {} not in graph'.format(tensor_names) - self._input_tensor_names = tensor_names - - @property - def output_tensor_names(self): - if len(self._output_tensor_names) == 0: - self._load_sess(self._model, **self.kwargs) - return copy.deepcopy(self._output_tensor_names) - - @output_tensor_names.setter - def output_tensor_names(self, tensor_names): - if len(tensor_names) == 0: - logger.warn("Output tensor names should not be empty.") - return - if self._sess is not None: - assert validate_graph_node(\ - self.graph_def, tensor_to_node(tensor_names)), \ - 'tensor names {} not in graph'.format(tensor_names) - self._output_tensor_names = tensor_names - - # input/output node names and input/output tensor - # come from input/output tensor names, so do not support assign these values - @property - def input_node_names(self): - return copy.deepcopy(tensor_to_node(self.input_tensor_names)) - - 
@property - def output_node_names(self): - output_node_names = tensor_to_node(self.output_tensor_names) - iter_op_list = self.iter_op - if iter_op_list != []: - output_node_names += [iter_op.name for iter_op in iter_op_list] - return copy.deepcopy(output_node_names) - - @property - def input_tensor(self): - from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name - return [get_tensor_by_name(\ - self.graph, x) for x in self.input_tensor_names] - - @property - def output_tensor(self): - from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name - return [get_tensor_by_name(\ - self.graph, x) for x in self.output_tensor_names] - - def save(self, root=None): - if not root: - root = cfg.default_workspace + '/save.pb' - root = os.path.abspath(os.path.expanduser(root)) - # if not have suffix, default append .pb - os.makedirs(os.path.dirname(root), exist_ok=True) - pb_file = root if os.path.split(root)[-1].endswith('.pb') else root + '.pb' - f = tf.io.gfile.GFile(pb_file, 'wb') - f.write(self.graph_def.SerializeToString()) - logger.info("Save quantized model to {}.".format(pb_file)) - - -class TensorflowSavedModelModel(TensorflowBaseModel): - def get_all_weight_names(self): - import tensorflow as tf - names = [] - for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): - if len(layer.weights): - names.append(index) - return names - - def update_weights(self, tensor_name, new_tensor): - pass - - def get_weight(self, tensor_name): - return self.weights[tensor_name] - - @property - def model(self): - import time - import shutil - root = os.path.abspath(os.path.expanduser(cfg.default_workspace)) - root += str(time.time()) - if os.path.exists(root): - shutil.rmtree(root) - os.makedirs(root, exist_ok=True) - if not self._sess: - self._load_sess(self._model, **self.kwargs) - _, builder = self.build_saved_model(root) - builder.save() - model = tf.saved_model.load(root) - shutil.rmtree(root) - return model - - def 
report_sparsity(self): - """ Get sparsity of the model - +class Model(object): + """A wrapper of the information needed to construct a Model.""" + + def __new__(cls, root, **kwargs): + """Create a new instance object of Model. Args: - + root (object): raw model format. For Tensorflow model, could be path to frozen pb file, + path to ckpt or savedmodel folder, loaded estimator/graph_def/graph/keras model object. + For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol + or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto + model object. + Returns: - df (DataFrame): DataFrame of sparsity of each weight - total_sparsity (float): total sparsity of model - + BaseModel: neural_compressor built-in model """ - import pandas as pd - import tensorflow as tf - import numpy as np - df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)", - 'Std', 'Mean', 'Abs-Mean']) - pd.set_option('display.precision', 2) - param_dims = [2, 4] - params_size = 0 - sparse_params_size = 0 - for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): - if not len(layer.weights): - continue - # Extract just the actual parameter's name, which in this context we treat - # as its "type" - weights = layer.get_weights()[0] - if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = compute_sparsity( - weights) - density = dense_param_size / param_size - params_size += param_size - sparse_params_size += sparse_param_size - df.loc[len(df.index)] = ([ - index, - list(weights.shape), - dense_param_size, - sparse_param_size, - (1 - density) * 100, - np.std(weights), - np.mean(weights), - np.mean(np.abs(weights)) - ]) - - total_sparsity = sparse_params_size / params_size * 100 - - df.loc[len(df.index)] = ([ - 'Total sparsity:', - params_size, - "-", - int(sparse_params_size), - total_sparsity, - 0, 0, 0]) - - return df, total_sparsity - - def 
build_saved_model(self, root=None): - if not root: - root = cfg.default_workspace - root = os.path.abspath(os.path.expanduser(root)) - if os.path.exists(root): - import shutil - shutil.rmtree(root) - - os.makedirs(root, exist_ok=True) - - from tensorflow.python.saved_model import signature_constants - from tensorflow.python.saved_model import tag_constants - from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(root) - sigs = {} - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - #(TODO) not directly use self._sess.graph, use self.graph - tf.import_graph_def(self.graph.as_graph_def(), name="") - g = tf.compat.v1.get_default_graph() - inp = [get_tensor_by_name(g, x) for x in self._input_tensor_names] - out = [get_tensor_by_name(g, x) for x in self._output_tensor_names] - sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \ - tf.compat.v1.saved_model.signature_def_utils.predict_signature_def( - {k: v for k, v in zip(self._input_tensor_names, inp)}, - {k: v for k, v in zip(self._output_tensor_names, out)}) - builder.add_meta_graph_and_variables(sess, - [tag_constants.SERVING], - signature_def_map=sigs) - return root, builder - - def save(self, root=None): - root, builder = self.build_saved_model(root) - builder.save() - logger.info("Save quantized model to {}.".format(root)) - - -class TensorflowCheckpointModel(TensorflowBaseModel): - - @property - def graph_def(self): - if self.model_type == 'graph_def': - return self.sess.graph.as_graph_def() - from neural_compressor.adaptor.tf_utils.util import _parse_ckpt_bn_input - from tensorflow.python.framework import graph_util - graph_def = self.sess.graph.as_graph_def() - graph_def = _parse_ckpt_bn_input(graph_def) - return graph_util.convert_variables_to_constants( - sess=self._sess, - input_graph_def=graph_def, - output_node_names=self.output_node_names) - - @graph_def.setter - def graph_def(self, graph_def): - if self._sess is 
not None: - self._sess.close() - output_sess = SESSIONS['graph_def'](graph_def, - self._input_tensor_names, \ - self._output_tensor_names) - self._sess = output_sess[0] - self._input_tensor_names = output_sess[1] - self._output_tensor_names = output_sess[2] - self.model_type = 'graph_def' + framework = kwargs.get("framework", "NA") + if framework == "NA": + framework = get_model_fwk_name(root) - -TENSORFLOW_MODELS = {'frozen_pb': TensorflowBaseModel, - 'graph_def': TensorflowBaseModel, - 'graph': TensorflowBaseModel, - 'checkpoint': TensorflowCheckpointModel, - 'estimator': TensorflowBaseModel, - 'slim': TensorflowBaseModel, - 'saved_model': TensorflowSavedModelModel, - 'keras': TensorflowSavedModelModel,} - -class TensorflowModel(object): - def __new__(cls, model_type, root, **kwargs): - os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - model = TENSORFLOW_MODELS[model_type](root, **kwargs) - model.model_type = model_type - return model - - -class MXNetModel(BaseModel): - """Build MXNetModel object - - Args: - model (mxnet model): model path - """ - - def __init__(self, model, **kwargs): - #(TODO) MXNet does not support recover model from tuning history currently - self.q_config = None - self._model = model - self.calib_cache = {} - - def framework(self): - return 'mxnet' - - @property - def model(self): - return self._model - - @model.setter - def model(self, model): - self._model = model - - def save(self, root=None): - if root is None: - root = cfg.default_workspace - root = os.path.abspath(os.path.expanduser(root)) - os.makedirs(os.path.dirname(root), exist_ok=True) - - if isinstance(self._model, mx.gluon.HybridBlock): - self._model.export(root, remove_amp_cast=False) - logger.info("Save quantized hybrid block model to {}.".format(root)) + if 'tensorflow' in framework: + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + model = MODELS['tensorflow'](model_type, 
root, **kwargs) + elif framework == 'keras': + model = MODELS['keras'](root, **kwargs) + elif framework == 'pytorch': + model = MODELS[framework](root, **kwargs) else: - symnet, args, auxs = self._model - symnet = symnet.as_nd_ndarray() - args = {k:v.as_nd_ndarray() for k, v in args.items()} - auxs = {k:v.as_nd_ndarray() for k, v in auxs.items()} - mx.model.save_checkpoint(root, 0, symnet, args, auxs, remove_amp_cast=False) - logger.info("Save quantized symbol model to {}.".format(root)) - - -MODELS = {'tensorflow': TensorflowModel, - 'tensorflow_itex': TensorflowModel, - 'mxnet': MXNetModel, - 'pytorch': PyTorchModel if TORCH else None, - 'pytorch_ipex': PyTorchIpexModel if TORCH else None, - 'pytorch_fx': PyTorchFXModel if TORCH else None, - 'onnxruntime': ONNXModel, - } + model = MODELS[framework](root, **kwargs) + return model \ No newline at end of file diff --git a/neural_compressor/model/mxnet_model.py b/neural_compressor/model/mxnet_model.py new file mode 100644 index 00000000000..84188b8efc9 --- /dev/null +++ b/neural_compressor/model/mxnet_model.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from neural_compressor.conf import config as cfg +from neural_compressor.utils.utility import LazyImport +from neural_compressor.utils import logger +from .base_model import BaseModel +mx = LazyImport('mxnet') + +class MXNetModel(BaseModel): + """Build MXNetModel object + + Args: + model (mxnet model): model path + """ + + def __init__(self, model, **kwargs): + #(TODO) MXNet does not support recover model from tuning history currently + self.q_config = None + self._model = model + self.calib_cache = {} + + def framework(self): + return 'mxnet' + + @property + def model(self): + return self._model + + @model.setter + def model(self, model): + self._model = model + + def save(self, root=None): + if root is None: + root = cfg.default_workspace + root = os.path.abspath(os.path.expanduser(root)) + os.makedirs(os.path.dirname(root), exist_ok=True) + + if isinstance(self._model, mx.gluon.HybridBlock): + self._model.export(root, remove_amp_cast=False) + logger.info("Save quantized hybrid block model to {}.".format(root)) + else: + symnet, args, auxs = self._model + symnet = symnet.as_nd_ndarray() + args = {k:v.as_nd_ndarray() for k, v in args.items()} + auxs = {k:v.as_nd_ndarray() for k, v in auxs.items()} + mx.model.save_checkpoint(root, 0, symnet, args, auxs, remove_amp_cast=False) + logger.info("Save quantized symbol model to {}.".format(root)) diff --git a/neural_compressor/model/onnx_model.py b/neural_compressor/model/onnx_model.py index 90fcda508c6..a090412b171 100644 --- a/neural_compressor/model/onnx_model.py +++ b/neural_compressor/model/onnx_model.py @@ -154,6 +154,10 @@ def add_initializer(self, tensor): if ortq.find_by_name(tensor.name, self._model.graph.initializer) is None: self._model.graph.initializer.extend([tensor]) + def add_initializers(self, tensors): + for tensor in tensors: + self.add_initializer(tensor) + def get_initializer(self, name): for tensor in self._model.graph.initializer: if tensor.name == name: @@ -423,3 +427,21 @@ def 
get_nodes_chain(self, start_node, stop_node, result_chain=[]): start_node.append(parent.name) return result_chain + + def export(self, save_path, conf): + from neural_compressor.experimental.export import onnx_qlinear_to_qdq + from neural_compressor.config import ONNXQlinear2QDQConfig + if isinstance(conf, ONNXQlinear2QDQConfig): + add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, + self._input_name_to_nodes) + self.add_nodes(add_nodes) + self.remove_nodes(remove_nodes) + self.add_initializers(inits) + self.update() + self.remove_unused_constant() + self.topological_sort() + self.save(save_path) + else: + logger.warning("Unsupported config for export, " + "only ONNXQlinear2QDQConfig is supported!") + exit(0) diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py new file mode 100644 index 00000000000..8070935a337 --- /dev/null +++ b/neural_compressor/model/tensorflow_model.py @@ -0,0 +1,998 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import os +import shutil +import importlib +from abc import abstractmethod +import tempfile +import sys +from neural_compressor.utils.utility import LazyImport, compute_sparsity +from neural_compressor.utils.utility import version1_lt_version2, version1_gt_version2, version1_gte_version2 +from neural_compressor.utils import logger +from neural_compressor.conf import config as cfg +from neural_compressor.model.base_model import BaseModel + +tf = LazyImport('tensorflow') +np = LazyImport('numpy') + +tensor_to_node = lambda s: list(set([x.split(':')[0] for x in s])) + +def get_model_type(model): + """Get mode type + Args: + model (string or model object): model path or model object + Returns: + type (string): model type + """ + + from neural_compressor.adaptor.tf_utils.util import is_saved_model_format, is_ckpt_format + if isinstance(model, str): + model = os.path.abspath(os.path.expanduser(model)) + if (model.endswith('.h5') and os.path.isfile(model)) or \ + is_saved_model_format(os.path.dirname(model)) or \ + (os.path.isdir(model) and is_saved_model_format(model)): + if version1_lt_version2(tf.version.VERSION, '2.10.0'): + logger.warn("keras model running on tensorflow 2.10.0 and" + " lower not support intel ITEX.") + try: + model = tf.keras.models.load_model(model) + except: + pass + if isinstance(model, tf.keras.Model) and hasattr(model, 'to_json'): + return 'keras' + if isinstance(model, tf.Graph): + return 'graph' + elif isinstance(model, tf.compat.v1.GraphDef): + return 'graph_def' + elif isinstance(model, tf.compat.v1.estimator.Estimator): + return 'estimator' + elif isinstance(model, str): + model = os.path.abspath(os.path.expanduser(model)) + if (model.endswith('.pb') and os.path.isfile(model)): + if is_saved_model_format(os.path.dirname(model)): + return 'saved_model' + else: + return 'frozen_pb' + elif model.endswith('.ckpt') and os.path.isfile(model): + return 'slim' + elif os.path.isdir(model): + if is_ckpt_format(model): + return 
'checkpoint' + elif is_saved_model_format(model): + return 'saved_model' + elif os.path.isfile(model + '.pb'): + return 'frozen_pb' + + raise ValueError('model {} has not recognized model type....'.format(model)) + + + +def validate_graph_node(graph_def, node_names): + """Validate nodes exist in the graph_def + Args: + graph_def (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object + node_names (list of string): node names to be validated + """ + + if len(node_names) == 0: + return False + all_node_name = [node.name for node in graph_def.node] + for user_name in node_names: + if user_name not in all_node_name: + logger.warn( + str("Node name {} specified in yaml doesn't exist in the model."). + format(user_name)) + return False + return True + +def validate_and_inference_input_output(graph_def, \ + input_tensor_names, output_tensor_names): + """validate and inference the input and output tensor names of graph_def + Args: + graph_def (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object + input_tensor_names (list of string): input_tensor_names of graph_def + output_tensor_names (list of string): output_tensor_names of graph_def + Returns: + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names + temp_output_tensor_names = [] + if validate_graph_node(graph_def, tensor_to_node(input_tensor_names)): + input_tensor_names = input_tensor_names + else: + input_tensor_names, temp_output_tensor_names = get_input_output_node_names(graph_def) + + if validate_graph_node(graph_def, tensor_to_node(output_tensor_names)): + output_tensor_names = output_tensor_names + elif temp_output_tensor_names: + output_tensor_names = temp_output_tensor_names + else: + _, output_tensor_names = get_input_output_node_names(graph_def) + + return input_tensor_names, output_tensor_names + +def graph_session(model, input_tensor_names, 
output_tensor_names, **kwargs): + """Build session with tf.compat.v1.Graph + Args: + model (tf.compat.v1.Graph): tf.compat.v1.Graph object + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + config = tf.compat.v1.ConfigProto() + config.use_per_session_threads = 1 + config.inter_op_parallelism_threads = 1 + sess = tf.compat.v1.Session(graph=model, config=config) + + input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ + model.as_graph_def(), input_tensor_names, output_tensor_names) + + return sess, input_tensor_names, output_tensor_names + +def graph_def_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with tf.compat.v1.GraphDef + Args: + model (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + graph = tf.Graph() + try: + with graph.as_default(): + tf.import_graph_def(model, name='') + except: + input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ + model, input_tensor_names, output_tensor_names) + from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def + from neural_compressor.adaptor.tf_utils.util import strip_unused_nodes + model = fix_ref_type_of_graph_def(model) + input_node_names = tensor_to_node(input_tensor_names) + output_node_names = tensor_to_node(output_tensor_names) + model = 
strip_unused_nodes(model, input_node_names, output_node_names) + with graph.as_default(): + tf.import_graph_def(model, name='') + + return graph_session(graph, input_tensor_names, output_tensor_names, **kwargs) + +def frozen_pb_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with frozen pb + Args: + model (string): model path + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + graph_def = tf.compat.v1.GraphDef() + model = model if model.endswith('.pb') else model + '.pb' + with open(model, 'rb') as f: + graph_def.ParseFromString(f.read()) + return graph_def_session(graph_def, input_tensor_names, \ + output_tensor_names, **kwargs) + +def _contains_function_with_implements_attr(saved_model_proto): + meta_graph = saved_model_proto.meta_graphs[0] + for function in meta_graph.graph_def.library.function: + if function.attr.get("_implements", None) or function.attr.get( + "api_implements", None): + return True + return False + +def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_names): + """Load graph_def from saved model with the default serving signature key. + Args: + saved_model_dir: Directory of the SavedModel. + saved_model_tags: Set of tags identifying the MetaGraphDef within the + SavedModel to analyze. + Returns: + graph_def: The loaded GraphDef. + input_tensors: List of input tensors. + output_tensors: List of output tensors. 
+ """ + config = tf.compat.v1.ConfigProto() + config.use_per_session_threads = 1 + config.inter_op_parallelism_threads = 1 + if not os.listdir(os.path.join(model,'variables')): + sess = tf.compat.v1.Session(graph=tf.Graph(), config=config) + loader = tf.compat.v1.saved_model.loader.load(sess, ["serve"], model) + if len(input_tensor_names) == 0: + input_tensor_names = [i.name for _, i in \ + loader.signature_def['serving_default'].inputs.items()] + else: + assert validate_graph_node(\ + sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)), \ + 'tensor names {} not in the graph'.format(input_tensor_names) + + if len(output_tensor_names) == 0: + output_tensor_names = [i.name for _, i in \ + loader.signature_def['serving_default'].outputs.items()] + else: + assert validate_graph_node(\ + sess.graph.as_graph_def(), tensor_to_node(output_tensor_names)), \ + 'tensor names {} not in the graph'.format(output_tensor_names) + + return sess.graph.as_graph_def(), input_tensor_names, output_tensor_names + else: + from tensorflow.python.eager import context + from tensorflow.python.saved_model import load + from tensorflow.python.saved_model import tag_constants + from tensorflow.python.saved_model import signature_constants + from tensorflow.python.framework.convert_to_constants import \ + convert_variables_to_constants_v2 + from tensorflow.python.training import saver + from tensorflow.core.protobuf import config_pb2 + from tensorflow.python.grappler import tf_optimizer + from tensorflow.core.protobuf import meta_graph_pb2 + _saved_model = load.load(model, [tag_constants.SERVING]) + func = _saved_model.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + frozen_func = convert_variables_to_constants_v2(func) + grappler_meta_graph_def = saver.export_meta_graph( + graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph) + if len(input_tensor_names) == 0: + input_tensor_names = [i.name.split(':')[0] for i in frozen_func.inputs] + if 
len(output_tensor_names) == 0: + output_tensor_names = [i.name.split(':')[0] for i in frozen_func.outputs] + # Add a collection 'train_op' so that Grappler knows the outputs. + fetch_collection = meta_graph_pb2.CollectionDef() + for array in frozen_func.inputs + frozen_func.outputs: + fetch_collection.node_list.value.append(array.name) + grappler_meta_graph_def.collection_def["train_op"].CopyFrom( + fetch_collection) + from tensorflow.python.eager import context + grappler_session_config = config_pb2.ConfigProto() + rewrite_options = grappler_session_config.graph_options.rewrite_options + rewrite_options.min_graph_nodes = -1 + opt = tf_optimizer.OptimizeGraph(grappler_session_config, + grappler_meta_graph_def, graph_id=b"tf_graph") + return opt, input_tensor_names, output_tensor_names + +def get_graph_from_saved_model_v2(saved_model_dir, + input_tensor_names, output_tensor_names): + from tensorflow.python.saved_model import tag_constants + from tensorflow.python.saved_model import signature_constants + saved_model_exported_names = [ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + saved_model_tags = set([tag_constants.SERVING]) + return load_saved_model(saved_model_dir, saved_model_tags, + input_tensor_names, output_tensor_names) + +def get_graph_from_original_keras_v2(model, output_dir): + from tensorflow.python.eager import def_function + from tensorflow.lite.python.util import trace_model_call + from tensorflow.lite.python.util import model_input_signature + from tensorflow.python.framework import convert_to_constants + from tensorflow.python.framework import dtypes + from tensorflow.lite.python.util import run_graph_optimizations + from tensorflow.lite.python.convert import OpsSet + from tensorflow.lite.python.util import get_grappler_config + input_signature = None + # If the model's call is not a `tf.function`, then we need to first get its + # input signature from `model_input_signature` method. 
+ if not isinstance(model.call, def_function.Function): + input_signature = model_input_signature(model, keep_original_batch_size=False) + + func = trace_model_call(model, input_signature) + concrete_func = func.get_concrete_function() + funcs = [concrete_func] + + frozen_func, graph_def = ( + convert_to_constants.convert_variables_to_constants_v2_as_graph( + funcs[0], lower_control_flow=False)) + + input_tensors = [ + tensor for tensor in frozen_func.inputs + if tensor.dtype != dtypes.resource + ] + output_tensors = frozen_func.outputs + # Grappler will also try to lower while loop into switch merge + # representation which is undesired for Ophints, so we simply remove + # those attributes to prevent Grappler from doing so. + graph = convert_to_constants.disable_lower_using_switch_merge(graph_def) + # Run function inlining optimization to ensure any models generated + # through the from_frozen_graph path have been inlined. + # grappler_config = get_grappler_config(['function']) + # graph_def = run_graph_optimizations( + # graph, + # input_tensors, + # output_tensors, + # config=grappler_config) + input_names = [tensor.name.split(':')[0] for tensor in input_tensors] + output_names = [tensor.name.split(':')[0] for tensor in output_tensors] + return graph_def, input_names, output_names + +def check_keras_format(model, saved_model_dir): + from tensorflow.python import saved_model + from tensorflow.python.saved_model.load import load + from tensorflow.python.saved_model import save_options + from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info + version = 'saved_model_v2' + try: + saved_model.save( + model, + saved_model_dir, + options=save_options.SaveOptions(save_debug_info=True)) + except: + return 'trackable_object' + saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) + saved_model_version = saved_model_proto.saved_model_schema_version + if saved_model_version == 0: + return 'saved_model_v1' + if 
saved_model_version not in [1, 2]: + raise ValueError("SavedModel file format({0}) is not supported".format( + saved_model_version)) + return version + +def get_graph_from_saved_model_v1(model): + from tensorflow.python.framework import ops + from tensorflow.python.saved_model import constants + from tensorflow.python.client import session + from tensorflow.python.saved_model import tag_constants + from tensorflow.python.saved_model import signature_constants + from tensorflow.lite.python.convert_saved_model import get_meta_graph_def + from tensorflow.lite.python.convert_saved_model import get_signature_def + from tensorflow.lite.python.convert_saved_model import get_inputs_outputs + saved_model_tags = set([tag_constants.SERVING]) + signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + + meta_graph = get_meta_graph_def(model, saved_model_tags) + signature_def = get_signature_def(meta_graph, signature_key) + inputs, outputs = get_inputs_outputs(signature_def) + # Check SavedModel for assets directory. 
+ collection_def = meta_graph.collection_def + if constants.ASSETS_KEY in collection_def: + raise ValueError("SavedModels with assets/ directory are not supported.") + + from tensorflow.python.saved_model import loader + from tensorflow.python.framework import graph_util as tf_graph_util + graph = ops.Graph() + import tensorflow as tf + with session.Session(graph=graph) as sess: + loader.load(sess, meta_graph.meta_info_def.tags, model) + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.tables_initializer()) + output_nodes = list(set([output.split(':')[0] for output in outputs])) + node_ops = [node.op for node in graph.as_graph_def().node] + if 'MakeIterator' in node_ops: + output_nodes.append('MakeIterator') + table_ops = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) + # For table initialization + for table_op in table_ops: + output_nodes.append(table_op.name) + if len(table_ops) > 0: + output_nodes.append('init_all_tables') + graph_def = tf_graph_util.convert_variables_to_constants( + sess, graph.as_graph_def(), output_nodes) + return graph_def, inputs, outputs + +def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with keras model + Args: + model (string or tf.keras.Model): model path or tf.keras.Model object + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + temp_dir = tempfile.mkdtemp() + if tf.version.VERSION > '2.1.0': + if not isinstance(model, tf.keras.Model): + model = tf.keras.models.load_model(model) + keras_format = check_keras_format(model, temp_dir) + if keras_format == 'saved_model_v2': + try: + graph_def, input_names, output_names = 
get_graph_from_saved_model_v2( + temp_dir, input_tensor_names, output_tensor_names) + if '_FusedBatchNormEx' in [node.op for node in graph_def.node]: + keras_format = 'trackable_object' + except: + keras_format = 'trackable_object' + if keras_format == 'trackable_object': + try: + graph_def, input_names, output_names = get_graph_from_original_keras_v2( + model, temp_dir) + except: + keras_format = 'saved_model_v1' + if keras_format == 'saved_model_v1': + try: + tf.keras.backend.set_learning_phase(0) + graph_def, input_names, output_names = get_graph_from_saved_model_v1(model) + except: + raise ValueError('Not supported keras model type...') + + # tensorflow 1.x use v1 convert method + else: + tf.keras.backend.set_learning_phase(0) + graph_def, input_names, output_names = get_graph_from_saved_model_v1(model) + shutil.rmtree(temp_dir, True) + return graph_def_session(graph_def, input_names, output_names, **kwargs) + + +def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with slim model + Args: + model (string): model path + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + assert version1_lt_version2(tf.version.VERSION, '2.0.0'), 'slim model only used in tensorflow 1.x' + from .nets_factory import TFSlimNetsFactory + factory = TFSlimNetsFactory() + assert 'name' in kwargs, 'model name should be set in slim checkpoint....' 
+ assert kwargs['name'] in factory.default_slim_models, \ + 'only support topology {}'.format(factory.default_slim_models) + net = copy.deepcopy(factory.networks_map[kwargs['name']]) + model_func = net.pop('model') + arg_scope = net.pop('arg_scope')() + inputs_shape = net.pop('input_shape') + kwargs = net + import tf_slim as slim + with tf.Graph().as_default(): + images = tf.compat.v1.placeholder(name='input', dtype=tf.float32, \ + shape=inputs_shape) + with tf.compat.v1.Session() as sess: + with slim.arg_scope(arg_scope) as scope: # pylint: disable=not-context-manager + model_func(images, is_training=False, **kwargs) + graph_def = sess.graph.as_graph_def() + output_tensor_names = output_tensor_names if len(output_tensor_names) > 0 \ + else [graph_def.node[-1].name] + + from tensorflow.python.tools.freeze_graph import freeze_graph_with_def_protos + graph_def = freeze_graph_with_def_protos( + input_graph_def=graph_def, + input_saver_def=None, + input_checkpoint=model, + output_node_names=','.join(output_tensor_names), + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + output_graph='', + clear_devices=True, + initializer_nodes='') + + return graph_def_session(graph_def, ['input'], output_tensor_names) + +def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with ckpt model + Args: + model (string): model path + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): validated output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + assert output_tensor_names is not None and len(output_tensor_names) > 0, \ + 'outputs should not be None of checkpoint....' 
+ + ckpt_prefix = [os.path.splitext(i)[0] for i in os.listdir(model) \ + if i.endswith(".meta")][0] + + config = tf.compat.v1.ConfigProto() + config.use_per_session_threads = 1 + config.inter_op_parallelism_threads = 1 + graph = tf.Graph() + sess = tf.compat.v1.Session(graph=graph, config=config) + with graph.as_default(): + saver = tf.compat.v1.train.import_meta_graph(\ + os.path.join(model, ckpt_prefix + '.meta'), clear_devices=True) + + sess.run(tf.compat.v1.global_variables_initializer()) + saver.restore(sess, os.path.join(model, ckpt_prefix)) + + from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names + if validate_graph_node(sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)): + input_tensor_names = input_tensor_names + else: + input_tensor_names, _ = get_input_output_node_names(sess.graph.as_graph_def()) + return sess, input_tensor_names, output_tensor_names + +def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with estimator model + Args: + model (tf.estimator.Estimator): tf.estimator.Estimator object + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + kwargs (dict): other required parameters, like input_fn + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + + assert 'input_fn' in kwargs, 'input func should be supplied for estimator session....' 
+ with tf.Graph().as_default() as g: + features, input_hooks = model._get_features_from_input_fn( + kwargs['input_fn'], tf.estimator.ModeKeys.PREDICT) + estimator_spec = model._call_model_fn(features, None, + tf.estimator.ModeKeys.PREDICT, model.config) + + if len(output_tensor_names) == 0: + outputs = [tensor.name for tensor in estimator_spec.predictions.values()] if\ + isinstance(estimator_spec.predictions, dict) else \ + [estimator_spec.predictions.name] + else: + outputs = output_tensor_names + + logger.info("Estimator output tensor names are {}.".format(outputs)) + with tf.compat.v1.Session(graph=g) as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + # Freezing a graph requires output_node_names, which can be found in + # estimator_spec.predictions that contains prediction tensors as a + # dictionary + # When a model uses Iterator, we need to have 'MakeIterator' (default + # name used by TF) in the output_node_names as well. + output_nodes = list(set([output.split(':')[0] for output in outputs])) + if 'MakeIterator' in [node.op for node in g.as_graph_def().node]: + output_nodes.append('MakeIterator') + + graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, + g.as_graph_def(), output_nodes) + + return graph_def_session(graph_def, input_tensor_names, outputs) + +def saved_model_session(model, input_tensor_names, output_tensor_names, **kwargs): + """Build session with saved model + Args: + model (string): model path + input_tensor_names (list of string): input_tensor_names of model + output_tensor_names (list of string): output_tensor_names of model + Returns: + sess (tf.compat.v1.Session): tf.compat.v1.Session object + input_tensor_names (list of string): validated input_tensor_names + output_tensor_names (list of string): validated output_tensor_names + """ + try: + graph_def, input_names, output_names = get_graph_from_saved_model_v2( + model, input_tensor_names, output_tensor_names) + except: + graph_def, input_names, 
output_names = get_graph_from_saved_model_v1(model) + assert graph_def is not None, 'Can not parse the saved model...' + return graph_def_session(graph_def, input_names, output_names, **kwargs) + +# it's necessary that a session with input output tensors to run the model +SESSIONS = {'frozen_pb': frozen_pb_session, + 'graph_def': graph_def_session, + 'graph': graph_session, + 'saved_model': saved_model_session, + 'keras': keras_session, + 'checkpoint': checkpoint_session, + 'estimator': estimator_session, + 'slim': slim_session,} + + +class TensorflowBaseModel(BaseModel): + """Build TensorflowBaseModel object + Args: + model (string or tensorflow model object): model path or model object + kwargs (dict): other required parameters, like input_fn + """ + + def __init__(self, model, **kwargs): + + self._model = model + self._name = '' + self._weights = None + self.kwargs = kwargs + self._graph_info = {} + self._input_tensor_names = [] + self._output_tensor_names = [] + self._model_type = '' + self._sess = None + self._iter_op = None + self._workspace_path = '' + self._q_config = None + + def framework(self): + return 'tensorflow' + + @property + def name(self): + return self._name + + @name.setter + def name(self, name): + self.kwargs.update({'name': name}) + self._name = name + + @property + def weights(self): + """ Getter to weights """ + return self._weights + + @weights.setter + def weights(self, new_weights): + """ Setter to weights """ + self._weights = new_weights + + @property + def q_config(self): + return self._q_config + + @q_config.setter + def q_config(self, q_config): + self._q_config = q_config + + @property + def workspace_path(self): + return self._workspace_path + + @workspace_path.setter + def workspace_path(self, path): + self._workspace_path = path + + @property + def model_type(self): + return self._model_type + + @model_type.setter + def model_type(self, model_type): + assert model_type in SESSIONS, 'model type not supported....' 
+ self._model_type = model_type + + @property + def model(self): + return self.graph + + @property + def graph_def(self): + return self.graph.as_graph_def() + + @property + def graph_info(self): + self._graph_info = {} + for node in self.graph_def.node: + self._graph_info[node.name] = node.op + return self._graph_info + + @property + def sess(self): + if self._sess is None: + self._load_sess(self._model, **self.kwargs) + return self._sess + + @property + def graph(self): + return self.sess.graph + + @graph_def.setter + def graph_def(self, graph_def): + if self._sess is not None: + self._sess.close() + output_sess = SESSIONS['graph_def'](graph_def,\ + self._input_tensor_names, \ + self._output_tensor_names) + + self._sess = output_sess[0] + self._input_tensor_names = output_sess[1] + self._output_tensor_names = output_sess[2] + self.model_type = 'graph_def' + + def _load_sess(self, model, **kwargs): + if self.name: + kwargs.update({'name': self.name}) + # assert self.model_type, 'model type not set....' 
+ output_sess = SESSIONS[self.model_type](model, + self._input_tensor_names, \ + self._output_tensor_names, + **kwargs) + self._sess = output_sess[0] + self._input_tensor_names = output_sess[1] + self._output_tensor_names = output_sess[2] + + tf.compat.v1.get_variable_scope().reuse_variables() + return self._sess + + @property + def iter_op(self): + self._iter_op = [] + if self._sess is None: + self._load_sess(self._model, **self.kwargs) + op_list = [node.op for node in self._sess.graph.as_graph_def().node] + if 'MakeIterator' in op_list: + self._iter_op.append(self._sess.graph.get_operation_by_name('MakeIterator')) + return self._iter_op + + @property + def input_tensor_names(self): + if self._sess is None: + self._load_sess(self._model, **self.kwargs) + return copy.deepcopy(self._input_tensor_names) + + @input_tensor_names.setter + def input_tensor_names(self, tensor_names): + if len(tensor_names) == 0: + logger.warn("Input tensor names is empty.") + return + if self._sess is not None: + assert validate_graph_node(\ + self.graph_def, tensor_to_node(tensor_names)), \ + 'tensor names {} not in graph'.format(tensor_names) + self._input_tensor_names = tensor_names + + @property + def output_tensor_names(self): + if len(self._output_tensor_names) == 0: + self._load_sess(self._model, **self.kwargs) + return copy.deepcopy(self._output_tensor_names) + + @output_tensor_names.setter + def output_tensor_names(self, tensor_names): + if len(tensor_names) == 0: + logger.warn("Output tensor names should not be empty.") + return + if self._sess is not None: + assert validate_graph_node(\ + self.graph_def, tensor_to_node(tensor_names)), \ + 'tensor names {} not in graph'.format(tensor_names) + self._output_tensor_names = tensor_names + + # input/output node names and input/output tensor + # come from input/output tensor names, so do not support assign these values + @property + def input_node_names(self): + return copy.deepcopy(tensor_to_node(self.input_tensor_names)) + + 
@property + def output_node_names(self): + output_node_names = tensor_to_node(self.output_tensor_names) + iter_op_list = self.iter_op + if iter_op_list != []: + output_node_names += [iter_op.name for iter_op in iter_op_list] + return copy.deepcopy(output_node_names) + + @property + def input_tensor(self): + from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name + return [get_tensor_by_name(\ + self.graph, x) for x in self.input_tensor_names] + + @property + def output_tensor(self): + from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name + return [get_tensor_by_name(\ + self.graph, x) for x in self.output_tensor_names] + + def save(self, root=None): + if not root: + root = cfg.default_workspace + '/save.pb' + root = os.path.abspath(os.path.expanduser(root)) + # if not have suffix, default append .pb + os.makedirs(os.path.dirname(root), exist_ok=True) + pb_file = root if os.path.split(root)[-1].endswith('.pb') else root + '.pb' + f = tf.io.gfile.GFile(pb_file, 'wb') + f.write(self.graph_def.SerializeToString()) + logger.info("Save quantized model to {}.".format(pb_file)) + + +class TensorflowSavedModelModel(TensorflowBaseModel): + def get_all_weight_names(self): + import tensorflow as tf + names = [] + for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): + if len(layer.weights): + names.append(index) + return names + + def update_weights(self, tensor_name, new_tensor): + pass + + def get_weight(self, tensor_name): + return self.weights[tensor_name] + + @property + def model(self): + import time + import shutil + root = os.path.abspath(os.path.expanduser(cfg.default_workspace)) + root += str(time.time()) + if os.path.exists(root): + shutil.rmtree(root) + os.makedirs(root, exist_ok=True) + if not self._sess: + self._load_sess(self._model, **self.kwargs) + _, builder = self.build_saved_model(root) + builder.save() + model = tf.saved_model.load(root) + shutil.rmtree(root) + return model + + def 
report_sparsity(self): + """ Get sparsity of the model + Args: + Returns: + df (DataFrame): DataFrame of sparsity of each weight + total_sparsity (float): total sparsity of model + """ + import pandas as pd + import tensorflow as tf + import numpy as np + df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)", + 'Std', 'Mean', 'Abs-Mean']) + pd.set_option('display.precision', 2) + param_dims = [2, 4] + params_size = 0 + sparse_params_size = 0 + for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): + if not len(layer.weights): + continue + # Extract just the actual parameter's name, which in this context we treat + # as its "type" + weights = layer.get_weights()[0] + if weights.ndim in param_dims: + param_size, sparse_param_size, dense_param_size = compute_sparsity( + weights) + density = dense_param_size / param_size + params_size += param_size + sparse_params_size += sparse_param_size + df.loc[len(df.index)] = ([ + index, + list(weights.shape), + dense_param_size, + sparse_param_size, + (1 - density) * 100, + np.std(weights), + np.mean(weights), + np.mean(np.abs(weights)) + ]) + + total_sparsity = sparse_params_size / params_size * 100 + + df.loc[len(df.index)] = ([ + 'Total sparsity:', + params_size, + "-", + int(sparse_params_size), + total_sparsity, + 0, 0, 0]) + + return df, total_sparsity + + def build_saved_model(self, root=None): + if not root: + root = cfg.default_workspace + root = os.path.abspath(os.path.expanduser(root)) + if os.path.exists(root): + import shutil + shutil.rmtree(root) + + os.makedirs(root, exist_ok=True) + + from tensorflow.python.saved_model import signature_constants + from tensorflow.python.saved_model import tag_constants + from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(root) + sigs = {} + with tf.compat.v1.Session(graph=tf.Graph()) as sess: + #(TODO) not directly use self._sess.graph, use 
self.graph + tf.import_graph_def(self.graph.as_graph_def(), name="") + g = tf.compat.v1.get_default_graph() + inp = [get_tensor_by_name(g, x) for x in self._input_tensor_names] + out = [get_tensor_by_name(g, x) for x in self._output_tensor_names] + sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \ + tf.compat.v1.saved_model.signature_def_utils.predict_signature_def( + {k: v for k, v in zip(self._input_tensor_names, inp)}, + {k: v for k, v in zip(self._output_tensor_names, out)}) + builder.add_meta_graph_and_variables(sess, + [tag_constants.SERVING], + signature_def_map=sigs) + return root, builder + + def save(self, root=None): + root, builder = self.build_saved_model(root) + builder.save() + logger.info("Save quantized model to {}.".format(root)) + +class TensorflowQATModel(TensorflowSavedModelModel): + def __init__(self, model='', **kwargs): + super(TensorflowQATModel, self).__init__(model) + self.keras_model = None + self.model_type = 'keras' + + @property + def model(self): + if self.keras_model == None: + self.keras_model = tf.keras.models.load_model(self._model) + return self.keras_model + + @model.setter + def model(self, q_model): + self.keras_model = q_model + + def save(self, root=None): + if not root: + root = cfg.default_workspace + '/saved_model' + root = os.path.abspath(os.path.expanduser(root)) + # if not have suffix, default append .pb + os.makedirs(os.path.dirname(root), exist_ok=True) + q_aware_model = self.keras_model + q_aware_model.save(root) + saved_format = 'saved_model' + if root.endswith('.h5'): + saved_format = 'h5 file' + logger.info("Save quantized model to {}.".format(saved_format)) + return root + +class TensorflowCheckpointModel(TensorflowBaseModel): + + @property + def graph_def(self): + if self.model_type == 'graph_def': + return self.sess.graph.as_graph_def() + from neural_compressor.adaptor.tf_utils.util import _parse_ckpt_bn_input + from tensorflow.python.framework import graph_util + graph_def = 
self.sess.graph.as_graph_def() + graph_def = _parse_ckpt_bn_input(graph_def) + return graph_util.convert_variables_to_constants( + sess=self._sess, + input_graph_def=graph_def, + output_node_names=self.output_node_names) + + @graph_def.setter + def graph_def(self, graph_def): + if self._sess is not None: + self._sess.close() + output_sess = SESSIONS['graph_def'](graph_def, + self._input_tensor_names, \ + self._output_tensor_names) + self._sess = output_sess[0] + self._input_tensor_names = output_sess[1] + self._output_tensor_names = output_sess[2] + self.model_type = 'graph_def' + + +TENSORFLOW_MODELS = {'frozen_pb': TensorflowBaseModel, + 'graph_def': TensorflowBaseModel, + 'graph': TensorflowBaseModel, + 'checkpoint': TensorflowCheckpointModel, + 'estimator': TensorflowBaseModel, + 'slim': TensorflowBaseModel, + 'saved_model': TensorflowSavedModelModel, + 'keras': TensorflowSavedModelModel + } + +class TensorflowModel(object): + def __new__(cls, model_type, root, **kwargs): + os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + model = TENSORFLOW_MODELS[model_type](root, **kwargs) + model.model_type = model_type + return model diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 42b5cee2d29..5560158358e 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -303,25 +303,13 @@ def workspace_path(self): @workspace_path.setter def workspace_path(self, path): - from ..adaptor.pytorch import _cfg_to_qconfig, _propagate_qconfig + from neural_compressor.utils.pytorch import load workspace_path = path weights_file = os.path.join(os.path.abspath(os.path.expanduser(workspace_path)), 'best_model.pt') assert os.path.exists( weights_file), "weight file %s didn't exist" % weights_file - self._model = copy.deepcopy(self._model.eval()) - stat_dict = torch.load(weights_file) - tune_cfg = stat_dict.pop('best_configure') - op_cfgs = 
_cfg_to_qconfig(tune_cfg) - _propagate_qconfig(self._model, op_cfgs) - # sanity check common API misusage - if not any(hasattr(m, 'qconfig') and m.qconfig for m in self._model.modules()): - logger.warn("None of the submodule got qconfig applied. Make sure you " - "passed correct configuration through `qconfig_dict` or " - "by assigning the `.qconfig` attribute directly on submodules") - torch.quantization.add_observer_(self._model) - torch.quantization.convert(self._model, inplace=True) - self._model.load_state_dict(stat_dict) + self._model = load(weights_file, self._model) def save(self, root=None): if not root: @@ -576,9 +564,9 @@ def export_to_int8_onnx( model.graph.initializer.append(new_tensor) onnx.save(model, fp32_path) - from neural_compressor.adaptor.onnxrt import ONNXRTAdaptor + from neural_compressor.adaptor.onnxrt import ONNXRUNTIMEAdaptor # pylint: disable=E1120 - inc_model = ONNXRTAdaptor._replace_gemm_with_matmul(model) + inc_model = ONNXRUNTIMEAdaptor._replace_gemm_with_matmul(model) model = inc_model.model onnx.save(model, fp32_path) @@ -661,30 +649,36 @@ def export( save_path: str, conf, ): + from neural_compressor.experimental.export import ( + torch_to_fp32_onnx, + torch_to_int8_onnx + ) if conf.dtype == 'int8': - calib_dataloader = conf.kwargs.pop("calib_dataloader", None) - self.export_to_int8_onnx( - save_path=save_path, - example_inputs=conf.example_inputs, + torch_to_int8_onnx( + self.fp32_model, + self.model, + self.q_config, + save_path, + conf.example_inputs, opset_version=conf.opset_version, dynamic_axes=conf.dynamic_axes, input_names=conf.input_names, output_names=conf.output_names, quant_format=conf.quant_format, dtype='U8S8', - fp32_model=self.fp32_model, - calib_dataloader=calib_dataloader, + recipe=conf.recipe, ) elif conf.dtype == 'fp32': - self.export_to_fp32_onnx( - save_path=save_path, - example_inputs=conf.example_inputs, + torch_to_fp32_onnx( + self.fp32_model, + save_path, + conf.example_inputs, 
opset_version=conf.opset_version, dynamic_axes=conf.dynamic_axes, input_names=conf.input_names, output_names=conf.output_names, + do_constant_folding=True, verbose=True, - fp32_model=self.fp32_model, ) else: # pragma: no cover assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) @@ -701,15 +695,15 @@ def __init__(self, model, **kwargs): super(PyTorchFXModel, self).__init__(model, **kwargs) -class PyTorchIpexModel(PyTorchBaseModel): # pragma: no cover - """Build PyTorchIpexModel object +class IPEXModel(PyTorchBaseModel): # pragma: no cover + """Build IPEXModel object Args: model (onnx model): model path """ def __init__(self, model, **kwargs): - super(PyTorchIpexModel, self).__init__(model, **kwargs) + super(IPEXModel, self).__init__(model, **kwargs) self.ipex_config_path = None @property diff --git a/neural_compressor/objective.py b/neural_compressor/objective.py index 81c96117ef9..f373db46c1b 100644 --- a/neural_compressor/objective.py +++ b/neural_compressor/objective.py @@ -18,6 +18,7 @@ from abc import abstractmethod import time import numpy as np +from copy import deepcopy import tracemalloc from .utils.utility import get_size @@ -178,7 +179,7 @@ def __init__(self, objectives, accuracy_criterion, metric_criterion=[True], \ self.objectives = [OBJECTIVES[i]() for i in objectives] self.representation = [str(i).capitalize() for i in self.objectives] - self.baseline = None + self._baseline = None self.val = None if obj_criterion: if len(self.objectives) != len(obj_criterion) and len(obj_criterion) == 1: @@ -192,7 +193,24 @@ def __init__(self, objectives, accuracy_criterion, metric_criterion=[True], \ self.metric_criterion = metric_criterion self.obj_weight = obj_weight self.is_measure = is_measure - + self._accuracy_target = None + + @property + def baseline(self): + return self._baseline + + @baseline.setter + def baseline(self, val): + self._baseline = val + + @property + def accuracy_target(self): + return self._accuracy_target + 
+ @accuracy_target.setter + def accuracy_target(self, val): + self._accuracy_target = val + def compare(self, last, baseline): """The interface of comparing if metric reaches the goal with acceptable accuracy loss. @@ -248,6 +266,49 @@ def compare(self, last, baseline): zip(acc, acc_target, self.metric_criterion)]) else: return False + + def _get_accuracy_target(self): + assert self._baseline is not None, "Baseline is None" + base_acc, _ = self._baseline + if not isinstance(base_acc, list): + base_acc = [base_acc] + if self.metric_weight is not None and len(base_acc) > 1: + base_acc = [np.mean(np.array(base_acc) * self.metric_weight)] + + if self.relative: + if len(base_acc) == 1: + acc_target = [base_acc[0] * (1 - float(self.acc_goal)) if self.higher_is_better \ + else base_acc[0] * (1 + float(self.acc_goal))] + else: + # use metric_criterion to replace acc_criterion + acc_target = [b_acc * (1 - float(self.acc_goal)) if higher_is_better \ + else b_acc * (1 + float(self.acc_goal)) \ + for b_acc, higher_is_better in zip(base_acc, self.metric_criterion)] + else: + if len(base_acc) == 1: + acc_target = [base_acc[0] - float(self.acc_goal) if self.higher_is_better \ + else base_acc[0] + float(self.acc_goal)] + else: + # use metric_criterion to replace acc_criterion + acc_target = [b_acc - float(self.acc_goal) if higher_is_better \ + else b_acc + float(self.acc_goal) \ + for b_acc, higher_is_better in zip(base_acc, self.metric_criterion)] + return acc_target + + def accuracy_meets(self): + last_acc, _ = deepcopy(self.val) + got_better_result = False + if not isinstance(last_acc, list): + last_acc = [last_acc] + + if self.metric_weight is not None and len(last_acc) > 1: + last_acc = [np.mean(np.array(last_acc) * self.metric_weight)] + if not self._accuracy_target: + self.accuracy_target = self._get_accuracy_target() + all_higher = all([_last > _target for _last, _target in zip(last_acc, self.accuracy_target) ]) + all_lower = all([_last < _target for _last, _target in 
zip(last_acc, self.accuracy_target) ]) + got_better_result = (all_higher and self.higher_is_better) or (all_lower and not self.higher_is_better) + return got_better_result def evaluate(self, eval_func, model): """The interface of calculating the objective. diff --git a/neural_compressor/pruner/README.md b/neural_compressor/pruner/README.md new file mode 100644 index 00000000000..fee44bfde70 --- /dev/null +++ b/neural_compressor/pruner/README.md @@ -0,0 +1,211 @@ +Pruning +============ + + + +1. [Introduction](#introduction) + + + + - [Neural Network Pruning](#neural-network-pruning) + + + + - [Pruning Patterns](#pruning-patterns) + + + + - [Pruning Criteria](#pruning-criteria) + + + + - [Pruning Schedules](#pruning-schedule) + + + + - [Pruning types](#pruning-type) + + + + - [Regularization](#regularization) + + + +2. [Get Started With Pruning API](#get-started-with-pruning-api) + + + +3. [Examples](#examples) + + + + +## Introduction + + + +### Neural Network Pruning +Neural network pruning is a promising model compression technique that removes the least important parameters/neurons in the network and achieves compact architectures with minimal accuracy drop and maximal inference acceleration. As state-of-the-art model sizes have grown at an unprecedented speed, pruning has become increasingly crucial for reducing the computational and memory footprint that huge neural networks require. + + + + +### Pruning Patterns + + + +Pruning patterns defines the rules of pruned weights' arrangements in space. INC currently supports unstructured, N:M and NxM patterns. Please note that N:M pattern is applied to input channels while NxM pattern is applied to output ones. [Details](../../docs/source/pruning_details.md#pruning-patterns). + + + +### Pruning Criteria + + + +Pruning Criteria determines how should the weights of a neural network be scored and pruned. In the image below, pruning scores are represented by neurons' color and those with the lowest scores are pruned. 
The magnitude and gradient are widely used to score the weights. Currently, INC supports **magnitude**, **gradient**, **snip** and **snip_momentum** criteria; pruning criteria is defined along with pruning type in INC configurations. [Details](../../docs/source/pruning_details.md#pruning-criteria). + + + +### Pruning Schedules + + + +Pruning schedule defines the way the model reach the target sparsity (the ratio of pruned weights). Both **one-shot** and **iterative** pruning schedules are supported. [Details](../../docs/source/pruning_details.md#pruning-schedule). + + + + +### Pruning Types + + + +Pruning type defines how the masks are generated and applied to a neural network. Both **pattern_lock** and **progressive** types are supported by INC. [Details](../../docs/source/pruning_details.md#pruning-type). + + + +### Regularization + + + +Regularization is a technique that discourages learning a more complex model and therefore performs variable-selection. In the image below, some weights are pushed to be as small as possible and the connections are thus sparsified. **Group-lasso** method is used in INC. +[Details](../../docs/source/pruning_details.md#regularization). + + + + +## Get Started with Pruning API + + + +Neural Compressor `Pruning` API is defined under `neural_compressor.pruning`, which takes a user-defined config object as input. +Users can pass the customized training/evaluation functions to `Pruning` in various scenarios. + + + +The following section exemplifies how to use hooks in user pass-in training function to perform model pruning. Through the pruning API, multiple pruner objects are supported in one single Pruning object to enable layer-specific configurations and a default setting is used as a complement. + + + + +```python +from neural_compressor.pruning import Pruning, WeightPruningConfig + +config = WeightPruningConfig( + pruning_configs, # An example of pruning_configs is shown below. 
+ target_sparsity=0.8, start_step=1, end_step=10, pruning_frequency=1 # Default pruning setting. +) +prune = Pruning(config) # Pruning constructor. +prune.model = model # Set model object to prune. +prune.on_train_begin() # Execute on_train_begin hook before training. +for epoch in range(num_train_epochs): + model.train() +    prune.on_epoch_begin(epoch) # Execute on_epoch_begin hook before each epoch. +    for step, batch in enumerate(train_dataloader): +        prune.on_step_begin(step) # Execute on_step_begin hook before each step. +        outputs = model(**batch) +        loss = outputs.loss +        loss.backward() +        prune.on_before_optimizer_step() #Execute on_before_optimizer_step() hook before optimization. +        optimizer.step() + prune.on_after_optimizer_step() #Execute on_after_optimizer_step() hook after optimization. +        scheduler.step()  # Update learning rate schedule +        model.zero_grad() +        prune.on_step_end() # Execute on_step_end hook after each step. + prune.on_epoch_end() # Execute on_epoch_end hook after each epoch. +... +``` + +```python +pruning_configs = [ + { + 'target_sparsity': 0.9, # Target sparsity ratio of modules. + 'pruning_type': "snip_momentum", # Default pruning type. + 'pattern': "4x1", # Default pruning pattern. + 'op_names': ['layer1.*'], # A list of modules that would be pruned. + 'excluded_op_names': ['layer3.*'], # A list of modules that would not be pruned. + 'start_step': 0, # Step at which to begin pruning. + 'end_step': 10, # Step at which to end pruning. + 'pruning_scope': "global", # Default pruning scope. + 'pruning_frequency': 1, # Frequency of applying pruning. + 'min_sparsity_ratio_per_op': 0.0, # Minimum sparsity ratio of each module. + 'max_sparsity_ratio_per_op': 0.98, # Maximum sparsity ratio of each module. + 'sparsity_decay_type': "exp", # Function applied to control pruning rate. + 'pruning_op_types': ['Conv', 'Linear'], # Types of op that would be pruned. 
+ }, + { + "op_names": ['layer3.*'], # A list of modules that would be pruned. + "pruning_type": "snip_momentum_progressive", # Pruning type for the listed ops. + # 'target_sparsity' + } # For layer3, the missing target_sparsity would be complemented by default setting (i.e. 0.8) + ] +``` + + In the case mentioned above, pruning process can be done by pre-defined hooks in Neural Compressor. Users need to place those hooks inside the training function. The pre-defined Neural Compressor hooks are listed below. + + + +``` +on_train_begin() : Execute at the beginning of training phase. +on_epoch_begin(epoch) : Execute at the beginning of each epoch. +on_step_begin(batch) : Execute at the beginning of each batch. +on_step_end() : Execute at the end of each batch. +on_epoch_end() : Execute at the end of each epoch. +on_before_optimizer_step() : Execute before optimization step. +on_after_optimizer_step() : Execute after optimization step. +``` + + + + + + +## Examples + + + +We validate the pruning technique on typical models across various domains (including CV and NLP) and the examples are listed in [Pruning Examples](../../docs/source/pruning_details.md#examples). A complete overview of validated examples including quantization, pruning and distillation results could be found in [INC Validated examples](../../docs/source/validated_model_list.md#validated-pruning-examples). + + +Please refer to pruning examples([PyTorch](../../examples/README.md#Pruning-1)) for more information. + + diff --git a/neural_compressor/pruner/__init__.py b/neural_compressor/pruner/__init__.py new file mode 100644 index 00000000000..d33331cae08 --- /dev/null +++ b/neural_compressor/pruner/__init__.py @@ -0,0 +1,17 @@ +"""prune init.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/neural_compressor/pruner/criteria.py b/neural_compressor/pruner/criteria.py new file mode 100644 index 00000000000..0397fca4c82 --- /dev/null +++ b/neural_compressor/pruner/criteria.py @@ -0,0 +1,188 @@ +"""pruning criterion.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from neural_compressor.utils.utility import LazyImport +torch = LazyImport('torch') + + +CRITERIAS = {} + + +def register_criterion(name): + """Register a criterion to the registry.""" + + def register(criterion): + CRITERIAS[name] = criterion + return criterion + + return register + + +def get_criterion(config, modules): + """Get registered criterion class.""" + name = config["criterion_type"] + if name not in CRITERIAS.keys(): + assert False, f"criteria does not support {name}, currently only support {CRITERIAS.keys()}" + return CRITERIAS[name](modules, config) + + +class PruningCriterion: + """Pruning base criterion. 
+ + Args: + config: A config dict object that includes information about pruner and pruning criterion. + modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. + + Attributes: + scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. + """ + + def __init__(self, modules, config): + """Initialize a pruning criterion.""" + self.scores = {} + self.modules = modules + self.config = config + + def on_step_begin(self): + """Calculate and store the pruning scores of pruning modules at the beginning of a step.""" + pass + + def on_after_optimizer_step(self): + """Calculate and store the pruning scores of pruning modules after the optimizer step.""" + pass + + +@register_criterion('magnitude') +class MagnitudeCriterion(PruningCriterion): + """Pruning criterion. + + The magnitude criterion_class is derived from PruningCriterion. + The magnitude value is used to score and determine if a weight is to be pruned. + + Args: + config: A config dict object that includes information about pruner and pruning criterion. + modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. + + Attributes: + scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. + """ + + def __init__(self, modules, config): + """Initialize a magnitude pruning criterion.""" + super(MagnitudeCriterion, self).__init__(modules, config) + + def on_step_begin(self): + """Calculate and store the pruning scores based on magnitude criterion.""" + with torch.no_grad(): + for key in self.modules.keys(): + p = self.modules[key].weight.data + self.scores[key] = p + + +@register_criterion('gradient') +class GradientCriterion(PruningCriterion): + """Pruning criterion. + + The gradient criterion_class is derived from PruningCriterion. + The absolute value of gradient is used to score and determine if a weight is to be pruned.
+ + Args: + config: A config dict object that includes information about pruner and pruning criterion. + modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. + + Attributes: + scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. + """ + + def __init__(self, modules, config): + """Initiliaze a gradient pruning criterion.""" + super(GradientCriterion, self).__init__(modules, config) + + def on_after_optimizer_step(self): + """Calculate and store the pruning scores based on gradient criterion.""" + with torch.no_grad(): + for key in self.modules.keys(): + p = self.modules[key].weight + self.scores[key] = torch.abs(p.grad) + + +@register_criterion('snip') +class SnipCriterion(PruningCriterion): + """Pruning criterion. + + The snip criterion_class is derived from PruningCriterion. + The product of magnitude and gradient is used to score and determine if a weight is to be pruned. + Please refer to SNIP: Single-shot Network Pruning based on Connection Sensitivity. + (https://arxiv.org/abs/1810.02340) + + Args: + config: A config dict object that includes information about pruner and pruning criterion. + modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. + + Attributes: + scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. + """ + + def __init__(self, modules, config): + """Initiliaze a snip pruning criterion.""" + super(SnipCriterion, self).__init__(modules, config) + assert self.config.end_step > 0, "gradient based criterion does not work on step 0" + + def on_after_optimizer_step(self): + """Calculate and store the pruning scores based on snip criterion.""" + ##self.mask_weights() + with torch.no_grad(): + for key in self.modules.keys(): + p = self.modules[key].weight + self.scores[key] = torch.abs(p * p.grad) + + +@register_criterion('snip_momentum') +class SnipMomentumCriterion(PruningCriterion): + """Pruning criterion. 
+ + The snip_momentum criterion_class is derived from PruningCriterion. + A momentum mechanism is used to calculate snip score, which determines if a weight is to be pruned. + + Args: + config: A config dict object that includes information about pruner and pruning criterion. + modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. + alpha: A parameter that determines how much of the snip score is preserved from last pruning step. + beta: A parameter that determines how much of the snip score is updated at the current step. + + Attributes: + scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. + """ + + def __init__(self, modules, config): + """Initiliaze a snip_momentum pruning criterion.""" + super(SnipMomentumCriterion, self).__init__(modules, config) + assert self.config.end_step > 0, "gradient based criterion does not work on step 0" + for key in modules.keys(): + p = modules[key].weight + self.scores[key] = torch.zeros(p.shape).to(p.device) + + self.alpha = 0.9 + self.beta = 1.0 + + def on_after_optimizer_step(self): + """Calculate and store the pruning scores based on snip_momentum criterion.""" + with torch.no_grad(): + for key in self.modules.keys(): + p = self.modules[key].weight + self.scores[key] *= self.alpha + self.scores[key] += self.beta * torch.abs(p * p.grad) diff --git a/neural_compressor/pruner/logger.py b/neural_compressor/pruner/logger.py new file mode 100644 index 00000000000..f39f1198a65 --- /dev/null +++ b/neural_compressor/pruner/logger.py @@ -0,0 +1,23 @@ +"""logger module.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + from neural_compressor.utils import logger +except: + import logging + logger = logging.getLogger(__name__) diff --git a/neural_compressor/pruner/patterns.py b/neural_compressor/pruner/patterns.py new file mode 100644 index 00000000000..8ad1d1fb6f0 --- /dev/null +++ b/neural_compressor/pruner/patterns.py @@ -0,0 +1,1110 @@ +"""pruning patterns.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from neural_compressor.utils.utility import LazyImport +torch = LazyImport('torch') +from .logger import logger +from collections import namedtuple + +PATTERNS = {} + + +def register_pattern(name): + """Class decorator used to register a Pattern subclass to the registry. + + Decorator function used before a Pattern subclasses. + Make sure that this Pattern class can be registered in PATTERNS. + + Args: + name: A string. Define the pattern type name which will be included in a pruning process. + + Returns: + cls: The class of register. 
+ """ + + def register(pattern): + """Register patterns.""" + PATTERNS[name] = pattern + return pattern + + return register + + +def get_pattern(config, modules): + """Get registered pattern class. + + Get a Pattern object from PATTERNS. + + Args: + config: A config dict object. Contains the pattern information. + modules: torch neural network modules, which will be pruned with the pattern + + Returns: + A Pattern object. + + Raises: + AssertionError: Currently only support patterns which have been registered in PATTERNS. + """ + name = config.pattern + name = name.split('_')[-1] + if "x" in name: + return PATTERNS["NxM"](config, modules) + if ":" in name: + return PATTERNS["N:M"](config, modules) + assert False, f"currently only support {PATTERNS.keys()}" + + +SparsityInfo = namedtuple("SparsityInfo", ['zero_cnt', 'total_cnt', 'sparsity_ratio']) + + +class BasePattern: + """Pruning Pattern. + + It defines the basic pruning unit and how this unit will be pruned during pruning, e.g. 4x1, 2:4 + + Args: + config: A config dict object. Contains the pattern information. + modules: torch neural network modules, which will be pruned with the pattern + + Attributes: + pattern: A config dict object. The pattern related part in args config. + is_global: A bool. Whether the pruning take global pruning option. + Global pruning means that all pruning layers are gathered to calculate pruning criterion. + Local pruning, on the contrast, means that pruning layers are to calculate criterion individually. + keep_mask_layers:A dict. the layers whose mask will not be updated + invalid_layers: the layers whose shape don't fit the patten + modules: torch neural network modules, which will be pruned with the pattern + config: A config dict object. Contains all the information including the pattern's. + max_sparsity_ratio_per_op: A float. The maximum sparsity that one layer could reach + min_sparsity_ratio_per_op: A float. 
The minimum sparsity that one layer could reach + target_sparsity: A float. The sparsity ratio of the modules will be reached after pruning. + + """ + + def __init__(self, config, modules): + """Initialize the basic pruning unit of a pattern.""" + self.pattern = config.pattern + self.is_global = config.pruning_scope == "global" + self.keep_mask_layers = {} + self.invalid_layers = [] + self.modules = modules + self.config = config + self.max_sparsity_ratio_per_op = self.config['max_sparsity_ratio_per_op'] + self.min_sparsity_ratio_per_op = self.config['min_sparsity_ratio_per_op'] + self.target_sparsity_ratio = self.config['target_sparsity'] + # Not using deterministic_algorithms for all examples + torch.use_deterministic_algorithms(False) + + def reduce_tensor(self, data, dim): + """Reduce the data along the given dimension. + + Args: + data: The input data + dim: The reduced axis + + Returns: + The reduced tensor + + """ + name = self.config['criterion_reduce_type'] + if name == "mean": + return torch.mean(data, dim=dim) + elif name == "sum": + return torch.sum(data, dim=dim) + elif name == "max": + return torch.max(data, dim=dim)[0] + else: + assert False, "currently only support mean, sum and max reduce type" + + def get_masks(self, scores, target_sparsity_ratio, pre_masks): + """Generate the weight masks according to the weight score and the current target sparsity ratio. + + Args: + scores: A dict{“layer_name”: Tensor}. Store the pruning scores of weights. + target_sparsity_ratio: A float. After pruning, the sparsity of the modules will reach this value. + pre_masks: A dict{"layer_name": Tensor}. The previous masks generated after the last pruning step. + + Returns: + A dict with the identical size as pre_masks. Update the 0/1 values in it. 
1 means keep, 0 means drop + + """ + if self.is_global: + return self.get_masks_global(scores, target_sparsity_ratio, pre_masks) + else: + return self.get_masks_local(scores, target_sparsity_ratio, pre_masks) + + def get_masks_global(self, scores, target_sparsity_ratio, pre_masks): + """Generate the weight masks for global pruning, please refer to function get_masks for more information.""" + raise NotImplementedError + + def get_masks_local(self, scores, target_sparsity_ratio, pre_masks): + """Generate the weight masks for local pruning. + + Args: + scores: A dict{“layer_name”: Tensor}. Store the pruning scores of weights. + target_sparsity_ratio: A float. After pruning, the sparsity of the modules will reach this value. + pre_masks: A dict{"layer_name": Tensor}. The previous masks generated after the last pruning step. + + Returns: + A dict with the identical size as pre_masks. Update the 0/1 values in it. 1 means keep, 0 means drop + + """ + masks = {} + if isinstance(self, PatternNxM) and not isinstance(self.block_size, dict): + self.block_size = self.get_block_size_dict(pre_masks) + for key in scores.keys(): + score = {key: scores[key]} + pre_mask = {key: pre_masks[key]} + mask = self.get_masks_global(score, target_sparsity_ratio, pre_mask) + masks[key] = mask[key] + return masks + + def get_single_mask_per_target_ratio(self, score, exact_sparsity_ratio): + """Generate a mask for one layer with the exact_sparsity_ratio. + + Args: + score: A Tensor. the pruning scores of each weight elements. + exact_sparsity_ratio: A float. After pruning, the layer's sparsity will reach this value. + + Returns: + A Tensor with the identical size as score. a new mask. 
+ """ + flattern_score = torch.flatten(score) + k = int(exact_sparsity_ratio * flattern_score.numel()) + threshold, _ = torch.kthvalue(flattern_score, k) + if not k < 1: + zero = torch.tensor([0.]).to(score.device) + one = torch.tensor([1.]).to(score.device) + mask = torch.where(score <= threshold, zero, one) + else: + mask = torch.ones(score.shape, device=score.device) + return mask + + def get_block_size_dict(self, data): + """Get pattern size for each module. + + this is mainly for per-channel pruning when each module has different pruning size + + Args: + data: the input data + + Returns: + To be implemented in subclasses. + """ + raise NotImplementedError + + def get_sparsity_ratio(self, pre_masks, return_dict=False): + """Calculate the zero elements' ratio in pre_masks. + + please be noted that the implementations in subclass are little tricky + TODO: need to refactor this function + + Args: + pre_masks: Dict{"layer_name": Tensor}. The masks generated after the last pruning step. + return_dict: Whether need to return more information like zero_cnt and total_cnt + Returns: + A float. The zero elements' ratio in pre_masks. + """ + zero_cnt = 0 + total_cnt = 0 + for key in pre_masks.keys(): + pre_mask = pre_masks[key] + zero_cnt += torch.sum(pre_mask == 0.0).data.item() + total_cnt += pre_masks[key].numel() ##FIXME + if return_dict: + return {"sparsity_ratio": float(zero_cnt) / total_cnt, "zero_cnt": zero_cnt, "total_cnt": total_cnt} + else: + return float(zero_cnt) / total_cnt + + def get_pattern_lock_masks(self, modules): + """Obtain masks from original weight map according the pattern and weights' zero positions. + + Args: + modules: a dict{“layer_name”: Tensor}. Store weights. + + Returns: + A dict with the identical size as modules, containing pattern lock masks. 
+ """ + pattern_lock_masks = {} + for key in modules.keys(): + weight = modules[key].weight + shape = weight.shape + mask = torch.ones(shape) + mask[weight == 0] = 0.0 + pattern_lock_masks[key] = mask.to(weight.device) + return pattern_lock_masks + + def check_layer_validity(self): + """Check if a layer is valid for this block_size.""" + pass + + def get_reduced_masks_from_data(self, data, key): + """Obtain the unpruned weights and reshape according to the block_size.""" + raise NotImplementedError + + def update_residual_cnt(self, masks, target_sparsity_ratio): + """Update the number of parameters yet to be pruned. + + Args: + masks: the current pruning mask + target_sparsity_ratio: A float. After pruning, the sparsity of the modules will reach this value. + + Returns: + An int. How many weights still need to be pruned to achieve the target sparsity ratio + """ + self.total_params_cnt = self.get_sparsity_ratio(masks, return_dict=True)["total_cnt"] + to_prune_cnt = int(self.total_params_cnt * target_sparsity_ratio) + for key in masks.keys(): + if self.keep_mask_layers.get(key, False): + zero_cnt = self.get_sparsity_ratio({key: masks[key]}, return_dict=True)["zero_cnt"] + to_prune_cnt -= zero_cnt + + return to_prune_cnt + + def get_sparsity_ratio_each_layer(self, masks): + """Calculate the sparsity ratio of each layer. 
+ + TODO: need to refactor this function + + Args: + masks: The current weight masks + + Returns: + infos: the sparsity information for each layer, sparsity_ratio, zero_point and total cnts + SparsityInfo: the sparsity information for the model + """ + infos = {} + zero_cnts = 0 + total_cnts = 0 + for key in masks.keys(): + if key in self.invalid_layers: + continue + reduced_mask = self.get_reduced_masks_from_data(masks[key], key) + zero_cnt = (int(torch.sum(reduced_mask == 0.0).data.item())) + total_cnt = int(reduced_mask.numel()) + sparsity_ratio = float(zero_cnt) / total_cnt + val = SparsityInfo(zero_cnt, total_cnt, sparsity_ratio) + infos[key] = val + zero_cnts += zero_cnt + total_cnts += total_cnt + sparsity_ratio = float(zero_cnts) / total_cnts + return infos, SparsityInfo(zero_cnts, total_cnts, sparsity_ratio) + + def adjust_ratio(self, masks: dict, layer_name: str, key_new_sparsity: SparsityInfo, + max_sparsity_ratio: float, min_sparsity_ratio: float, \ + final_target_sparsity_ratio: float): + """Limits the sparsity of a layer to the set threshold interval. + + Args: + masks: the weight masks + layer_name: the to be examined layer name + key_new_sparsity: the proposal ratio for the layer + max_sparsity_ratio: A float. The maximum sparsity that one layer could reach + min_sparsity_ratio: A float. The minimum sparsity that one layer could reach + final_target_sparsity_ratio: the final target sparsity ratio + + Returns: + A bool indicating if the ratio needs to be adjusted and the adjusted sparsity ratio. 
+ adjust_sparsity_ratio: the ratio adjusted + """ + need_adjust = False + adjust_zero_cnt = key_new_sparsity.zero_cnt + adjust_sparsity_ratio = key_new_sparsity.sparsity_ratio + adjust_total_cnt = key_new_sparsity.total_cnt + + if adjust_sparsity_ratio > max_sparsity_ratio: + need_adjust = True + adjust_sparsity_ratio = max_sparsity_ratio + adjust_zero_cnt = int(adjust_total_cnt * max_sparsity_ratio) + + if adjust_sparsity_ratio < min_sparsity_ratio: + return need_adjust, adjust_sparsity_ratio + + ##TODO no need to calculate each time + infos, net_info = self.get_sparsity_ratio_each_layer(masks) + + any_exceed_target_ratio = False + for key in infos.keys(): + if infos[key].sparsity_ratio > final_target_sparsity_ratio: + any_exceed_target_ratio = True + break + if adjust_sparsity_ratio > final_target_sparsity_ratio: + any_exceed_target_ratio = True + if not any_exceed_target_ratio: + return need_adjust, adjust_sparsity_ratio + + zero_cnt_below_min_sparsity = 0 + total_cnt_below_min_sparsity = 0 + zero_cnt_above_min_sparsity = 0 + for key in infos.keys(): + info = infos[key] + if key == layer_name: + info = SparsityInfo(zero_cnt=adjust_zero_cnt, total_cnt=adjust_total_cnt, + sparsity_ratio=adjust_sparsity_ratio) + if info.sparsity_ratio < min_sparsity_ratio: + zero_cnt_below_min_sparsity += info.zero_cnt + total_cnt_below_min_sparsity += info.total_cnt + else: + zero_cnt_above_min_sparsity += info.zero_cnt + + gap_cnt = int(total_cnt_below_min_sparsity * min_sparsity_ratio) - zero_cnt_below_min_sparsity + remaining_cnt = int(net_info.total_cnt * final_target_sparsity_ratio) \ + - zero_cnt_above_min_sparsity - zero_cnt_below_min_sparsity + if remaining_cnt >= gap_cnt: + return need_adjust, adjust_sparsity_ratio + else: + new_zero_cnt = adjust_zero_cnt - (gap_cnt - remaining_cnt) + new_sparsity_ratio = float(new_zero_cnt) / adjust_total_cnt + ##adjust_zero_cnt = new_zero_cnt + adjust_sparsity_ratio = new_sparsity_ratio + return True, adjust_sparsity_ratio + + 
+@register_pattern('NxM') +class PatternNxM(BasePattern): + """Pruning Pattern. + + A Pattern class derived from BasePattern. In this pattern, the weights in a NxM block will be pruned or kept + during one pruning step. + + Args: + config: A config dict object. Contains the pattern information. + + Attributes: + block_size: A list of two Integers. The height and width of the block. + Please be aware that the vertical direction of a Linear layer's weight in PyTorch refers to the output channel. + Because PyTorch's tensor matmul has a hidden transpose operation. + """ + + def __init__(self, config, modules):
 + """Initialize the basic pruning unit of NXM pattern.""" + super(PatternNxM, self).__init__(config, modules) + pattern = self.pattern.split('_')[-1] + self.N = pattern.split('x')[0] + self.M = pattern.split('x')[1] + if self.N == "channel": ##channel-wise pruning mode + self.block_size = ["channel", int(self.M)] + elif self.M == "channel": ##channel-wise pruning mode + self.block_size = [int(self.N), "channel"] + else: + self.block_size = [int(pattern.split('x')[0]), int(pattern.split('x')[1])] + self.total_params_cnt = -1 + + self.block_size = self.get_block_size_dict() + self.check_layer_validity() + + def get_block_size_dict(self): + """Calculate the pruning pattern's block shape for each layer. + + Args: + data: Dict{"layer_name": Tensor}. Store weights or scores. + + Returns: + A dict. Dict{"layer_name": [block_size_1, block_size_2]}. + Containing layers' corresponding pruning pattern's block shape. + Because in channel-wise pruning different layers can have different pruning patterns.
+ """ + data = self.modules + block_sizes_dict = {} + if self.N == "channel" or self.M == "channel": + for key in data.keys(): + if isinstance(data[key], torch.nn.Module): + shape = data[key].weight.shape + else: + shape = data[key].shape + if self.N == "channel": + block_sizes_dict[key] = [shape[0], 1] + else: + block_sizes_dict[key] = [1, shape[1]] + return block_sizes_dict + for key in data.keys(): + block_sizes_dict[key] = self.block_size + return block_sizes_dict + + def check_layer_validity(self): + """Check if a layer is valid for this block_size.""" + block_sizes = self.block_size + datas = self.modules + for key in datas.keys(): + data = datas[key].weight + data = self._reshape_orig_to_2dims(data) + shape = data.shape + block_size = block_sizes[key] + if shape[0] % block_size[0] != 0 or shape[1] % block_size[1] != 0: ## only consider input channel + self.invalid_layers.append(key) + logger.warning(f"{key} shape {data.shape} cannot be divided by {self.pattern}") + + def get_reduced_masks_from_data(self, data, key): + """Obtain the unpruned weights and reshape according to the block_size.""" + assert key not in self.invalid_layers + block_size = self.block_size[key] + data = self._reshape_orig_to_2dims(data) + shape = data.shape + new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], block_size[1]] + data = data.reshape(new_shape) + data = data.sum(-1).sum(1) + reduced_mask = data != 0 + return reduced_mask + + def get_sparsity_ratio(self, pre_masks, return_dict=False): + """Please note that the zero cnt and total cnt are all block_wise for supporting channel-wise pruning. + + Args: + pre_masks: Dict{"layer_name": Tensor}. The masks generated after the last pruning step. + + Returns: + A float. Calculate the zero elements' ratio in pre_masks. 
+ """ + zero_cnt = 0 + total_cnt = 0 + for key in pre_masks.keys(): + if key in self.invalid_layers: + continue + reduced_mask = self.get_reduced_masks_from_data(pre_masks[key], key) + zero_cnt += (int(torch.sum(reduced_mask == 0.0).data.item())) + total_cnt += int(reduced_mask.numel()) + if total_cnt == 0: + sparsity_ratio = 0.0 + else: + sparsity_ratio = float(zero_cnt) / total_cnt + if return_dict: + return {"sparsity_ratio": sparsity_ratio, "zero_cnt": zero_cnt, "total_cnt": total_cnt} + else: + return sparsity_ratio + + def get_sparsity_ratio_progressive(self, pre_masks, return_dict=False): + """Calculate the sparsity ratio of each layer.""" + zero_cnt = 0 + total_cnt = 0 + for key in pre_masks.keys(): + if key in self.invalid_layers: + continue + # progressive masks are unstructured, therefore directly find zeros + zero_cnt += float(torch.sum(pre_masks[key] == 0).data.item()) + total_cnt += float(pre_masks[key].numel()) + return (zero_cnt / total_cnt) + + def _reshape_orig_to_2dims(self, data): + """Mainly for processing layer dims not equal to 2, for example conv layer. + + Args: + data: the input + + Returns: + a reshaped data + """ + ##TODO need to verify whether it's ok for transposed conv + if len(data.shape) == 4: + data = data.permute(0, 2, 3, 1) ##cout,k,k,cin + data = data.reshape(data.shape[0], -1) + return data + + def _reshape_2dims_to_orig(self, data, orig_shape): + """Mainly for recover layer dims not equal to 2, for example conv layer. + + Args: + data: input + orig_shape: target shape + + Returns: + a reshaped data + """ + if len(orig_shape) == 4: + data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], + orig_shape[1]) + data = data.permute(0, 3, 1, 2) + return data + + def reshape_orig_to_pattern(self, data, key): + """Reshape the data(s1,s2) to [s1/N,N,s2,s2/M]. + + Args: + data: the input + key: the layer name + + Returns: + The reshaped input tensor. 
+ """ + block_size = self.block_size[key] + data = self._reshape_orig_to_2dims(data) + shape = data.shape + new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], + block_size[1]] + data = data.reshape(new_shape) + return data + + def reshape_reduced_to_orig(self, data, key, orig_shape): + """Reshape the data [s1/N,s2/M] to [s1,s2], also permute dims for conv layer. + + Args: + data: + key: + orig_shape: + + Returns: + Original shape data + """ + block_size = self.block_size[key] + data = data.repeat_interleave(block_size[0], dim=0).repeat_interleave(block_size[1], dim=-1) + data = self._reshape_2dims_to_orig(data, orig_shape) + return data + + def reduce_scores(self, scores): + """Recalculate the pruning scores after reducing the data.""" + new_scores = {} + for key in scores.keys(): + if key in self.invalid_layers: + continue + if self.keep_mask_layers.get(key, False): + continue + self.keep_mask_layers[key] = False + current_score = scores[key] + current_score = self.reshape_orig_to_pattern(current_score, key) + ##sum or mean is quite different for per channel pruning + current_score_sum = self.reduce_tensor(self.reduce_tensor(current_score, dim=-1), dim=1) + new_scores[key] = current_score_sum + return new_scores + + def get_mask_per_threshold(self, score, threshold, block_size): + """Get the mask per threshold.""" + zero = torch.tensor([0.]).to(score.device) + one = torch.tensor([1.]).to(score.device) + mask = torch.where(score <= threshold, zero, one) + mask = mask.repeat_interleave(block_size[0], dim=0).repeat_interleave(block_size[1], dim=-1) + return mask + + def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, + keep_exact_sparsity_ratio=True): + """Generate masks for layers. + + Gather all layer's scores together and calculate a common threshold. + This threshold will be applied for all layers. + + Args: + scores: A dict{“layer_name”: Tensor}. Store the pruning scores of weights. 
+ cur_target_sparsity_ratio: A float. After pruning, the model's sparsity will reach this value. + pre_masks: A dict{"layer_name": Tensor}. The masks generated after the last pruning step. + max_sparsity_ratio_per_op: A float. The maximum sparsity that one layer can reach. + keep_pre_masks: A bool. If True, keep the masks unchanged. + + Returns: + A dict with the identical size as pre_masks. Update the 0/1 values in it. + """ + ##keep the masks if the layer exceed max sparsity ratio + + masks = pre_masks + + k_blockwise = self.update_residual_cnt(masks, cur_target_sparsity_ratio) + if k_blockwise <= 0: + return masks + new_scores = self.reduce_scores(scores) + global_scores = torch.cat([torch.flatten(v) for v in new_scores.values()]) + residual_k = k_blockwise + not_exceed_layers = [key for key in new_scores.keys()] + if self.min_sparsity_ratio_per_op > 0: + sparsity_infos_perlayer, _ = self.get_sparsity_ratio_each_layer(masks) + + while True: + threshold, _ = torch.kthvalue(global_scores, residual_k) + for key in not_exceed_layers: + block_size = self.block_size[key] + score = new_scores[key] + mask = self.get_mask_per_threshold(score, threshold, block_size) + info = self.get_sparsity_ratio({key: mask}, return_dict=True) + zero_cnt = info["zero_cnt"] + total_cnt = info["total_cnt"] + current_sparsity_ratio = float(zero_cnt) / total_cnt + key_new_sparsity = SparsityInfo(zero_cnt, total_cnt, current_sparsity_ratio) + need_adjust, adjust_ratio = self.adjust_ratio(masks, key, key_new_sparsity, + self.max_sparsity_ratio_per_op, + self.min_sparsity_ratio_per_op, + self.target_sparsity_ratio) + if need_adjust: + # uptade status + self.keep_mask_layers[key] = True + masks[key] = self.get_single_mask_per_target_ratio(new_scores[key], adjust_ratio) + masks[key] = masks[key].repeat_interleave(block_size[0], 0).repeat_interleave(block_size[1], -1) + if keep_exact_sparsity_ratio: + zero_cnt = self.get_sparsity_ratio({key: masks[key]}, return_dict=True)["zero_cnt"] + residual_k 
-= zero_cnt + else: + masks[key] = mask + if not keep_exact_sparsity_ratio: + break + new_not_exceed_layers = [key for key in new_scores.keys() if not self.keep_mask_layers.get(key, False)] + if not_exceed_layers == new_not_exceed_layers or len(new_not_exceed_layers) == 0: + break + not_exceed_layers = new_not_exceed_layers + global_scores = torch.cat([torch.flatten(new_scores[key]) for key in not_exceed_layers]) + + for key in masks.keys(): + if key in self.invalid_layers: + continue + if len(scores[key].shape) == 4: ## need to permute + mask = masks[key] + orig_shape = scores[key].shape + mask = self._reshape_2dims_to_orig(mask, orig_shape) + masks[key] = mask + layer_ratio = torch.sum(masks[key] == 0.0).data.item() / masks[key].numel() + logger.info(f'layer {key} sparsity_ratio is {layer_ratio}') + return masks + + def get_pattern_lock_masks(self, modules): + """Obtain masks from original weight map, by masking where weights' are zero. + + Args: + modules: A dict{“layer_name”: Tensor}. Store weights. + + Returns: + A dict with the identical size as modules, containing pattern lock masks. 
+ """ + pattern_lock_masks = {} + for key in modules.keys(): + weight = modules[key].weight + ori_shape = weight.shape + if key in self.invalid_layers: + mask = torch.ones(weight.shape, device=weight.device) + pattern_lock_masks[mask] = mask + continue + reduced_mask = self.get_reduced_masks_from_data(weight, key) + mask = self.reshape_reduced_to_orig(reduced_mask, key, ori_shape) + pattern_lock_masks[key] = mask + return pattern_lock_masks + + # ---------------progressive related-------------------- + def count_new_masked_cnts(self, new_added_masks): + """Cound the number of elements to be masked.""" + # count how many elements are to masked, + new_masked_cnts = 0 + for key in new_added_masks.keys(): + new_masked_cnts += torch.nonzero(1 - new_added_masks[key]).size()[0] + return new_masked_cnts + + def update_new_added_masks(self, pre_masks, cur_masks): + """Obtain the new set-to-zero mask during a pruning procedure. + + Pre_masks, cur_masks should have identical keys bacause they stands for one model. + """ + # obtain the new set-to-zero mask during a pruning procedure. + # pre_masks, cur_masks should have identical keys bacause they stands for one model. 
+ new_added_masks = {} + for key in pre_masks.keys(): + pre_mask = pre_masks[key] + cur_mask = cur_masks[key] + zero = torch.tensor([0.]).to(pre_mask.device) + one = torch.tensor([1.]).to(cur_mask.device) + new_added_masks[key] = torch.where(pre_mask == cur_mask, one, zero) + return new_added_masks + + def update_progressive_masks(self, pre_masks, cur_masks, scores, progressive_step, progressive_configs): + """Generate the progressive masks.""" + # Generate the progressive masks + use_global = progressive_configs["use_global"] + if use_global: + return self.update_progressive_masks_global(pre_masks, cur_masks, scores, \ + progressive_step, progressive_configs) + else: + return self.update_progressive_masks_local(pre_masks, cur_masks, scores, \ + progressive_step, progressive_configs) + + def update_progressive_masks_linear(self, pre_masks, cur_masks, progressive_step, progressive_configs): + """Generate the progressive masks along the block's larger dimension.""" + progressive_steps = progressive_configs["progressive_steps"] + progressive_masks = {} + new_added_masks = self.update_new_added_masks(pre_masks, cur_masks) + for key in pre_masks.keys(): + block_size = self.block_size[key] + new_added_mask = new_added_masks[key] + # conv + new_added_mask = self._reshape_orig_to_2dims(new_added_mask) + shape = new_added_mask.shape + # progressive masks are generated in the direction of block's large dim. 
+ if block_size[0] >= block_size[1]: + # NxM (N>=M), output channel pruning + new_shape = [shape[0] // block_size[0], progressive_steps, block_size[0] // progressive_steps, + shape[1] // block_size[1], block_size[1]] + new_added_mask_reshape = new_added_mask.reshape(new_shape) + new_added_mask_reshape[:, progressive_step:, :, :, :] = 1.0 + else: + # NxM (N N + return reduced_mask + + def get_least_ninm_mask_from_data(self, score): + """Generate the least N scores in M.""" + current_score = score + M = self.M + N = self.N + current_score = self._reshape_orig_to_2dims(current_score) + shape = current_score.shape + new_shape = [shape[0], shape[1] // M, M] + current_score_new = current_score.reshape(new_shape) + + threshold, _ = torch.kthvalue(current_score_new, N, dim=2) + threshold = threshold.unsqueeze(-1) + + threshold = threshold.expand(shape[0], shape[1] // M, M) + threshold = threshold.reshape((shape[0], shape[1])) + + one = torch.tensor([1.]).to(current_score.device) + zero = torch.tensor([0.]).to(current_score.device) + mask = torch.where(current_score <= threshold, zero, one) + return mask + + def get_sparsity_ratio(self, pre_masks, return_dict=False): + """Please noted that the zero cnt and total cnt are all block_wise for supporting channel-wise pruning. + + The return sparsity ratio is elementwised(confused, TODO). + + Args: + pre_masks: + return_dict: + + Returns: + An elementwise sparisty ratio. 
+ """ + ##simply use elemwise sparsity + zero_cnt = 0 + total_cnt = 0 + for key in pre_masks.keys(): + if key in self.invalid_layers: + # total_cnt += pre_masks[key].numel() // self.M + continue + reduced_mask = self.get_reduced_masks_from_data(pre_masks[key], key) + zero_cnt += int((torch.sum(reduced_mask == 0)).data.item()) + total_cnt += int(reduced_mask.numel()) + sparsity_ratio = float(zero_cnt) / total_cnt * self.N / self.M + + if return_dict: + return {"sparsity_ratio": sparsity_ratio, "zero_cnt": zero_cnt, + "total_cnt": total_cnt} + else: + return sparsity_ratio + + def _reshape_orig_to_2dims(self, data): + if len(data.shape) == 4: ##TODO need to verify whether it's ok for transposed conv + data = data.permute(0, 2, 3, 1) ##cout,k,k,cin + data = data.reshape(data.shape[0], -1) + return data + + def _reshape_2dims_to_orig(self, data, orig_shape): + if len(orig_shape) == 4: + data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]) + data = data.permute(0, 3, 1, 2) + return data + + def reshape_orig_to_pattern(self, data, key): + """Reshape the data based on the pruning pattern.""" + data = self._reshape_orig_to_2dims(data) + shape = data.shape + new_shape = [shape[0], shape[1] // self.M, self.M] + data = data.reshape(new_shape) + return data + + def reshape_reduced_to_orig(self, data, key, orig_shape): + """Reshape the reduced data to its original shape.""" + data = data.repeat_interleave(self.M, dim=-1) + return self._reshape_2dims_to_orig(data, orig_shape) + + def reduce_scores(self, scores): + """Calculate the pruning scores after reducing the data and obtain the least N scores in M.""" + ##to get the least N scores in M + M = self.M + N = self.N + least_ninm_masks = {} + new_scores = {} + for key in scores.keys(): + if key in self.invalid_layers: + continue + if self.keep_mask_layers.get(key, False): + continue + current_score = scores[key] + mask = self.get_least_ninm_mask_from_data(current_score) + current_score_new = 
self._reshape_orig_to_2dims(current_score) + shape = current_score_new.shape + current_score_new = current_score_new.reshape((shape[0], shape[1])) + ##to get the sum of N scores in each block with M + current_score_new = current_score_new * (1.0 - mask) + current_score_new = current_score_new.reshape(shape[0], shape[1] // M, M) + score_sum = self.reduce_tensor(current_score_new, dim=-1) + least_ninm_masks[key] = mask + new_scores[key] = score_sum + return new_scores, least_ninm_masks + + def get_ele_mask_per_threshold(self, score, threshold, block_size, least_ninm_mask): + """Get the elementwise mask per threshold. + + Args: + score: + threshold: + block_size: + least_m_in_m_masks: + + Returns: + mask: + """ + zero = torch.tensor([0.]).to(score.device) + one = torch.tensor([1.]).to(score.device) + mask = torch.where(score <= threshold, zero, one) + mask = mask.repeat_interleave(block_size[1], dim=-1) + ## both zero will be zero + mask = (mask + least_ninm_mask) + mask = torch.where(mask <= 0, zero, one) + return mask + + def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, + keep_exact_sparsity_ratio=True): + """Generate masks for layers. + + Gather all layer's scores together and calculate a common threshold. + This threshold will be applied for all layers. + + Args: + scores: A dict{“layer_name”: Tensor}. Store the pruning scores of weights. + target_sparsity_ratio: A float. After pruning, the model's sparsity will reach this value. + pre_masks: A dict{"layer_name": Tensor}. The masks generated after the last pruning step. + max_sparsity_ratio_per_op: A float. The maximum sparsity that one layer can reach. + + Returns: + A dict with the identical size as pre_masks. Update the 0/1 values in it. 
+ """ + masks = pre_masks + + block_sparsity_ratio = cur_target_sparsity_ratio * self.M / self.N + k_blockwise = self.update_residual_cnt(pre_masks, block_sparsity_ratio) + if k_blockwise <= 0: + return masks + new_scores, least_ninm_masks = self.reduce_scores(scores) + global_scores = torch.cat([torch.flatten(v) for v in new_scores.values()]) ##block_wise + residual_k = k_blockwise + not_exceed_layers = [key for key in new_scores.keys()] + + while True: + threshold, _ = torch.kthvalue(global_scores, residual_k) + for key in not_exceed_layers: + score = new_scores[key] + mask = self.get_ele_mask_per_threshold(score, threshold, (self.N, self.M), least_ninm_masks[key]) + info = self.get_sparsity_ratio({key: mask}, return_dict=True) + zero_cnt = info["zero_cnt"] + total_cnt = info["total_cnt"] + current_sparsity_ratio = float(zero_cnt) / total_cnt + key_new_sparsity = SparsityInfo(zero_cnt, total_cnt, current_sparsity_ratio) + need_adjust, adjust_ratio = self.adjust_ratio(masks, key, key_new_sparsity, + self.max_sparsity_ratio_per_op * self.M / self.N, + self.min_sparsity_ratio_per_op * self.M / self.N, + self.target_sparsity_ratio * self.M / self.N) + + if need_adjust: + self.keep_mask_layers[key] = True + masks[key] = self.get_single_mask_per_target_ratio(new_scores[key], adjust_ratio) + masks[key] = masks[key].repeat_interleave(self.M, dim=-1) + ## both zero will be zero + masks[key] = (masks[key] + least_ninm_masks[key]) + zero = torch.tensor([0.]).to(score.device) + one = torch.tensor([1.]).to(score.device) + masks[key] = torch.where(masks[key] <= 0, zero, one) + if keep_exact_sparsity_ratio: + zero_cnt = self.get_sparsity_ratio({key: masks[key]}, return_dict=True)["zero_cnt"] + residual_k -= zero_cnt + else: + masks[key] = mask + if not keep_exact_sparsity_ratio: + break + new_not_exceed_layers = [key for key in new_scores.keys() if not self.keep_mask_layers.get(key, False)] + if not_exceed_layers == new_not_exceed_layers or len(new_not_exceed_layers) == 0: + 
break + not_exceed_layers = new_not_exceed_layers + global_scores = torch.cat([torch.flatten(new_scores[key]) for key in not_exceed_layers]) + + for key in masks.keys(): + if key in self.invalid_layers: + continue + if len(scores[key].shape) == 4: ## need to permute + mask = masks[key] + orig_shape = scores[key].shape + mask = self._reshape_2dims_to_orig(mask, orig_shape) + masks[key] = mask + layer_ratio = torch.sum(masks[key] == 0.0).data.item() / masks[key].numel() + logger.info(f'layer {key} sparsity_ratio is {layer_ratio}') + return masks + + def get_pattern_lock_masks(self, modules): + """Obtain masks from original weight map, by masking where weights' are zero. + + Args: + modules: A dict{“layer_name”: Tensor}. Store weights. + + Returns: + A dict with the identical size as modules, containing pattern lock masks. + """ + pattern_lock_masks = {} + for key in modules.keys(): + weight = modules[key].weight + orig_shape = weight.shape + if key in self.invalid_layers: + mask = torch.ones(orig_shape, device=weight.device) + pattern_lock_masks[key] = mask + continue + mask = self.get_least_ninm_mask_from_data(weight) + mask = self._reshape_2dims_to_orig(mask, orig_shape) + pattern_lock_masks[key] = mask + return pattern_lock_masks diff --git a/neural_compressor/pruners/__init__.py b/neural_compressor/pruner/pruner_legacy/__init__.py similarity index 100% rename from neural_compressor/pruners/__init__.py rename to neural_compressor/pruner/pruner_legacy/__init__.py diff --git a/neural_compressor/pruners/gradient_sensitivity.py b/neural_compressor/pruner/pruner_legacy/gradient_sensitivity.py similarity index 99% rename from neural_compressor/pruners/gradient_sensitivity.py rename to neural_compressor/pruner/pruner_legacy/gradient_sensitivity.py index e6ae10e0ee6..46683c14e23 100644 --- a/neural_compressor/pruners/gradient_sensitivity.py +++ b/neural_compressor/pruner/pruner_legacy/gradient_sensitivity.py @@ -18,7 +18,7 @@ import numpy as np from .pruner import 
pruner_registry, Pruner from heapq import heappush, heappop -from ..utils import logger +from neural_compressor.utils import logger import re @pruner_registry diff --git a/neural_compressor/pruners/group_lasso.py b/neural_compressor/pruner/pruner_legacy/group_lasso.py similarity index 98% rename from neural_compressor/pruners/group_lasso.py rename to neural_compressor/pruner/pruner_legacy/group_lasso.py index fc659bdafa1..045fa18d07d 100644 --- a/neural_compressor/pruners/group_lasso.py +++ b/neural_compressor/pruner/pruner_legacy/group_lasso.py @@ -20,7 +20,7 @@ import numpy as np from .pruner import pruner_registry, Pruner from .magnitude import BasicMagnitudePruner -from ..utils import logger +from neural_compressor.utils import logger @pruner_registry class GroupLassoPruner(BasicMagnitudePruner): diff --git a/neural_compressor/pruners/magnitude.py b/neural_compressor/pruner/pruner_legacy/magnitude.py similarity index 98% rename from neural_compressor/pruners/magnitude.py rename to neural_compressor/pruner/pruner_legacy/magnitude.py index 752e1cf2268..9544d9474b2 100644 --- a/neural_compressor/pruners/magnitude.py +++ b/neural_compressor/pruner/pruner_legacy/magnitude.py @@ -17,7 +17,7 @@ import numpy as np from .pruner import pruner_registry, Pruner -from ..utils import logger +from neural_compressor.utils import logger @pruner_registry class BasicMagnitudePruner(Pruner): diff --git a/neural_compressor/pruners/pattern_lock.py b/neural_compressor/pruner/pruner_legacy/pattern_lock.py similarity index 100% rename from neural_compressor/pruners/pattern_lock.py rename to neural_compressor/pruner/pruner_legacy/pattern_lock.py diff --git a/neural_compressor/pruners/pruner.py b/neural_compressor/pruner/pruner_legacy/pruner.py similarity index 98% rename from neural_compressor/pruners/pruner.py rename to neural_compressor/pruner/pruner_legacy/pruner.py index 64d2e44cdda..6384235af30 100644 --- a/neural_compressor/pruners/pruner.py +++ 
b/neural_compressor/pruner/pruner_legacy/pruner.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..experimental.pruning_recipes.patterns import patterns +from neural_compressor.experimental.pruning_recipes.patterns import patterns PRUNERS = {} diff --git a/neural_compressor/pruners/util/block_mask.py b/neural_compressor/pruner/pruner_legacy/util/block_mask.py similarity index 100% rename from neural_compressor/pruners/util/block_mask.py rename to neural_compressor/pruner/pruner_legacy/util/block_mask.py diff --git a/neural_compressor/pruner/pruners.py b/neural_compressor/pruner/pruners.py new file mode 100644 index 00000000000..c9a7cf436ae --- /dev/null +++ b/neural_compressor/pruner/pruners.py @@ -0,0 +1,565 @@ +"""Pruner.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +from neural_compressor.utils.utility import LazyImport +torch = LazyImport('torch') +from .patterns import get_pattern +from .schedulers import get_scheduler +from .criteria import get_criterion, CRITERIAS +from .regs import get_reg +from .logger import logger + +PRUNERS = {} + + +def register_pruner(name): + """Class decorator to register a Pruner subclass to the registry. + + Decorator function used before a Pattern subclass. 
+ Make sure that the Pruner class decorated by this function can be registered in PRUNERS. + + Args: + cls (class): The subclass of register. + name: A string. Define the pruner type. + + Returns: + cls: The class of register. + """ + + def register(pruner): + PRUNERS[name] = pruner + return pruner + + return register + + +def get_pruner(config, modules): + """Get registered pruner class. + + Get a Pruner object from PRUNERS. + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + + Returns: + A Pruner object. + + Raises: AssertionError: Cuurently only support pruners which have been registered in PRUNERS. + """ + ## do the ugly work here + if "progressive" not in config["pruning_type"]: + name = config["pruning_type"] + config["progressive"] = False + else: + # if progressive, delete "progressive" words and reset config["progressive"] + name = config["pruning_type"][0:-12] + config["progressive"] = True + if name in CRITERIAS: + if config["progressive"] == False: + config['criterion_type'] = name + name = "basic" ##return the basic pruner + else: + config['criterion_type'] = name + name = "progressive" ## return the progressive pruner + + if name not in PRUNERS.keys(): + assert False, f"does not support {name}, currently only support {PRUNERS.keys()}" + return PRUNERS[name](config, modules) + + +class BasePruner: + """Pruning Pruner. + + The class which executes pruning process. + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + + Attributes: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + masks: A dict {"module_name": Tensor}. Store the masks for modules' weights. + scores: A dict {"module_name": Tensor}. 
Store the score for modules' weights, + which are used to decide pruning parts with a criterion. + pattern: A Pattern object. Defined in ./patterns.py + scheduler: A scheduler object. Defined in ./scheduler.py + current_sparsity_ratio: A float. Current model's sparsity ratio, initialized as zero. + global_step: An integer. The total steps the model has run. + start_step: An integer. When to trigger pruning process. + end_step: An integer. When to end pruning process. + pruning_frequency: An integer. The pruning frequency, which's valid when iterative + pruning is enabled. + target_sparsity_ratio: A float. The final sparsity after pruning. + max_sparsity_ratio_per_op: A float. Sparsity ratio maximum for every module. + """ + + def __init__(self, config, modules): + """Initialize.""" + self.modules = modules + self.config = config + self.masks = {} + self.global_step = 0 + self.handled_global_step = -1 + self.start_step = self.config['start_step'] + self.end_step = self.config['end_step'] + self.pruning_frequency = self.config['pruning_frequency'] + ##this is different with original code + self.total_prune_cnt = (self.end_step - self.start_step + 1) \ + // self.pruning_frequency + self.completed_pruned_cnt = 0 + for key in self.modules.keys(): + module = self.modules[key] + self.masks[key] = torch.ones(module.weight.shape).to(module.weight.device) ##TODO support bias or others + + self.target_sparsity_ratio = self.config['target_sparsity'] + self.current_sparsity_ratio = 0.0 + self.init_sparsity_ratio = 0.0 + self._init() + + def _init(self): + """Auxiliary function for initializing.""" + pass + + def on_epoch_begin(self, epoch): + """Implement at the beginning of each epoch.""" + pass + + def mask_weights(self): + """Apply masks to corresponding modules' weights. + + Weights are multipled with masks. This is the formal pruning process. 
+ """ + with torch.no_grad(): + for key in self.modules.keys(): + module = self.modules[key] + module.weight.data = module.weight.data * self.masks[key] + + def mask_weights_general(self, input_masks): + """Apply input masks to corresponding modules' weights. + + Weights are multipled with input_masks. + + Args: + input_masks: A dict {"module_name": Tensor}. Store the masks for modules' weights. + """ + with torch.no_grad(): + for key in self.modules.keys(): + module = self.modules[key] + module.weight.data = module.weight.data * input_masks[key] + + def on_step_begin(self, local_step): + """Implement at the start of each step.""" + if self.handled_global_step == self.global_step: + return + self.update_masks(local_step) + self.handled_global_step = self.global_step + + def update_masks(self, local_step): + """Update the masks at a given local step.""" + pass + + def on_epoch_end(self): + """Implement at the end of each epoch.""" + pass + + def on_step_end(self): + """Implement at the end of each step.""" + pass + + def on_before_optimizer_step(self): + """Implement before optimizer.step().""" + pass + + def on_after_optimizer_step(self): + """Implement after optimizer.step(). + + Prune the model after optimization. + """ + self.mask_weights() + self.global_step += 1 + + def on_train_begin(self): + """Implement at the beginning of training phase.""" + pass + + def on_train_end(self): + """Implement at the end of training phase.""" + pass + + def on_before_eval(self): + """Implement at the beginning of evaluation phase.""" + pass + + def on_after_eval(self): + """Implement at the end of evaluation phase.""" + pass + + def check_is_pruned_step(self, step): + """Check if a pruning process should be performed at the current step. + + Args: + step: an integer representing the number of current step. + + Returns: + A Boolean. 
+ """ + if step < self.start_step or step > self.end_step: + return False + if int(step - self.start_step) % self.pruning_frequency == 0: + return True + return False + + +@register_pruner("basic") +class BasicPruner(BasePruner): + """Pruning Pruner. + + The class which executes pruning process. + 1. Defines pruning functions called at step begin/end, epoch begin/end. + 2. Defines the pruning criterion. + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + + Attributes: + pattern: A Pattern object. Define pruning weights' arrangements within space. + criterion: A Criterion Object. Define which weights are to be pruned + scheduler: A Scheduler object. Define model's sparsity changing method as training/pruning executes. + reg: A Reg object. Define regulization terms. + """ + + def __init__(self, config, modules): + """Initialize.""" + # self.modules = modules + # self.config = config + # self.masks = {} + super(BasicPruner, self).__init__(config, modules) + + def _init(self): + """Auxiliary function for initializing.""" + self.pattern = get_pattern(self.config, self.modules) + self.scheduler = get_scheduler(self.config) + self.criterion = get_criterion(self.config, self.modules) + self.reg = get_reg(self.config, self.modules, self.pattern) + # if switch off progressive but use per-channel pruning, give a warn + if "channel" in self.pattern.pattern: + logger.info("UserWarning: use per-channel pruning pattern without progressive pruning!") + logger.info("Instead, enabling progressive pruning would be a better choice.") + else: + pass + + def set_global_step(self, global_step): + """Set global step number.""" + self.global_step = global_step + + # def on_step_begin(self, local_step): + # """Implement at the start of each step. + # + # Update the masks at a given local_step. 
+ # """ + # self.update_masks(local_step) + + def update_masks(self, local_step): + """Update the masks at a given local step.""" + if self.global_step == self.start_step: + if self.config['lock_init_sparsity']: + self.masks = self.pattern.get_pattern_lock_masks(self.modules) + self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) + self.current_sparsity_ratio = self.init_sparsity_ratio + + if not self.check_is_pruned_step(self.global_step): + return + + if self.current_sparsity_ratio > self.target_sparsity_ratio: + return + + self.criterion.on_step_begin() + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, self.masks, + self.init_sparsity_ratio) + logger.info(f"current target ratio is {current_target_sparsity_ratio}") + + self.completed_pruned_cnt += 1 + if self.criterion.scores == {}: + return + self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks) + self.mask_weights() + + self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) + logger.info(f"current sparsity ratio is {self.current_sparsity_ratio}") + + def on_before_optimizer_step(self): + """Implement before optimizer.step().""" + self.reg.on_before_optimizer_step() + + def on_after_optimizer_step(self): + """Prune the model after optimization.""" + ##the order of the following three lines can't not be exchanged + self.reg.on_after_optimizer_step() + self.mask_weights() + self.criterion.on_after_optimizer_step() + self.global_step += 1 + + +@register_pruner('pattern_lock') +class PatternLockPruner(BasePruner): + """Pruning Pruner. + + A Pruner class derived from BasePruner. + In this pruner, original model's sparsity pattern will be fixed while training. + This pruner is useful when you want to train a sparse model without change its original structure. + + Args: + modules: A dict {"module_name": Tensor}. 
Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + + Attributes: + Inherit from parent class Pruner. + """ + + def __init__(self, config, modules): + """Initialize.""" + super(PatternLockPruner, self).__init__(config, modules) + self.pattern = get_pattern(self.config, modules) + assert self.config.end_step == self.config.start_step, "pattern_lock pruner only supports one shot mode" + + def update_masks(self, local_step): + """Update the masks at a given local step.""" + if not self.check_is_pruned_step(self.global_step): + return + self.masks = self.pattern.get_pattern_lock_masks(self.modules) + + def on_after_optimizer_step(self): + """Implement after optimizer.step(). + + Prune the model after optimization. + """ + self.mask_weights() + self.global_step += 1 + + +@register_pruner('progressive') +class ProgressivePruner(BasicPruner): + """Pruning Pruner. + + A Pruner class derived from BasePruner. In this pruner, mask interpolation will be applied. + Mask interpolation is a fine-grained improvement for NxM structured pruning, + By adding interval masks between masks of two pruning steps + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object. Contains the pruner information. + + Attributes: + Inherit from parent class Pruner. + """ + + def __init__(self, config, modules): + """Initialize.""" + super(ProgressivePruner, self).__init__(config, modules) + + def _init(self): + """Auxiliary function for initialization.""" + self.pattern = get_pattern(self.config, self.modules) + self.scheduler = get_scheduler(self.config) + self.criterion = get_criterion(self.config, self.modules) + self.reg = get_reg(self.config, self.modules, self.pattern) + # progressive pruning set up, including check up paramters. 
+ self.use_progressive = self.config["progressive"] + # progressive parameters + # dict passed to Pattern's functions + self.progressive_configs = { + "progressive_steps": 4, + "progressive_type": "scores", + "use_global": True + } + self.progressive_steps = self.progressive_configs["progressive_steps"] + self.progressive_type = self.progressive_configs["progressive_type"] + self.use_global = self.progressive_configs["use_global"] + self.progressive_logger = False + self._init_for_progressive() + + def _init_for_progressive(self): + """Auxiliary function for initializing progressive pruning.""" + # detailed progressive parameters will stored at patterns.py + # step 1: check if pattern is NxM + if "x" not in self.pattern.pattern: + raise NotImplementedError(f"Currently progressive only " \ + f"support NxM and per-channel pruning patterns.") + + # step 2: check if current set up will "degrade" into non-progressive + degrading_flag = False + if (self.end_step - self.start_step) <= self.progressive_steps or self.progressive_steps <= 1: + logger.info("Current progressive setting will degrading to non-progressive pruning.") + self.use_progressive = False + return + + # step 3: log hyper-parameters. and check validity. 
+ if self.use_progressive: + logger.info(f"Progressive pruning is enabled!") + logger.info(f"Progressive pruning steps: {self.progressive_steps}") + logger.info(f"Progressive type: {self.progressive_type}") + logger.info(f"Progressive balance: {self.use_global}") + self.check_progressive_validity() + self.pre_masks = copy.deepcopy(self.masks) + self.progressive_masks = copy.deepcopy(self.masks) + if self.pruning_frequency < self.progressive_steps:##TODO trick + self.progressive_steps = self.pruning_frequency + # if self.progressive_steps == 3: + # self.progressive_steps = 2 + self.pruning_frequency_progressive = self.progressive_steps + else: + self.pruning_frequency_progressive = self.pruning_frequency // self.progressive_steps + # this is a structural pruning step, it fits self.pruning_frequency + self.structured_update_step = 0 + + def check_progressive_validity(self): + """Check if the settings of progressive pruning are valid.""" + # check some problematic settings + if self.progressive_type == "linear": + if self.use_global: + # when global progressive is applied, linear type is contradict. 
+ raise NotImplementedError("Global progressive pruning do not support linear pattern") + # When linear, progressive_step should not meet a indivisible + for key in self.pattern.block_size.keys(): + block_size = self.pattern.block_size[key] + progressive_direction = max(block_size) + if progressive_direction % self.progressive_steps != 0: + raise ValueError( + f"In layer {key}, its pruning pattern is {block_size}, " \ + f"while progressive steps {self.progressive_steps} is indivisible.") + else: + for key in self.pattern.block_size.keys(): + block_size = self.pattern.block_size[key] + total_block_size = block_size[0] * block_size[1] + if total_block_size < self.progressive_steps: + raise ValueError( + f"In layer {key}, its pruning pattern is {block_size}, " \ + f"while progressive steps {self.progressive_steps} is overflowing.") + + def check_is_pruned_progressive_step(self, step): + """Check if a progressive pruning process should be performed at the current step. + + Args: + step: an integer representing the number of current step. + + Returns: + A Boolean. + """ + # used in progressive pruning + if step < self.start_step or step > self.end_step: + return False + if int(step - self.start_step) % self.pruning_frequency_progressive == 0: + return True + return False + + def update_masks_progressive(self, local_step): + """Update the masks in progressive pruning mode at a given local step.""" + if self.global_step == self.start_step: + if self.config['lock_init_sparsity']: + self.masks = self.pattern.get_pattern_lock_masks(self.modules) + self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) + self.current_sparsity_ratio = self.init_sparsity_ratio + + # case 1: step is not in [start_step, end_step] or it is not either pruning or progressive pruning step. 
+ if (self.check_is_pruned_step(self.global_step) == False) and ( + self.check_is_pruned_progressive_step(self.global_step) == False): + return + if self.current_sparsity_ratio > self.target_sparsity_ratio: + return + + # case 2: step which does progressive update, but it is not a pruning step in case 3 + if self.check_is_pruned_progressive_step(self.global_step) \ + and self.check_is_pruned_step(self.global_step) == False: + # do not do global pruning, only do the progressive mask update. + step_offset = self.global_step - self.structured_update_step + progressive_idx = step_offset // self.pruning_frequency_progressive + if progressive_idx < (self.progressive_steps - 1): + self.progressive_masks = self.pattern.update_progressive_masks(self.pre_masks, self.masks, \ + self.criterion.scores, \ + progressive_idx + 1, \ + self.progressive_configs) + else: + # in the end, directly use new masks. + for n in self.masks.keys(): + self.progressive_masks[n] = self.masks[n].clone() + self.mask_weights_general(self.progressive_masks) + if self.progressive_logger: + self.print_progressive_sparsity() + return + + # case 3: a pruning step, generate new masks, progressive masks also update. 
+ tmp_step = self.global_step + self.structured_update_step = tmp_step + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, self.masks) + logger.info(f"current target ratio is {current_target_sparsity_ratio}") + self.criterion.on_step_begin() + self.completed_pruned_cnt += 1 + if self.criterion.scores == {}: + return + for n in self.masks.keys(): + self.pre_masks[n] = self.masks[n].clone() + # update new masks + self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks, ) + self.progressive_masks = self.pattern.update_progressive_masks(self.pre_masks, self.masks, \ + self.criterion.scores, 1, \ + self.progressive_configs) + self.mask_weights_general(self.progressive_masks) + if self.progressive_logger: + self.print_progressive_sparsity() + return + + def on_step_begin(self, local_step): + """Update the masks at a given local_step.""" + """Implement at the start of each step.""" + if self.handled_global_step == self.global_step: + return + + if not self.use_progressive: + # As _init_for_progressive() works, when degrades to non-progressive + # just call BasicPruner's update_masks(). 
+ self.update_masks(local_step) + else: + self.update_masks_progressive(local_step) + self.handled_global_step = self.global_step + + def on_before_optimizer_step(self): + """Implement before optimizer.step().""" + self.reg.on_before_optimizer_step() + + def on_after_optimizer_step(self): + """Prune the model after optimization.""" + ##the order of the following three lines can't not be exchanged + self.reg.on_after_optimizer_step() + if not self.use_progressive: + self.mask_weights() + else: + self.mask_weights_general(self.progressive_masks) + self.criterion.on_after_optimizer_step() + self.global_step += 1 + + def print_progressive_sparsity(self): + """Output the progressive sparsity.""" + cur_sp = self.pattern.get_sparsity_ratio_progressive(self.progressive_masks) + logger.info("Step: {} -> Current progressive sparsity: {}".format(self.global_step, cur_sp)) diff --git a/neural_compressor/pruner/regs.py b/neural_compressor/pruner/regs.py new file mode 100644 index 00000000000..8ce97e4c87e --- /dev/null +++ b/neural_compressor/pruner/regs.py @@ -0,0 +1,128 @@ +"""Regularizer.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .patterns import BasePattern +from neural_compressor.utils.utility import LazyImport +torch = LazyImport('torch') + +REGS = {} + + +def register_reg(name): + """Register a regularizator to the registry.""" + + def register(reg): + REGS[name] = reg + return reg + + return register + + +def get_reg_type(config): + """Obtain the regularizer type.""" + for key in REGS.keys(): ##assume there is only one reg + if config.get(key, None) != None: + return key + return None + + +def get_reg(config, modules, pattern): + """Get registered regularizator class.""" + reg_type = config["reg_type"] + if reg_type == None: + return BaseReg(config, modules, pattern) + if reg_type not in REGS.keys(): + assert False, f"regularizator does not support {reg_type}, currently only support {REGS.keys()}" + return REGS[reg_type](config, modules, pattern, config["reg_coeff"]) + + +class BaseReg: + """Regularizer. + + The class which performs regularization. + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object that includes information of the regularizer. + pattern: A config dict object. The pattern related part in args config. + """ + + def __init__(self, config: dict, modules: dict, pattern: BasePattern): + """Initialize.""" + self.modules = modules + self.config = config + self.pattern = pattern + + def on_before_optimizer_step(self): + """Implement before optimizer.step().""" + pass + + def on_after_optimizer_step(self): + """Implement after optimizer.step().""" + pass + + +@register_reg("group_lasso") +class GroupLasso(BaseReg): + """Regularizer. + + A regularizer class derived from BaseReg. In this class, the Group-lasso regularization will be performed. + Group-lasso is a variable-selection and regularization method. + + Args: + modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. + config: A config dict object that includes information of the regularizer. + pattern: A config dict object. 
The pattern related part in args config. + + Attributes: + reg_terms: A dict {"module_name": Tensor} of regularization terms. + alpha: A float representing the coeffient related to group lasso. + """ + + def __init__(self, config: dict, modules: dict, pattern: BasePattern, coeff): + """Initialize.""" + super(GroupLasso, self).__init__(config, modules, pattern) + assert "x" in self.config.pattern, "group lasso only supports NXM pattern" + self.reg_terms = {} + self.alpha = float(coeff) + assert self.alpha >= 0, "group lasso only supports positive coeff" + + def on_before_optimizer_step(self): + """Calculate the group-lasso score map.""" + with torch.no_grad(): + if self.pattern.invalid_layers == None: + self.pattern.check_layer_validity() + for key in self.modules.keys(): + if key in self.pattern.invalid_layers: + continue + grad = self.modules[key].weight.grad + reg_term = self.pattern.reshape_orig_to_pattern(grad, key) + reg_term = self.alpha / (torch.norm(reg_term, p=2, dim=[1, 3]) + 1e-12) + reg_term[torch.isinf(reg_term)] = 0.0 + self.reg_terms[key] = reg_term + + def on_after_optimizer_step(self): ##decoupled with grad descent + """Perform group lasso regularization after optimization.""" + with torch.no_grad(): + for key in self.modules.keys(): + if key in self.pattern.invalid_layers: + continue + reg_term = self.pattern.reshape_reduced_to_orig(self.reg_terms[key], key, + self.modules[key].weight.shape) + self.modules[key].weight -= reg_term + diff --git a/neural_compressor/pruner/schedulers.py b/neural_compressor/pruner/schedulers.py new file mode 100644 index 00000000000..78e985da05f --- /dev/null +++ b/neural_compressor/pruner/schedulers.py @@ -0,0 +1,177 @@ +"""scheduler module.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +SCHEDULERS = {} + + +def register_scheduler(name): + """Class decorator used to register a Scheduler subclass to the registry. + + Decorator function used before a Scheduler subclass. + Make sure that the Scheduler class decorated by this function can be registered in SCHEDULERS. + + Args: + cls (class): The class of register. + name: A string. Define the scheduler type. + + Returns: + cls: The class of register. + """ + + def register(scheduler): + SCHEDULERS[name] = scheduler + return scheduler + + return register + + +def get_scheduler(config): + """Get registered scheduler class. + + Get a scheduler object from SCHEDULERS. + + Args: + config: A config dict object. Contains the scheduler information. + + Returns: + A Scheduler object. + """ + name = "iterative" + if config.start_step == config.end_step: + name = "oneshot" + return SCHEDULERS[name](config) + + +class PruningScheduler: + """Pruning Scheduler. + + The class which defines a sparsity changing process during pruning. + Mainly contains two types: + 1. iterative scheduler. Prune the model from dense to target sparsity gradually. + 2. one-shot scheduler. Prune the model in a single step and reach the target sparsity. + + Args: + config: A config dict object. Contains the scheduler information. + + Attributes: + config: A config dict object. Contains the scheduler information. 
+ """ + + def __init__(self, config): + """Initialize.""" + self.config = config + + def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_steps, masks, init_ratio=0.0): + """To be implemented in subclasses.""" + raise NotImplementedError + + +@register_scheduler('oneshot') +class OneshotScheduler(PruningScheduler): + """Pruning Scheduler. + + A Scheduler class derived from Scheduler. + Prune the model to target sparsity once. + + Args: + config: A config dict object. Contains the scheduler information. + + Attributes: + Inherit from parent class Scheduler. + """ + + def __init__(self, config): + """Initialize.""" + super(OneshotScheduler, self).__init__(config) + + def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_steps, masks, init_ratio=0.0): + """Update sparsity ratio. + + Args: + target_ratio: A float representing the sparsity ratio after pruning. + current_prune_step: An integer representing the current pruning step. + total_prune_steps: An integer representing the total number of steps of the pruning process. + masks: A dict {"module_name": Tensor} that stores the masks for modules' weights. + init_ratio: A float representing the sparsity ratio before pruning. + + Return: + A float representing the sparsity ratio that the model will reach after the next pruning step. + """ + return target_ratio + + +@register_scheduler('iterative') +class IterativeScheduler(PruningScheduler): + """Pruning Scheduler. + + A Scheduler class derived from Scheduler. + Prune the model from dense to target sparsity in several steps. + + Args: + config: A config dict object. Contains the scheduler information. + + Attributes: + Inherit from parent class Scheduler. 
+ """ + + def __init__(self, config): + """Initialize.""" + super(IterativeScheduler, self).__init__(config) + + def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_steps, masks, + init_sparsity_ratio=0.0): + """Obtain new target sparsity ratio according to the step. + + Args: + target_ratio: A float. The target sparsity ratio. + current_prune_step: A integer. The current pruning step. + total_prune_steps: A integer. The total steps included in the pruning progress. + masks: A dict{"module_name": Tensor}. The masks for modules' weights. + init_sparsity_ratio: + + Returns: + A float representing the target sparsity ratio the model will reach after the next pruning step. + """ + aggressive_ratio = target_ratio + aggressive_ratio = min(self.config.max_sparsity_ratio_per_op, + aggressive_ratio) ##legacy issue + + decay_type = self.config.sparsity_decay_type + if decay_type == "cos": + current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * ( + 1.0 - math.cos(float(current_prune_step) / total_prune_steps * (math.pi / 2))) + init_sparsity_ratio + elif decay_type == "exp": + target_dense_change_ratio = ((1.0 - aggressive_ratio) / (1.0 - init_sparsity_ratio)) ** ( + 1 / total_prune_steps) + current_target_sparsity = 1.0 - ( + 1.0 - init_sparsity_ratio) * target_dense_change_ratio ** current_prune_step + + elif decay_type == "linear": + current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * float( + current_prune_step) / total_prune_steps + init_sparsity_ratio + + elif decay_type == "cube": + current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * ( + (float(current_prune_step) / total_prune_steps) ** 3) + init_sparsity_ratio + else: + assert False, "{} is not supported".format(decay_type) + + current_target_sparsity = min(target_ratio, current_target_sparsity) + return current_target_sparsity diff --git a/neural_compressor/pruner/utils.py b/neural_compressor/pruner/utils.py new file mode 100644 index 
00000000000..5598167dee5 --- /dev/null +++ b/neural_compressor/pruner/utils.py @@ -0,0 +1,247 @@ +"""prune utils.""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import yaml + +try: + from neural_compressor.conf.dotdict import DotDict +except: + from .dot_dict import DotDict ##TODO +from .logger import logger + + +class WeightPruningConfig: + """ + similiar to torch optimizer's interface + """ + + def __init__(self, pruning_configs=[{}], ##empty dict will use global values + target_sparsity=0.9, pruning_type="snip_momentum", pattern="4x1", op_names=[], + excluded_op_names=[], + start_step=0, end_step=0, pruning_scope="global", pruning_frequency=1, + min_sparsity_ratio_per_op=0.0, max_sparsity_ratio_per_op=0.98, + sparsity_decay_type="exp", pruning_op_types=['Conv', 'Linear'], + **kwargs): + self.pruning_configs = pruning_configs + self._weight_compression = DotDict({ + 'target_sparsity': target_sparsity, + 'pruning_type': pruning_type, + 'pattern': pattern, + 'op_names': op_names, + 'excluded_op_names': excluded_op_names, ##global only + 'start_step': start_step, + 'end_step': end_step, + 'pruning_scope': pruning_scope, + 'pruning_frequency': pruning_frequency, + 'min_sparsity_ratio_per_op': min_sparsity_ratio_per_op, + 'max_sparsity_ratio_per_op': max_sparsity_ratio_per_op, + 'sparsity_decay_type': sparsity_decay_type, + 'pruning_op_types': pruning_op_types, + 
##reg_type=None, reduce_type="mean", parameters={"reg_coeff": 0.0} + ##'resume_from_pruned_checkpoint': resume_from_pruned_checkpoint ##resume_from_pruned_checkpoint + }) + self._weight_compression.update(kwargs) + + @property + def weight_compression(self): + return self._weight_compression + + @weight_compression.setter + def weight_compression(self, weight_compression): + self._weight_compression = weight_compression + + +def check_config(prune_config): + """Functions that check key-value is valid to run Pruning object. + + Args: + prune_config: A config dict object. Contains Pruning parameters and configurations. + + Returns: + None if everything is correct. + + Raises: + AssertionError. + """ + assert prune_config['start_step'] >= 0, "start_step should be greater than 0" + assert prune_config['end_step'] >= -1, "end_step should be greater than 0" + assert prune_config['end_step'] >= prune_config['start_step'], \ + "end_step should be greater than start_step" + assert prune_config['target_sparsity'] >= 0 and prune_config['target_sparsity'] < 1.0, \ + "begin_pruning_step should be in range [0,1)" + assert prune_config['pruning_frequency'] > 0, "pruning_frequency should be greater than 0" + assert prune_config['max_sparsity_ratio_per_op'] >= 0 and prune_config['max_sparsity_ratio_per_op'] < 1, \ + "pruning_frequency should be greater than 0" + assert prune_config['pruning_scope'] == "global" or prune_config['pruning_scope'] == "local", \ + "only support 'global' and 'local' prune domain" + try: + prune_config['resume_from_pruned_checkpoint'] = bool(prune_config['resume_from_pruned_checkpoint']) + except: + assert False, "resume_from_pruned_checkpoint should be bool value" + if "x" in prune_config["pattern"]: + pattern = prune_config["pattern"].split('_')[-1].split('x') + if pattern[0] == "channel" or pattern[1] == "channel": + pass + else: + try: + N = int(pattern[0]) + M = int(pattern[1]) + except: + assert False, "N or M can't convert to int" + assert N > 0, "N 
should be greater than 0" + assert M > 0, "M should be greater than 0" + if ":" in prune_config["pattern"]: + pattern = prune_config["pattern"].split('_')[-1].split(':') + try: + N = int(pattern[0]) + M = int(pattern[1]) + except: + assert False, "N or M can't convert to int" + assert N > 0, "N should be greater than 0" + assert M > N, "M should be greater than N" + max_ratio = float(N) / M + assert prune_config['target_sparsity'] <= max_ratio, \ + "in N:M pattern, the max sparsity is N/M={}".format(max_ratio) + prune_config['max_sparsity_ratio_per_op'] = min(max_ratio, prune_config['max_sparsity_ratio_per_op']) + if prune_config['reg_coeff'] != None: + prune_config['reg_coeff'] = float(prune_config['reg_coeff']) + assert prune_config['reg_coeff'] >= 0, "only support positive reg_type" + assert prune_config["min_sparsity_ratio_per_op"] >= 0 and prune_config["min_sparsity_ratio_per_op"] <= \ + prune_config['max_sparsity_ratio_per_op'], \ + "min_sparsity_ratio_per_op should in[0, max_sparsity_ratio_per_op]" + + +def reset_none_to_default(obj, key, default): + """Functions that add up undefined configurations. + + If some configurations are not defined in the configuration, set it to a default value. + + Args: + obj: A dict{key: value} + key: A string. Key in obj. + default: When the key is not in obj, Add key: default item in original obj. 
+ + """ + if obj == None: + return None + if isinstance(obj, dict): + if (not key in obj.keys()) or obj[key] == None: + return default + else: + return obj[key] + else: + if not hasattr(obj, key) or getattr(obj, key) == None: + return default + else: + return getattr(obj, key) + + +def update_params(info): + if "parameters" in info.keys(): + params = info["parameters"] + for key in params: + info[key] = params[key] + + +def process_and_check_weight_config(val: WeightPruningConfig): + default_global_config = {'target_sparsity': 0.9, 'pruning_type': 'snip_momentum', 'pattern': '4x1', 'op_names': [], + 'excluded_op_names': [], + 'start_step': 0, 'end_step': 0, 'pruning_scope': 'global', 'pruning_frequency': 1, + 'min_sparsity_ratio_per_op': 0.0, 'max_sparsity_ratio_per_op': 0.98, + 'sparsity_decay_type': 'exp', + 'pruning_op_types': ['Conv', 'Linear'], + + } + default_local_config = {'resume_from_pruned_checkpoint': False, 'reg_type': None, + 'criterion_reduce_type': "mean", 'parameters': {"reg_coeff": 0.0}} + + params_default_config = {"reg_coeff": 0.0} + + default_config = {} + default_config.update(default_global_config) + default_config.update(default_local_config) + default_config.update(params_default_config) + + pruning_configs = val.pruning_configs + pruners_info = [] + global_info = val.weight_compression + if len(pruning_configs) == 0: ##only one + pruner_info = global_info + for key in default_config.keys(): + pruner_info[key] = reset_none_to_default(pruner_info, key, default_config[key]) + update_params(pruner_info) + check_config(pruner_info) + pruner_info = DotDict(pruner_info) + pruners_info.append(pruner_info) + + else: ##TODO need update, in this mode, we ingore the global op names + for pruner_info in pruning_configs: + for key in default_config.keys(): + pruner_info[key] = reset_none_to_default(pruner_info, key, global_info[key]) + pruner_info[key] = reset_none_to_default(pruner_info, key, default_config[key]) + update_params(pruner_info) + 
check_config(pruner_info) + pruner_info = DotDict(pruner_info) + pruners_info.append(pruner_info) + + return pruners_info + + +def process_config(config): + """Obtain a config dict object from a config file. + + Args: + config: A string. The path to configuration file. + + Returns: + A config dict object. + """ + if isinstance(config, WeightPruningConfig): + return process_and_check_weight_config(config) + else: + assert False, f"not supported type {config}" + + +def parse_to_prune(config, model): + """Keep target pruned layers.""" + modules = {} + if config["op_names"] == None or config["op_names"] == []: + config["op_names"] = [".*"] + for raw in config["op_names"]: + try: + pattern = re.compile(raw) + except: + assert False, f"regular expression match does not support {raw}" + for name, module in filter(lambda t: pattern.search(t[0]), model.named_modules()): + for layer_type in config["pruning_op_types"]: + if layer_type in type(module).__name__: + modules[name] = module + break + ##remove not to prune layers + """Drop non-pruned layers.""" + exclude_names = config["excluded_op_names"] + patterns = [re.compile(s) for s in exclude_names] + if len(patterns) <= 0: + return modules + new_modules = {} + for name in modules.keys(): + if any([p.search(name) for p in patterns]): + continue + new_modules[name] = modules[name] + return new_modules diff --git a/neural_compressor/pruning.py b/neural_compressor/pruning.py index 3205ffff99b..0094b0fcdf9 100644 --- a/neural_compressor/pruning.py +++ b/neural_compressor/pruning.py @@ -1,7 +1,8 @@ -#!/usr/bin/env python +"""Pruning.""" +# !/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2021 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,144 +15,188 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +from neural_compressor.utils.utility import LazyImport +LazyImport('torch.nn') +torch = LazyImport('torch') -from .utils import logger -from .utils.utility import singleton -from .experimental import Pruning as ExpPruning -from deprecated import deprecated +from neural_compressor.pruner.utils import process_config, parse_to_prune,\ + check_config, update_params +from neural_compressor.pruner.pruners import get_pruner +from neural_compressor.utils import logger +import re +from neural_compressor.pruner.utils import WeightPruningConfig -@singleton class Pruning: - """This is base class of pruning object. + """Pruning. - Since DL use cases vary in the accuracy metrics (Top-1, MAP, ROC etc.), loss criteria - (<1% or <0.1% etc.) and pruning objectives (performance, memory footprint etc.). - Pruning class provides a flexible configuration interface via YAML for users to specify - these parameters. + The main class that users will used in codes to do pruning. + Contain at least one Pruner object. Args: - conf_fname_or_obj (string or obj): The path to the YAML configuration file or - Pruning_Conf class containing accuracy goal, pruning objective and related - dataloaders etc. - + config: a string. The path to a config file. For config file template, please refer to + https://github.com/intel/neural-compressor/tree/master/examples/pytorch/nlp/huggingface_models/text-classification/pruning/pytorch_pruner/eager/ + + Attributes: + model: The model object to prune. + config_file_path: A string. The path to a config file. + pruners: A list. A list of Pruner objects. + pruner_info: A config dict object. Contains pruners' information. 
""" - def __init__(self, conf_fname_or_obj): - self.exp_pruner = ExpPruning(conf_fname_or_obj) + def __init__(self, config): + """Initialize.""" + self.model = None + self.pruners = [] + self.pruners_info = process_config(config) - def on_epoch_begin(self, epoch): - """ called on the begining of epochs""" - self.exp_pruner.on_epoch_begin(epoch) + def update_config(self, *args, **kwargs): + """Add user-defined arguments to the original configurations. + + The original config of pruning is read from a file. + However, users can still modify configurations by passing key-value arguments in this function. + Please note that the key-value arguments' keys are analysable in current configuration. + """ + for item in self.pruners_info: + for key in kwargs: + if key in item.keys(): + item[key] = kwargs[key] + + update_params(item) + check_config(item) + + # def _call_pruners(self, func): + # """Function which decorates the Pruning class's functions. + # + # It can simplify codes by calling same-name functions in Pruning's Pruner objects. + # For example, when it decorates on_step_begin function of Pruning, + # it automatically calls its Pruners' on_step_begin functions without a "for" code. + # However, when this trick is enabled, the pylint validation on INC cannot passed, therefore commented out. + # """ + # def warpper(self, *args, **kw): + # func_name = f"{func.__name__}" + # func(self, *args, **kw) + # for prune in self.pruners: + # prun_func = getattr(prune, func_name) + # prun_func(*args, **kw) + # + # return warpper + + def get_sparsity_ratio(self): + """Calculate sparsity ratio of a module/layer. - def on_step_begin(self, batch_id): - """ called on the begining of batches""" - self.exp_pruner.on_step_begin(batch_id) + Returns: + Three floats. + elementwise_over_matmul_gemm_conv refers to zero elements' ratio in pruning layers. + elementwise_over_all refers to zero elements' ratio in all layers in the model. 
+ blockwise_over_matmul_gemm_conv refers to all-zero blocks' ratio in pruning layers. + """ + pattern_sparsity_cnt = 0 + element_sparsity_cnt = 0 + for pruner in self.pruners: + modules = pruner.modules + sparsity_ratio = pruner.pattern.get_sparsity_ratio(pruner.masks) + cnt = 0 + for key in modules.keys(): + cnt += modules[key].weight.numel() + pattern_sparsity_cnt += int(cnt * sparsity_ratio) + for key in pruner.masks.keys(): + element_sparsity_cnt += torch.sum(pruner.masks[key] == 0).data.item() + + linear_conv_cnt = 0 + param_cnt = 0 + for name, module in self.model.named_modules(): + if type(module).__name__ in ["Linear"] or re.search(r'Conv.d', type(module).__name__) != None: + linear_conv_cnt += module.weight.numel() + + for n, param in self.model.named_parameters(): + param_cnt += param.numel() + if linear_conv_cnt == 0: + blockwise_over_matmul_gemm_conv = 0 + elementwise_over_matmul_gemm_conv = 0 + else: + blockwise_over_matmul_gemm_conv = float(pattern_sparsity_cnt) / linear_conv_cnt + elementwise_over_matmul_gemm_conv = float(element_sparsity_cnt) / linear_conv_cnt + if param_cnt == 0: + elementwise_over_all = 0 + else: + elementwise_over_all = float( + element_sparsity_cnt) / param_cnt + + return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv + + def _generate_pruners(self): + """Obtain Pruner objects.""" + assert isinstance(self.model, torch.nn.Module) + + for info in self.pruners_info: + modules = parse_to_prune(info, self.model) + if modules == {}: + logger.warning("one pruner hooks no layers, please have a check") + + self.pruners.append(get_pruner(info, modules)) + info['modules'] = [key for key in modules.keys()] + info['len_of_modules'] = len(info['modules']) + logger.info(info) + + # @_call_pruners + def on_train_begin(self): + """Implement at the beginning of training process. + + Before training, ensure that pruners are generated. 
+ """ + self._generate_pruners() ##TODO is there better place to place + # @_call_pruners + def on_epoch_begin(self, epoch): + """Implement at the beginning of every epoch.""" + for pruner in self.pruners: + pruner.on_epoch_begin(epoch) + + # @_call_pruners + def on_step_begin(self, local_step): + """Implement at the beginning of every step.""" + for pruner in self.pruners: + pruner.on_step_begin(local_step) + + # @_call_pruners + def on_before_optimizer_step(self): + """Implement before optimizer.step().""" + for pruner in self.pruners: + pruner.on_before_optimizer_step() + + # @_call_pruners def on_step_end(self): - """ called on the end of batches""" - self.exp_pruner.on_step_end() + """Implement at the end of every step.""" + for pruner in self.pruners: + pruner.on_step_end() + # @_call_pruners def on_epoch_end(self): - """ called on the end of epochs""" - self.exp_pruner.on_epoch_end() - - @deprecated(version='2.0', reason="please use neural_compressor.prepare and neural_compressor.fit instead") - def __call__(self, model, train_dataloader=None, pruning_func=None, eval_dataloader=None, - eval_func=None): - """The main entry point of pruning. - - This interface currently only works on pytorch - and provides three usages: - a) Fully yaml configuration: User specifies all the info through yaml, - including dataloaders used in training and evaluation phases - and pruning tuning settings. - - For this usage, only model parameter is mandatory. - - b) Partial yaml configuration: User specifies dataloaders used in training - and evaluation phase by code. - The tool provides built-in dataloaders and evaluators, user just need provide - a dataset implemented __iter__ or __getitem__ methods and invoke dataloader() - with dataset as input parameter to create neural_compressor dataloader before calling this - function. - - After that, User specifies fp32 "model", train dataset "train_dataloader" - and evaluation dataset "eval_dataloader". 
- The trained and pruned model is evaluated with "eval_dataloader" - with evaluation metrics specified in the configuration file. The evaluation tells - the tuner whether the pruned model meets the accuracy criteria. If not, - the tuner starts a new training and tuning flow. - - For this usage, model, q_dataloader and eval_dataloader parameters are mandatory. - - c) Partial yaml configuration: User specifies dataloaders used in training phase - by code. - This usage is quite similar with b), just user specifies a custom "eval_func" - which encapsulates the evaluation dataset by itself. - The trained and pruned model is evaluated with "eval_func". - The "eval_func" tells the tuner whether the pruned model meets - the accuracy criteria. If not, the Tuner starts a new training and tuning flow. - - For this usage, model, q_dataloader and eval_func parameters are mandatory. - - Args: - model (object): For PyTorch model, it's torch.nn.model - instance. - train_dataloader (generator): Data loader for training. It is iterable - and should yield a tuple (input, label) for - training dataset containing label, - or yield (input, _) for label-free training - dataset. The input could be a object, list, - tuple or dict, depending on user implementation, - as well as it can be taken as model input. - pruning_func (function, optional): Training function for pruning. - This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If this - parameter specified, eval_dataloader parameter - plus metric defined in yaml, or eval_func - parameter should also be specified at same time. - eval_dataloader (generator, optional): Data loader for evaluation. It is iterable - and should yield a tuple of (input, label). - The input could be a object, list, tuple or - dict, depending on user implementation, - as well as it can be taken as model input. - The label should be able to take as input of - supported metrics. 
If this parameter is - not None, user needs to specify pre-defined - evaluation metrics through configuration file - and should set "eval_func" paramter as None. - Tuner will combine model, eval_dataloader - and pre-defined metrics to run evaluation - process. - eval_func (function, optional): The evaluation function provided by user. - This function takes model as parameter, - and evaluation dataset and metrics should be - encapsulated in this function implementation - and outputs a higher-is-better accuracy scalar - value. - - The pseudo code should be something like: - - def eval_func(model): - input, label = dataloader() - output = model(input) - accuracy = metric(output, label) - return accuracy - - Returns: - pruned model: best pruned model found, otherwise return None - - """ - logger.warning("This API is going to be deprecated. Please import " - "neural_compressor.experimental.Pruning, initialize an instance of `Pruning`," - "set its dataloader and metric attributes, then invoke its __call__ method.") - self.exp_pruner.model = model - self.exp_pruner.train_dataloader = train_dataloader - self.exp_pruner.pruning_func = pruning_func - self.exp_pruner.eval_dataloader = eval_dataloader - self.exp_pruner.eval_func = eval_func - return self.exp_pruner() - - fit = __call__ + """Implement the end of every epoch.""" + for pruner in self.pruners: + pruner.on_epoch_end() + + # @_call_pruners + def on_train_end(self): + """Implement the end of training phase.""" + for pruner in self.pruners: + pruner.on_train_end() + + # @_call_pruners + def on_before_eval(self): + """Implement at the beginning of evaluation phase.""" + for pruner in self.pruners: + pruner.on_before_eval() + + # @_call_pruners + def on_after_eval(self): + """Implement at the end of evaluation phase.""" + for pruner in self.pruners: + pruner.on_after_eval() + + # @_call_pruners + def on_after_optimizer_step(self): + """Implement after optimizer.step().""" + for pruner in self.pruners: + 
pruner.on_after_optimizer_step() diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 272b86fdc0f..6d50c622756 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -15,189 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -from .utils import logger -from .data import DATALOADERS, DATASETS + from .experimental import Quantization as ExpQuantization -from deprecated import deprecated from neural_compressor.conf.pythonic_config import Config from neural_compressor.config import PostTrainingQuantConfig -class Quantization(object): - """Quantization class automatically searches for optimal quantization recipes for low - precision model inference, achieving best tuning objectives like inference performance - within accuracy loss constraints. - - Tuner abstracts out the differences of quantization APIs across various DL frameworks - and brings a unified API for automatic quantization that works on frameworks including - tensorflow, pytorch and mxnet. - - Since DL use cases vary in the accuracy metrics (Top-1, MAP, ROC etc.), loss criteria - (<1% or <0.1% etc.) and tuning objectives (performance, memory footprint etc.). - Tuner class provides a flexible configuration interface via YAML for users to specify - these parameters. - - Args: - conf_fname_or_obj (string or obj): The path to the YAML configuration file or - Quantization_Conf class containing accuracy goal, tuning objective and preferred - calibration & quantization tuning space etc. - - """ - - def __init__(self, conf_fname_or_obj): - self.exp_quantizer = ExpQuantization(conf_fname_or_obj) - - @deprecated(version='2.0', reason="please use neural_compressor.quantization.fit instead") - def __call__(self, model, q_dataloader=None, q_func=None, eval_dataloader=None, - eval_func=None): - """The main entry point of automatic quantization tuning. 
- - This interface works on all the DL frameworks that neural_compressor supports - and provides three usages: - a) Fully yaml configuration: User specifies all the info through yaml, - including dataloaders used in calibration and evaluation phases - and quantization tuning settings. - - For this usage, only model parameter is mandatory. - - b) Partial yaml configuration: User specifies dataloaders used in calibration - and evaluation phase by code. - The tool provides built-in dataloaders and evaluators, user just need provide - a dataset implemented __iter__ or __getitem__ methods and invoke dataloader() - with dataset as input parameter to create neural_compressor dataloader before calling this - function. - - After that, User specifies fp32 "model", calibration dataset "q_dataloader" - and evaluation dataset "eval_dataloader". - The calibrated and quantized model is evaluated with "eval_dataloader" - with evaluation metrics specified in the configuration file. The evaluation tells - the tuner whether the quantized model meets the accuracy criteria. If not, - the tuner starts a new calibration and tuning flow. - - For this usage, model, q_dataloader and eval_dataloader parameters are mandatory. - - c) Partial yaml configuration: User specifies dataloaders used in calibration phase - by code. - This usage is quite similar with b), just user specifies a custom "eval_func" - which encapsulates the evaluation dataset by itself. - The calibrated and quantized model is evaluated with "eval_func". - The "eval_func" tells the tuner whether the quantized model meets - the accuracy criteria. If not, the Tuner starts a new calibration and tuning flow. - - For this usage, model, q_dataloader and eval_func parameters are mandatory. - - Args: - model (object): For Tensorflow model, it could be a path - to frozen pb,loaded graph_def object or - a path to ckpt/savedmodel folder. - For PyTorch model, it's torch.nn.model - instance. 
- For MXNet model, it's mxnet.symbol.Symbol - or gluon.HybirdBlock instance. - q_dataloader (generator): Data loader for calibration, mandatory for - post-training quantization. It is iterable - and should yield a tuple (input, label) for - calibration dataset containing label, - or yield (input, _) for label-free calibration - dataset. The input could be a object, list, - tuple or dict, depending on user implementation, - as well as it can be taken as model input. - q_func (function, optional): Training function for Quantization-Aware - Training. It is optional and only takes effect - when user choose "quant_aware_training" - approach in yaml. - This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If this - parameter specified, eval_dataloader parameter - plus metric defined in yaml, or eval_func - parameter should also be specified at same time. - eval_dataloader (generator, optional): Data loader for evaluation. It is iterable - and should yield a tuple of (input, label). - The input could be a object, list, tuple or - dict, depending on user implementation, - as well as it can be taken as model input. - The label should be able to take as input of - supported metrics. If this parameter is - not None, user needs to specify pre-defined - evaluation metrics through configuration file - and should set "eval_func" paramter as None. - Tuner will combine model, eval_dataloader - and pre-defined metrics to run evaluation - process. - eval_func (function, optional): The evaluation function provided by user. - This function takes model as parameter, - and evaluation dataset and metrics should be - encapsulated in this function implementation - and outputs a higher-is-better accuracy scalar - value. 
- - The pseudo code should be something like: - - def eval_func(model): - input, label = dataloader() - output = model(input) - accuracy = metric(output, label) - return accuracy - - Returns: - quantized model: best qanitized model found, otherwise return None - - """ - - logger.warning("This API is going to be deprecated. Please import " - "neural_compressor.experimental.Quantization, initialize an instance of `Quantization`," - "set its dataloader and metric attributes, then invoke its __call__ method.") - - self.exp_quantizer.model = model - if q_dataloader is not None: - self.exp_quantizer.calib_dataloader = q_dataloader - elif q_func is not None: - self.exp_quantizer.q_func = q_func - - if eval_func is not None: - self.exp_quantizer.eval_func = eval_func - elif eval_dataloader is not None: - self.exp_quantizer.eval_dataloader = eval_dataloader - - nc_model = self.exp_quantizer.fit() - if self.exp_quantizer.framework == 'tensorflow': - return nc_model.graph if nc_model else None - if self.exp_quantizer.framework == 'pytorch': - saved_path = os.path.abspath(os.path.join(os.path.expanduser( - self.exp_quantizer.conf.usr_cfg.tuning.workspace.path), 'checkpoint')) - nc_model.save(saved_path) - return nc_model.model - - fit = __call__ - - @deprecated(version='2.0', reason="this function has been deprecated") - def dataset(self, dataset_type, *args, **kwargs): - return DATASETS(self.exp_quantizer.framework)[dataset_type](*args, **kwargs) - - @deprecated(version='2.0', reason="this function has been deprecated") - def dataloader(self, dataset, batch_size=1, collate_fn=None, last_batch='rollover', - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False): - return DATALOADERS[self.exp_quantizer.framework]( - dataset=dataset, - batch_size=batch_size, collate_fn=collate_fn, last_batch=last_batch, - sampler=sampler, batch_sampler=batch_sampler, num_workers=num_workers, - pin_memory=pin_memory - ) - - @deprecated(version='2.0', reason="this function has been 
deprecated") - def metric(self, name, metric_cls, **kwargs): - from .experimental.common import Metric as NCMetric - nc_metric = NCMetric(metric_cls, name, **kwargs) - self.exp_quantizer.metric = nc_metric - - @deprecated(version='2.0', reason="this function has been deprecated") - def postprocess(self, name, postprocess_cls, **kwargs): - from .experimental.common import Postprocess as NCPostprocess - nc_postprocess = NCPostprocess(postprocess_cls, name, **kwargs) - self.exp_quantizer.postprocess = nc_postprocess - - def fit(model, conf, calib_dataloader=None, @@ -207,7 +29,6 @@ def fit(model, eval_metric=None, **kwargs): """Quantize the model with a given configure. - Args: model (torch.nn.Module): For Tensorflow model, it could be a path to frozen pb,loaded graph_def object or @@ -253,21 +74,18 @@ def fit(model, encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. - The pseudo code should be something like: - def eval_func(model): input, label = dataloader() output = model(input) accuracy = metric(output, label) return accuracy - """ if isinstance(conf, PostTrainingQuantConfig): if eval_func is None and eval_dataloader is None: conf.performance_only = True - conf = Config(quantization=conf) + conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) quantizer = ExpQuantization(conf) quantizer.model = model if eval_func is not None: diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 4b59cf2cced..fc8350f8a10 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -21,8 +21,8 @@ from .strategy import strategy_registry, TuneStrategy from ..utils import logger -from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler -from .st_utils.tuning_structs import OpTuningConfig +from .utils.tuning_sampler import 
OpTypeWiseTuningSampler, FallbackTuningSampler +from .utils.tuning_structs import OpTuningConfig @strategy_registry diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index c35398dd4bb..c3478789d82 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -21,9 +21,9 @@ from .strategy import strategy_registry, TuneStrategy from ..utils import logger -from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .st_utils.tuning_structs import OpTuningConfig -from .st_utils.tuning_space import TUNING_ITEMS_LST +from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig +from .utils.tuning_space import TUNING_ITEMS_LST @strategy_registry class BasicTuneStrategy(TuneStrategy): diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 6090d75faf3..e36371cd88d 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -27,8 +27,8 @@ from ..utils import logger from .strategy import strategy_registry, TuneStrategy -from .st_utils.tuning_sampler import OpWiseTuningSampler -from .st_utils.tuning_structs import OpTuningConfig +from .utils.tuning_sampler import OpWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig @strategy_registry diff --git a/neural_compressor/strategy/conservative.py b/neural_compressor/strategy/conservative.py new file mode 100644 index 00000000000..32c80a69f45 --- /dev/null +++ b/neural_compressor/strategy/conservative.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +import numpy as np + +from collections import deque +from collections import OrderedDict as COrderedDict +from copy import deepcopy +from typing import Dict, List, Tuple, OrderedDict + +from .strategy import strategy_registry, TuneStrategy +from .utils.tuning_space import TuningItem +from ..utils import logger +from ..utils.utility import Statistics + +@strategy_registry +class ConservativeTuneStrategy(TuneStrategy): + def __init__(self, model, conf, q_dataloader, q_func=None, + eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + super( + ConservativeTuneStrategy, + self).__init__( + model, + conf, + q_dataloader, + q_func, + eval_dataloader, + eval_func, + dicts, + q_hooks) + self.acc_meet_flag = False + + def next_tune_cfg(self): + """ + Conservative tuning: accuracy first, performance second + + 1. Query all quantifiable ops and save as a list: quantifiable_ops = [(op_name, op_type), ...] + 2. Classify the op by its op type + 3. Add op to quant_queue according to the op type priority + 4. Go through the quant_queue and replace it with the fp32 config in tune_cfg if + accuracy meets the requirements else continue + + For bf16 and fp16, do the same thing as int8 + Note: + 1) other tunable items will using the first option as the default value. + + Yields: + tune_config (dict): It's a dict containing the tuning configuration to run. 
+ """ + + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size = calib_sampling_size_lst[0] + tune_cfg = self._initialize_tune_cfg() + tune_cfg['calib_sampling_size'] = calib_sampling_size + op_type_priority = self._get_op_type_priority() + quant_items_pool = self._quant_items_pool(op_type_priority) + logger.info(f"*** Try to convert op into lower precision to improve performance.") + for dtype, op_items in quant_items_pool.items(): + logger.info(f"*** Start to convert op into {dtype}.") + for op_type, items_lst in op_items.items(): + logger.info(f"*** Try to convert all {op_type} ops into {dtype}.") + tmp_tune_cfg = deepcopy(tune_cfg) + for item, quant_mode in items_lst: + op_info = item.name + op_config = tuning_space.set_deafult_config(op_info, quant_mode) + tmp_tune_cfg[op_info] = op_config + yield tmp_tune_cfg + if self.acc_meet_flag: + logger.info(f"*** Convert all {op_type} ops to {dtype} and accuracy still meet the requirements") + tune_cfg = deepcopy(tmp_tune_cfg) + else: + tmp_tune_cfg = deepcopy(tune_cfg) + logger.info(f"*** Convert all {op_type} ops to {dtype} but accuracy not meet the requirements") + logger.info(f"*** Try to convert {op_type} op into {dtype} one by one.") + for item, quant_mode in items_lst: + op_info = item.name + op_config = tuning_space.set_deafult_config(op_info, quant_mode) + tmp_tune_cfg[op_info] = op_config + yield tmp_tune_cfg + if self.acc_meet_flag: + tune_cfg[op_info] = op_config + logger.info((f"*** Convert one {op_type} op({op_info}) " + f"into {dtype} and accuracy still meet the requirements")) + else: + tmp_tune_cfg[op_info] = tune_cfg[op_info] + logger.info(f"*** Skip convert {op_info}.") + logger.info(f"*** Ending tuning process due to no quantifiable op left.") + + def traverse(self): + if not (self.cfg.evaluation and self.cfg.evaluation.accuracy and \ + (self.cfg.evaluation.accuracy.metric or 
self.cfg.evaluation.accuracy.multi_metrics)) \ + and self.eval_func is None: + logger.info("Neither evaluation function nor metric is defined." \ + " Generate a quantized model with default quantization configuration.") + self.cfg.tuning.exit_policy.performance_only = True + logger.info("Force setting 'tuning.exit_policy.performance_only = True'.") + logger.info("Generate a fake evaluation function.") + self.eval_func = self._fake_eval_func + + # Get fp32 model baseline + if self.baseline is None: + logger.info("Get FP32 model baseline.") + self._fp32_model = self.model + self.baseline = self._evaluate(self.model) + self.objectives.baseline = self.baseline + # self.best_tune_result = self.baseline + # Initialize the best qmodel as fp32 model + # self.best_qmodel = self._fp32_model + # Record the FP32 baseline + self._add_tuning_history() + self.show_baseline_info() + + # Start tuning + trials_count = 0 + for op_tuning_cfg in self.next_tune_cfg(): + tune_cfg = self._tune_cfg_converter(op_tuning_cfg) + trials_count += 1 + tuning_history = self._find_tuning_history(tune_cfg) + if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: + self.last_tune_result = tuning_history['last_tune_result'] + self.best_tune_result = tuning_history['best_tune_result'] + logger.warn("Find evaluated tuning config, skip.") + continue + logger.debug("Dump current tuning configuration:") + logger.debug(tune_cfg) + self.tuning_times += 1 + self.q_model = self.adaptor.quantize( + copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) + self.algo.calib_iter = tune_cfg['calib_iteration'] + self.algo.q_model = self.q_model + # TODO align the api to let strategy has access to pre_optimized model + assert self.adaptor.pre_optimized_model + self.algo.origin_model = self.adaptor.pre_optimized_model + if self.cfg.quantization.recipes.fast_bias_correction: + self.algo.algorithms[0].quantization_cfg = tune_cfg + self.last_qmodel = self.algo() + assert 
self.last_qmodel + self.last_tune_result = self._evaluate(self.last_qmodel) + self.acc_meet_flag = self.objectives.accuracy_meets() + if self.acc_meet_flag: + # For the first tuning + if not self.best_tune_result: + self.best_tune_result = self.last_tune_result + self.best_qmodel = self.last_qmodel + self.best_tune_result = self.last_tune_result + else: + # Update current tuning config and model with best performance + get_better_performance = self.compare_performace(self.last_tune_result, self.best_tune_result) + if get_better_performance: + logger.info(f"*** Update the model with better performance.") + self.best_qmodel = self.last_qmodel + self.best_tune_result = self.last_tune_result + else: + logger.info(f"*** The qmodel was not updated due to not achieving better performance.") + # Dump the current state to log + self.dump_tuning_state(trials_count, self.last_tune_result, self.best_tune_result, self.baseline) + # Judge stop or continue tuning + need_stop = self.stop(trials_count) + # Record the tuning history + saved_tune_cfg = copy.deepcopy(tune_cfg) + saved_last_tune_result = copy.deepcopy(self.last_tune_result) + self._add_tuning_history(saved_tune_cfg, + saved_last_tune_result, + q_config=self.q_model.q_config) + self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) + self.tune_cfg = tune_cfg + self._dump_tuning_process_statistics() + if need_stop: + if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning: + logger.debug(f'*** Start to do diagnosis (inspect tensor).') + self._diagnosis() + if self.use_multi_objective and len(self.tune_result_record) > 1 and \ + self.best_tune_result is not None: + best_trail, best_result = self.objectives.best_result(self.tune_result_record, + copy.deepcopy(self.baseline)) + if best_result != self.best_tune_result: + from neural_compressor.utils.utility import recover + self.best_qmodel = recover(self.model.model, + os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'), 
+ best_trail) + self.best_tune_result = best_result + self._dump_tuning_process_statistics() + break + + def stop(self, trials_count): + need_stop = False + if trials_count >= self.cfg.tuning.exit_policy.max_trials: + need_stop = True + return need_stop + + def compare_performace(self, last_tune_result, best_tune_result): # pragma: no cover + _, last_perf = last_tune_result + _, best_perf = best_tune_result + return last_perf[0] < best_perf[0] + + def dump_tuning_state(self, trials_count, last_tune_result, best_tune_result, baseline): + if last_tune_result: + last_tune = last_tune_result[0] if \ + isinstance(last_tune_result[0], list) else [last_tune_result[0]] + for name, data in zip(self.metric_name, last_tune): + if len(self.tune_data[name]) == 1: + self.tune_data[name].append(data) + else: + self.tune_data[name][1] = data + + if self.metric_weight and len(last_tune) > 1: + weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) + if len(self.tune_data['Weighted accuracy']) == 1: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: + self.tune_data['Weighted accuracy'][1] = weighted_acc + last_tune = [weighted_acc] + + last_tune_msg = '[Accuracy (int8|fp32):' + \ + ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ + zip(last_tune, self.tune_data['baseline'])]) + \ + ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ + x, y, z) for x, y, z in zip( \ + self.objectives.representation, last_tune_result[1], baseline[1]) \ + if x != 'Accuracy']) + ']' + else: # pragma: no cover + last_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 1: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][1] = 'n/a' + + if best_tune_result: + best_tune = best_tune_result[0] if isinstance(best_tune_result[0], list) \ + else [best_tune_result[0]] + + for name, data in zip(self.metric_name, best_tune): + if len(self.tune_data[name]) == 2: + self.tune_data[name].append(data) + else: 
+ self.tune_data[name][2] = data + + if self.metric_weight and len(best_tune) > 1: + weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) + + if len(self.tune_data['Weighted accuracy']) == 2: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: # pragma: no cover + self.tune_data['Weighted accuracy'][2] = weighted_acc + + best_tune = [weighted_acc] + + best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ + for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ + for x,y in zip(self.objectives.representation, \ + best_tune_result[1]) if x != 'Accuracy']) + ']' + + else: + best_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 2: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][2] = 'n/a' + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, + last_tune_msg, + best_tune_msg)) + output_data = [[info_type, + '{:.4f} '.format(self.tune_data[info_type][0]) if \ + not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], + '{:.4f} '.format(self.tune_data[info_type][1]) if \ + not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], + '{:.4f} '.format(self.tune_data[info_type][2]) if \ + not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ + for info_type in self.tune_data.keys() if info_type != 'baseline'] + + output_data.extend([[obj, + '{:.4f} '.format(baseline[1][i]) if baseline else 'n/a', + '{:.4f} '.format(last_tune_result[1][i]) if last_tune_result else 'n/a', + '{:.4f} '.format(best_tune_result[1][i]) if best_tune_result else 'n/a'] \ + for i, obj in enumerate(self.objectives.representation)]) + + Statistics(output_data, + header='Tune Result Statistics', + field_names=['Info Type', 'Baseline', 'Tune {} result'.format(trials_count), \ + 'Best tune result']).print_stat() + + def _get_op_type_priority(self): + optypewise_cap = 
self.capability['optypewise'] + op_type_priority = list(optypewise_cap.keys()) + return op_type_priority + + def _sorted_item_by_op_type(self, + items_lst: List[Tuple[TuningItem, str]], + op_type_priority: List[str]) -> OrderedDict[str, List]: + """ Socring the tuning items according to its op type. + + Args: + items_lst: The tuning item list. # [(op_item, quant_mode), ... ] + op_type_priority: The op type list with the order. # [optype_1, optype_2] + + Returns: + The tuning items list that sorted according to its op type. + OrderDict: + # op_type: [(TuningItem, quant_mode), ...] + conv2d: [(TuningItem, static), (TuningItem, static)] + linear: [(TuningItem, static), (TuningItem, static)] + """ + op_type_lst_from_items_lst = list(set([item[0].name[1] for item in items_lst])) + # For items whose op type does not exist in the priority list, assign it with lowest priority. + sorted_op_type_lst = [op_type for op_type in op_type_priority if op_type in op_type_lst_from_items_lst] + sorted_op_type_lst += list(set(op_type_lst_from_items_lst) - set(op_type_priority)) + sorted_items = COrderedDict() + for op_type in sorted_op_type_lst: + sorted_items[op_type] = [] + for op_item, quant_mode in items_lst: + op_type = op_item.name[1] + sorted_items[op_type].append((op_item, quant_mode)) + return sorted_items + + def _initialize_tune_cfg(self): + """Initialize the tuning config with fp32 AMAP. + + Returns: + The intialized tuning config. 
+ """ + tuning_space = self.tuning_space + quant_mode_wise_items = tuning_space.quant_mode_wise_items + # Initialize the tuning config + initial_tuning_cfg = {} + all_ops = set() + fp32_ops = [] + for quant_mode, items_lst in quant_mode_wise_items.items(): + items_name_lst = [item.name for item in items_lst] + all_ops = all_ops.union(set(items_name_lst)) + if quant_mode == "fp32": + fp32_ops += [item.name for item in items_lst] + non_fp32_ops_dtype = {} + fp32_ops_set = set(fp32_ops) + for quant_mode, items_lst in quant_mode_wise_items.items(): + items_name_set = set([item.name for item in items_lst]) + tmp_non_fp32_ops = items_name_set.difference(fp32_ops_set) + if tmp_non_fp32_ops: + for op_info in tmp_non_fp32_ops: + non_fp32_ops_dtype[op_info] = quant_mode + for op_info in fp32_ops: + initial_tuning_cfg[op_info] = tuning_space.set_deafult_config(op_info, "fp32") + for op_info, quant_mode in non_fp32_ops_dtype.items(): + initial_tuning_cfg[op_info] = tuning_space.set_deafult_config(op_info, quant_mode) + return initial_tuning_cfg + + def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[ + str, OrderedDict[str, List[Tuple[TuningItem, str]]]]: + """Create the op queue to be quantized. + + -------------------------------------------------------------------------- + | Level 1 | bf16 | fp16 | static/dynamic | + | Level 2 | conv2d, linear, ...| conv2d, linear, ...| conv2d, linear, ...| + + Args: + op_type_priority: The optype list with priority. + + Returns: + The op item pool to convert into lower precision. 
+ quant_items_pool(OrderDict): + bf16: + OrderDict: + conv2d: [(TuningItem, bf16), (TuningItem, bf16)] + linear: [(TuningItem, bf16), (TuningItem, bf16)] + int8: + OrderDict: + # (TuningItem, quant_mode) + conv2d: [(TuningItem, static), (TuningItem, static)] + linear: [(TuningItem, static), (TuningItem, static)] + """ + quant_mode_wise_items = self.tuning_space.quant_mode_wise_items + # Add all quantized pair into queue + quant_items_pool = COrderedDict() + # collect and sorted all ops that support bf16 and fp16 + for quant_mode in ['bf16', 'fp16']: + if quant_mode in quant_mode_wise_items: + op_item_pairs = [(op_item, quant_mode) for op_item in quant_mode_wise_items[quant_mode]] + op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority) + quant_items_pool[quant_mode] = op_item_pairs + op_item_pairs = [] + quant_ops_name_set = set() + # collect and sorted all ops that support int8 + for quant_mode, items_lst in quant_mode_wise_items.items(): + if "static" in quant_mode or 'dynamic' in quant_mode: + _quant_mode = "static" if "static" in quant_mode else "dynamic" + op_item_pairs += [(item, _quant_mode) for item in items_lst if item.name not in quant_ops_name_set] + quant_ops_name_set = quant_ops_name_set.union([item.name for item in items_lst]) + op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority) + quant_items_pool['int8'] = op_item_pairs + return quant_items_pool + + + + + + + + + + + + + + + + diff --git a/neural_compressor/strategy/exhaustive.py b/neural_compressor/strategy/exhaustive.py index 3127761578b..fb329332c8c 100644 --- a/neural_compressor/strategy/exhaustive.py +++ b/neural_compressor/strategy/exhaustive.py @@ -20,8 +20,8 @@ from collections import OrderedDict from .strategy import strategy_registry, TuneStrategy -from .st_utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .st_utils.tuning_structs import OpTuningConfig +from .utils.tuning_sampler import 
@strategy_registry
class HAWQ_V2TuneStrategy(TuneStrategy):
    """The HAWQ v2 tuning strategy.

    Args:
        model (object): The FP32 model specified for low precision tuning.
        conf (Class): The Conf class instance initialized from user yaml
                      config file.
        q_dataloader (generator): Data loader for calibration, mandatory for
                                  post-training quantization.
                                  It is iterable and should yield a tuple (input,
                                  label) for calibration dataset containing label,
                                  or yield (input, _) for label-free calibration
                                  dataset. The input could be a object, list, tuple or
                                  dict, depending on user implementation, as well as
                                  it can be taken as model input.
        q_func (function, optional): Reserved for future use.
        eval_dataloader (generator, optional): Data loader for evaluation. It is iterable
                                  and should yield a tuple of (input, label).
                                  The input could be a object, list, tuple or dict,
                                  depending on user implementation, as well as it can
                                  be taken as model input. The label should be able
                                  to take as input of supported metrics. If this
                                  parameter is not None, user needs to specify
                                  pre-defined evaluation metrics through configuration
                                  file and should set "eval_func" parameter as None.
                                  Tuner will combine model, eval_dataloader and
                                  pre-defined metrics to run evaluation process.
        eval_func (function, optional): The evaluation function provided by user.
                                        This function takes model as parameter, and
                                        evaluation dataset and metrics should be
                                        encapsulated in this function implementation and
                                        outputs a higher-is-better accuracy scalar value.

                                        The pseudo code should be something like:

                                        def eval_func(model):
                                            input, label = dataloader()
                                            output = model(input)
                                            accuracy = metric(output, label)
                                            return accuracy
        dicts (dict, optional): The dict containing resume information.
                                Defaults to None.

    """

    def __init__(self, model, conf, q_dataloader, q_func=None,
                 eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None):
        # No HAWQ-specific state: defer all bookkeeping to the TuneStrategy base.
        super(
            HAWQ_V2TuneStrategy,
            self).__init__(
            model,
            conf,
            q_dataloader,
            q_func,
            eval_dataloader,
            eval_func,
            dicts,
            q_hooks)

    def next_tune_cfg(self):
        """Generate the next tuning config to try.

        Stage 1 yields at most `stage1_max` optype-wise configs; afterwards the
        Hessian trace of each op is computed via the adaptor and ops are fallen
        back accumulatively in trace order.

        Yields:
            dict: op-wise tuning configuration (includes 'calib_sampling_size').
        """
        tuning_space = self.tuning_space
        # Only the first calibration sampling size is used by this strategy.
        calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0]

        # Initialize the tuning config for each op according to the quantization approach
        op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
        # Optype-wise tuning items: the algorithm/scheme/granularity of activation(weight)
        early_stop_tuning = True
        stage1_cnt = 0
        # NOTE(review): `quant_ops` is collected here but never used below — confirm intent.
        quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else []
        quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else []
        stage1_max = 1  # TODO set a more appropriate value
        op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
                                                         op_item_dtype_dict, initial_op_tuning_cfg)
        for op_tuning_cfg in op_wise_tuning_sampler:
            stage1_cnt += 1
            if early_stop_tuning and stage1_cnt > stage1_max:
                logger.info("Early stopping the stage 1.")
                break
            op_tuning_cfg['calib_sampling_size'] = calib_size
            yield op_tuning_cfg
        # Start compute the hessian trace
        logger.info(f"************** Start compute the hessian trace *****************")
        target_dtype = "int8"
        # Loss used to build the gradient for the Hessian-trace computation;
        # may be None if the user did not pass strategy_kwargs (see commented assert).
        hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss
        # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \
        # Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})."
        op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model,
                                                            dataloader = self.calib_dataloader,
                                                            q_model = self.q_model,
                                                            criterion =hawq_v2_criterion,
                                                            enable_act = False)
        sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True))
        logger.info(f"************** Hessian Trace *****************")
        for op_name, trace in sorted_op_to_traces.items():
            logger.info(f"*** op: {op_name}, hessian trace : {trace}")
        logger.info(f"************************************************")
        # WA for op mapping: trace keys are plain name strings; match them back to
        # the (op_name, op_type) tuples used by the tuning config by prefix.
        ordered_ops_tmp = {}
        for op_info in list(initial_op_tuning_cfg.keys()):
            op_name, op_type = op_info
            for op_trace_name in op_to_traces.keys():
                if isinstance(op_trace_name, str) and op_trace_name.startswith(op_name):
                    if op_name in ordered_ops_tmp:
                        # NOTE(review): this passes a tuple of two strings to logger.info,
                        # which logs the tuple repr — probably meant to be one message.
                        logger.info((f"*** Already assigned the hessian trace to {op_name}",
                                     f"update it with the value of {op_trace_name}"))
                    ordered_ops_tmp[op_name] = op_to_traces[op_trace_name]

        # The lambda still sees the dict here: rebinding happens after sorted() returns.
        # NOTE(review): sort direction is tied to the accuracy metric's direction
        # (self.higher_is_better), not to trace magnitude — confirm this is intended.
        ordered_ops_tmp = sorted(ordered_ops_tmp.keys(),
                                 key=lambda key: ordered_ops_tmp[key],
                                 reverse=self.higher_is_better)
        # WA for add op type: recover (op_name, op_type) tuples from bare names.
        op_info_map = {}
        for op_info in list(initial_op_tuning_cfg.keys()):
            op_info_map[op_info[0]] = op_info  # op_name: (op_name, op_type)
        tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops_tmp]
        op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops_tmp)))

        logger.info(f"Start to accumulate fallback to {target_dtype}.")
        # NOTE(review): `initial_op_tuning_cfg` is rebound here but not used afterwards;
        # the sampler below starts from `op_tuning_cfg` (last stage-1 config).
        initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
        fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
                                                 initial_op_tuning_cfg=op_tuning_cfg,
                                                 op_dtypes=op_dtypes, accumulate=True,
                                                 skip_first=False)
        for op_tuning_cfg in fallback_sampler:
            op_tuning_cfg['calib_sampling_size'] = calib_size
            yield op_tuning_cfg

    def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig):
        """Derive a 'dynamic' OpTuningConfig from a static one.

        Keeps each weight/activation tuning item value when the dynamic quant mode
        supports it; otherwise falls back to the first available option (or None).

        Args:
            op_static_cfg: The static-quantization config to convert.

        Returns:
            OpTuningConfig: The equivalent dynamic-quantization config.
        """
        op_state = op_static_cfg.get_state()
        op_name = op_static_cfg.op_name
        op_type = op_static_cfg.op_type
        op_quant_mode = 'dynamic'
        tuning_space = self.tuning_space
        dynamic_state = {}
        for att in ['weight', 'activation']:
            if att not in op_state:
                continue
            for item_name, item_val in op_state[att].items():
                att_item = (att, item_name)
                # Only recognized tuning items are carried over.
                if att_item not in TUNING_ITEMS_LST:
                    continue
                if tuning_space.query_item_option((op_name, op_type), op_quant_mode, att_item, item_val):
                    dynamic_state[att_item] = item_val
                else:
                    quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode)
                    tuning_item = quant_mode_item.get_option_by_name(att_item)
                    dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None
        return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state)
collect the ops that support static and dynamic + # Collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -193,9 +196,9 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - # step3. optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 + # Optype-wise tuning + early_stop_tuning = True + stage1_cnt = 0 int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], @@ -208,14 +211,13 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - # step4. 
fallback the ops supported both static and dynamic from static to dynamic - # tuning items: None + # Fallback the ops supported both static and dynamic from static to dynamic static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if item in tuning_space.query_items_by_quant_mode('dynamic')] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: - logger.info("Non ops that support both dynamic") + logger.info("No op support both dynamic and static") def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_op_tuning_cfg = deepcopy(op_tuning_cfg) @@ -230,14 +232,13 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - # step5. fallback + # Fallback to float point datatypes ('bf16' or 'fp32') for target_dtype in ['bf16', 'fp32']: fallback_items_lst = [item for item in int8_ops if item in tuning_space.query_items_by_quant_mode(target_dtype)] if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - # replace it with sorted items list + # Replace it with sorted items list fallback_items_name_lst = [item.name for item in fallback_items_lst] # TODO check the best_qmodel ordered_op_name_types = self.mse_impact_lst(fallback_items_name_lst, self.model, self.best_qmodel) @@ -254,11 +255,11 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - # do accumulated fallback according to the order in the previous stage + # Do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + key=lambda key: op_fallback_acc_impact[key], + 
@strategy_registry
class MSE_V2TuneStrategy(TuneStrategy):
    """The tuning strategy using MSE policy in tuning space.

    This MSE policy runs fp32 model and int8 model separately to get all activation tensors,
    and then compares those tensors by MSE algorithm to order all ops with MSE distance for
    deciding the impact of each op to final accuracy.
    It will be used to define opwise tuning space by priority.

    Args:
        model (object): The FP32 model specified for low precision tuning.
        conf (Class): The Conf class instance initialized from user yaml
                      config file.
        q_dataloader (generator): Data loader for calibration, mandatory for
                                  post-training quantization.
                                  It is iterable and should yield a tuple (input,
                                  label) for calibration dataset containing label,
                                  or yield (input, _) for label-free calibration
                                  dataset. The input could be a object, list, tuple or
                                  dict, depending on user implementation, as well as
                                  it can be taken as model input.
        q_func (function, optional): Reserved for future use.
        eval_dataloader (generator, optional): Data loader for evaluation. It is iterable
                                  and should yield a tuple of (input, label).
                                  The input could be a object, list, tuple or dict,
                                  depending on user implementation, as well as it can
                                  be taken as model input. The label should be able
                                  to take as input of supported metrics. If this
                                  parameter is not None, user needs to specify
                                  pre-defined evaluation metrics through configuration
                                  file and should set "eval_func" parameter as None.
                                  Tuner will combine model, eval_dataloader and
                                  pre-defined metrics to run evaluation process.
        eval_func (function, optional): The evaluation function provided by user.
                                        This function takes model as parameter, and
                                        evaluation dataset and metrics should be
                                        encapsulated in this function implementation and
                                        outputs a higher-is-better accuracy scalar value.

                                        The pseudo code should be something like:

                                        def eval_func(model):
                                            input, label = dataloader()
                                            output = model(input)
                                            accuracy = metric(output, label)
                                            return accuracy
        dicts (dict, optional): The dict containing resume information.
                                Defaults to None.

    """

    def __init__(self, model, conf, q_dataloader, q_func=None,
                 eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None):
        # Sensitivity ordering computed lazily; persisted via __getstate__ for resume.
        self.ordered_ops = None
        super(
            MSE_V2TuneStrategy,
            self).__init__(
            model,
            conf,
            q_dataloader,
            q_func,
            eval_dataloader,
            eval_func,
            dicts,
            q_hooks)

    def __getstate__(self):
        """Attach the op ordering to the matching tuning-history entry before pickling."""
        for history in self.tuning_history:
            if self._same_yaml(history['cfg'], self.cfg):
                history['ordered_ops'] = self.ordered_ops
        save_dict = super().__getstate__()
        return save_dict

    def next_tune_cfg(self):
        """The generator of yielding next tuning config to traverse by concrete strategies
        according to last tuning result.

        Yields:
            tune_config (dict): It's a dict containing the tuning configuration to run.
        """
        # NOTE(review): best_op_tuning_cfg/best_acc are initialized but never used below.
        best_op_tuning_cfg = None
        if len(self.metric_name) == 1 or self.metric_weight is not None:
            best_acc = float('-inf') if self.higher_is_better else float('inf')
        else:
            best_acc = [float('-inf') if higher_is_better else float('inf') for \
                higher_is_better in self.metric_criterion]

        from copy import deepcopy
        tuning_space = self.tuning_space
        initial_op_tuning_cfg = {}
        for item in tuning_space.root_item.options:
            if item.item_type == 'op':
                op_name, op_type = item.name
                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
        for calib_sampling_size in calib_sampling_size_lst:
            # Collect the ops that support static and dynamic
            quant_mode_wise_items = OrderedDict()
            # NOTE(review): sibling strategies dropped 'fp16' from this list — confirm
            # whether mse_v2 should keep it.
            query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32']
            pre_items = set()
            for quant_mode in query_order:
                items = tuning_space.query_items_by_quant_mode(quant_mode)
                filtered_items = [item for item in items if item not in pre_items]
                pre_items = pre_items.union(set(items))
                quant_mode_wise_items[quant_mode] = filtered_items

            def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
                # Assign each op its first supported quant mode in query order.
                for item in items_lst:
                    op_item_dtype_dict[item.name] = target_quant_mode

            op_item_dtype_dict = OrderedDict()
            for quant_mode, quant_mode_items in quant_mode_wise_items.items():
                initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)

            # Optype-wise tuning
            early_stop_tuning = True
            stage1_cnt = 0
            int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static']
            stage1_max = 2  # TODO set a more appropriate value
            op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
                                                             op_item_dtype_dict, initial_op_tuning_cfg)
            for op_tuning_cfg in op_wise_tuning_sampler:
                stage1_cnt += 1
                if early_stop_tuning and stage1_cnt > stage1_max:
                    logger.info("Early stopping the stage 1.")
                    break
                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
                yield op_tuning_cfg

            # Fallback the ops supported both static and dynamic from static to dynamic
            static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if
                                    item in tuning_space.query_items_by_quant_mode('dynamic')]
            if static_dynamic_items:
                logger.info("Fallback all ops that support both dynamic and static to dynamic.")
            else:
                logger.info("No op support both dynamic and static")

            def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig):
                # Clone the config and flip only its quant mode.
                new_op_tuning_cfg = deepcopy(op_tuning_cfg)
                new_op_tuning_cfg.op_quant_mode = 'dynamic'
                return new_op_tuning_cfg

            new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg)
            for item in static_dynamic_items:
                new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name])
            new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
            yield new_op_tuning_cfg

            # Fallback one by one by op sensitivity(mse)
            # 1. while the accuracy requirements not met:  # to improve the accuracy
            #     1) calculate the sensitivity of int8 ops in current state.
            #     2) fallback the op with higher sensitivity accumulatively
            # 2. after the accuracy requirements met:  # to improve the performance
            #     1) calculate the sensitivity of fp32 ops in the current state
            #     2) re-quantize the op with lower sensitivity accumulatively
            tune_cfg = deepcopy(self.cur_best_tuning_cfg)
            requantize_cfg = deepcopy(self._tune_cfg_converter(self.cur_best_tuning_cfg))
            self.output_op_names = self.adaptor.get_output_op_names(self.cur_best_qmodel)
            # Fix: compared with `!= None`; identity comparison is the correct idiom.
            self.confidence_batches = (self.cfg.tuning.strategy.confidence_batches
                                       if self.cfg.tuning.strategy.confidence_batches is not None else 2)
            tune_cfg_backup = deepcopy(tune_cfg)
            quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode(tune_cfg, 'dynamic') + \
                                    self._collect_ops_by_quant_mode(tune_cfg, 'static')
            op_quant_cfgs = {op_info: tune_cfg_backup[op_info] for op_info in quant_ops_in_tune_cfg}
            fallback_records = []
            self.re_quant = True
            while not self.objectives.compare(self.last_tune_result, self.baseline):
                # Record the time of calculating the sensitivity
                start = time()
                ops_lst = self.adaptor.calculate_op_sensitivity(self.model,
                                                                self.calib_dataloader,
                                                                deepcopy(self._tune_cfg_converter(tune_cfg)),
                                                                self.output_op_names,
                                                                self.confidence_batches,
                                                                fallback=True)
                logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.")
                select_op_info = ops_lst[0]
                logger.info(f"*** The op {select_op_info} have the highest sensitivity in the current state, \
                            fallback it to fp32.")
                tune_cfg[select_op_info] = OpTuningConfig(select_op_info[0],
                                                          select_op_info[1],
                                                          'fp32',
                                                          self.tuning_space)
                # Record the fallback history
                if not fallback_records:
                    fallback_records = [[select_op_info]]
                else:
                    fallback_records.append(fallback_records[-1] + [select_op_info])
                logger.debug(f"*** The fallback ops record: \n{tuning_record_msg(fallback_records)}")
                yield tune_cfg

            logger.info(f"*** The accuracy meeting the accuracy requirements, stop fallback ops.")
            while self.objectives.compare(self.last_tune_result, self.baseline):
                if len(fallback_records) == 0 or len(fallback_records[-1]) <= 1:
                    logger.info(f"*** Stop re-quant due to no int8 op or only 1 int8 op left.")
                    break
                logger.info(f"*** Start to re-quant the fallback op in the previous stage.")
                # Track the current fallback ops
                tmp_fallback_ops = fallback_records[-1] if fallback_records else []
                start = time()
                ops_lst = self.adaptor.calculate_op_sensitivity(self.model,
                                                                self.calib_dataloader,
                                                                deepcopy(self._tune_cfg_converter(tune_cfg)),
                                                                self.output_op_names,
                                                                self.confidence_batches,
                                                                fallback=False,
                                                                requantize_cfgs=requantize_cfg['op'])
                logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.")
                if not ops_lst:
                    logger.warning("No op to be requantized")
                    break
                for select_op_info in ops_lst:
                    # assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list."
                    if select_op_info not in tmp_fallback_ops:
                        logger.debug(f"{select_op_info} not in fallback list.")
                        continue

                    new_fallback_ops = deepcopy(tmp_fallback_ops)
                    new_fallback_ops.remove(select_op_info)
                    # Only try a fallback set that has not been evaluated before.
                    if new_fallback_ops not in fallback_records:
                        logger.info(f"*** The op {select_op_info} have the lowest sensitivity in the current state, \
                                    re-quantize it.")
                        tune_cfg[select_op_info] = op_quant_cfgs[select_op_info]
                        fallback_records.append(new_fallback_ops)
                        logger.debug(f"*** The fallback ops record: \n{tuning_record_msg(fallback_records)}")
                        yield tune_cfg
                        break
                    else:
                        # Fix: log message typos ("re-qaunt", "evallated") corrected.
                        logger.debug(f"*** Skip re-quant {select_op_info}, due the config has been evaluated.")
                        continue
            self.re_quant = False
            logger.info(f"*** The accuracy not meeting the accuracy requirements, stop re-quantize ops.")
collections import OrderedDict -from .st_utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler -from .st_utils.tuning_structs import OpTuningConfig +from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler +from .utils.tuning_structs import OpTuningConfig from ..utils import logger @strategy_registry @@ -95,7 +95,7 @@ def next_tune_cfg(self): initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) # collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 63710b43264..8c7276589c2 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -42,12 +42,13 @@ import copy import numpy as np from collections import OrderedDict +from time import time from ..utils import logger -from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler -from .st_utils.tuning_space import TuningItem, TuningSpace -from .st_utils.tuning_structs import OpTuningConfig +from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler +from .utils.tuning_space import TuningItem, TuningSpace +from .utils.tuning_structs import OpTuningConfig STRATEGIES = {} @@ -140,6 +141,7 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, self.tune_data = {} self.tune_result_record = [] self.tuning_history = [] + self.tuning_result_data = [] # The tuning history ever made, structured like below: # [ # { @@ -170,6 +172,8 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, self.best_qmodel = None self.cur_best_acc = self.initial_best_acc() # track the current best accuracy 
self.cur_best_tuning_cfg = {} # track tuning cfg with the current best accuracy + self.cur_best_qmodel = None # track quantized model with the current best accuracy + self.re_quant = False self.capability = self.adaptor.query_fw_capability(model) logger.debug(self.capability) @@ -219,14 +223,16 @@ def traverse(self): if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - self.baseline = self._evaluate(self.model) + self.baseline = self._evaluate(self.model) + self.objectives.baseline = self.baseline # record the FP32 baseline self._add_tuning_history() self.show_baseline_info() trials_count = 0 - + traverse_start_time = time() for op_tuning_cfg in self.next_tune_cfg(): + tuning_start_time = time() tune_cfg = self._tune_cfg_converter(op_tuning_cfg) trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) @@ -262,8 +268,22 @@ def traverse(self): q_config=self.q_model.q_config) self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) self.tune_cfg = tune_cfg + now_time = time() + acc_res_msg = "" + performace_res_msg = "" + if self.tuning_result_data: + acc_res_msg = "[ " + "| ".join(self.tuning_result_data[0]) + " ]" + performace_res_msg = "[ " + "| ".join(self.tuning_result_data[1]) + " ]" + logger.debug(f"*** The accuracy of last tuning is: {acc_res_msg}") + logger.debug(f"*** The perfomance of last tuning is: {performace_res_msg}") + logger.debug(f"*** The last tuning time: {(now_time - tuning_start_time):.2f} s") + logger.debug(f"*** The tuning process lasted time: {(now_time - traverse_start_time):.2f} s") + self._dump_tuning_process_statistics() if need_stop: + if self.re_quant: + logger.info("*** Do not stop the tuning process, re-quantize the ops.") + continue if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning: logger.debug(f'*** Start to do diagnosis (inspect tensor).') self._diagnosis() @@ -276,6 +296,7 @@ def traverse(self): self.best_qmodel = 
recover(self.model.model, os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'), best_trail) + logger.debug(f"*** Update the best qmodel by recovering from history.") self.best_tune_result = best_result self._dump_tuning_process_statistics() break @@ -378,13 +399,13 @@ def _optype_skipped(optype): def initial_tuning_cfg(self): if self.cfg.quantization.approach == 'post_training_auto_quant': - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] elif self.cfg.quantization.approach == 'post_training_dynamic_quant': - query_order = ['dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['dynamic', 'bf16', 'fp32'] elif self.cfg.quantization.approach == 'post_training_static_quant': - query_order = ['static', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'bf16', 'fp32'] elif self.cfg.quantization.approach == 'quant_aware_training': - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] quant_mode_wise_items = OrderedDict() pre_items = set() @@ -460,6 +481,7 @@ def _tune_cfg_converter(self, op_tuning_cfg): else: tune_cfg['calib_iteration'] = 1 tune_cfg['advance'] = self.cfg.quantization.advance + tune_cfg['approach'] = self.cfg.quantization.approach return tune_cfg def set_tuning_space(self, conf): @@ -519,6 +541,8 @@ def set_framework_info(self, q_dataloader, q_func=None): 'approach': self.cfg.quantization.approach, 'random_seed': self.cfg.tuning.random_seed} framework = self.cfg.model.framework.lower() + framework_specific_info.update({'backend': self.cfg.model.get('backend', 'default')}) + framework_specific_info.update({'format': self.cfg.model.get('quant_format', 'default')}) self.mixed_precision_mode = bool('mixed_precision' in self.cfg) or \ bool('graph_optimization' in self.cfg) @@ -531,21 +555,34 @@ def set_framework_info(self, q_dataloader, q_func=None): 'recipes': self.cfg.quantization.recipes, 'performance_only': 
self.cfg.tuning.exit_policy.performance_only, 'use_bf16': self.cfg.use_bf16 if self.cfg.use_bf16 is not None else False}) + if self.cfg.model.backend == 'itex': + self.cfg.model.framework = 'tensorflow_itex' + framework = 'tensorflow_itex' + if 'keras' in framework: + framework_specific_info.update({ + 'workspace_path': self.cfg.tuning.workspace.path, }) if framework == 'mxnet': framework_specific_info.update({"q_dataloader": q_dataloader}) - if 'onnxrt' in framework.lower(): + if 'onnx' in framework.lower(): if self.mixed_precision_mode: - framework_specific_info.update({"backend": "integerops"}) framework_specific_info.update({"approach": "post_training_dynamic_quant"}) - else: - framework_specific_info.update({"backend": framework.lower().split('_')[-1]}) framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)}) framework_specific_info.update({'workspace_path': self.cfg.tuning.workspace.path}) framework_specific_info.update({'recipes': self.cfg.quantization.recipes}) framework_specific_info.update( {'graph_optimization': OPTIONS[framework].graph_optimization}) framework_specific_info.update({'reduce_range': self.cfg.reduce_range}) + if framework.lower() == 'onnxrt_qdq' or \ + framework_specific_info['backend'] == 'onnxrt_trt_ep': + framework_specific_info.update({'format': 'QDQ'}) + framework = 'onnxrt_qdq' if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': + if self.cfg.model.backend == 'ipex': + self.cfg.model.framework = 'pytorch_ipex' + framework = 'pytorch_ipex' + elif self.cfg.model.backend == 'default': + self.cfg.model.framework = 'pytorch_fx' + framework = 'pytorch_fx' if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) framework_specific_info.update({"q_dataloader": q_dataloader}) @@ -605,22 +642,26 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): acc, _ = self.last_tune_result if self.cur_best_tuning_cfg is None: 
self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + self.cur_best_qmodel = self.last_qmodel if not isinstance(acc, list) and ((self.higher_is_better and acc >= self.cur_best_acc) \ or (not self.higher_is_better and acc <= self.cur_best_acc)): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + self.cur_best_qmodel = self.last_qmodel elif len(self.metric_name) > 1 and self.metric_weight is not None: acc = np.mean(np.array(acc) * self.metric_weight) if (self.higher_is_better and acc >= self.cur_best_acc) or \ (not self.higher_is_better and acc <= self.cur_best_acc): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + self.cur_best_qmodel = self.last_qmodel elif len(self.metric_name) > 1 and self.metric_weight is None: if all([acc_i >= best_i if higher_is_better else acc_i <= best_i for \ acc_i, best_i, higher_is_better in \ zip(acc, self.cur_best_acc, self.metric_criterion)]): self.cur_best_acc = acc - self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + self.cur_best_qmodel = self.last_qmodel logger.debug(f"Best acc is {self.cur_best_acc}.") return self.cur_best_acc, self.cur_best_tuning_cfg @@ -721,12 +762,14 @@ def _evaluate(self, model): metric_cfg = self.cfg.evaluation.accuracy.metric if \ self.cfg.evaluation.accuracy.metric else \ self.cfg.evaluation.accuracy.multi_metrics + iteration = -1 if self.cfg.evaluation.accuracy.iteration is None \ + else self.cfg.evaluation.accuracy.iteration eval_func = create_eval_func(self.framework, self.eval_dataloader, self.adaptor, metric_cfg, postprocess_cfg, - self.cfg.evaluation.accuracy.iteration, + iteration, tensorboard = self.cfg.tuning.tensorboard, fp32_baseline = self.baseline == None) @@ -795,10 +838,18 @@ def stop(self, timeout, trials_count): del self.best_qmodel self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel + logger.debug(f"*** Update the best qmodel 
with the result {self.best_tune_result}") if self.metric_met_point == 0: self.metric_met_point = self.tuning_times - else: - del self.last_qmodel + + # track the model with highest acc + if self.best_tune_result and self.last_tune_result: # (acc, [perf]) + if self.re_quant and self.objectives.accuracy_meets(): + self.best_tune_result = self.last_tune_result + self.best_qmodel = self.last_qmodel + logger.debug(f"*** Update the best qmodel with the result {self.best_tune_result}.") + else: + logger.debug(f"*** Accuracy not meets the requirements, do not update the best qmodel.") if self.last_tune_result: last_tune = self.last_tune_result[0] if \ @@ -885,7 +936,7 @@ def stop(self, timeout, trials_count): '{:.4f} '.format(self.last_tune_result[1][i]) if self.last_tune_result else 'n/a', '{:.4f} '.format(self.best_tune_result[1][i]) if self.best_tune_result else 'n/a'] \ for i, obj in enumerate(self.objectives.representation)]) - + self.tuning_result_data = output_data Statistics(output_data, header='Tune Result Statistics', field_names=['Info Type', 'Baseline', 'Tune {} result'.format(trials_count), \ @@ -995,6 +1046,13 @@ def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): def _fake_eval_func(self, model): return 1. 
+ def _collect_ops_by_quant_mode(self, tune_cfg, quant_mode): + ops_lst = [] + for op_info, op_config in tune_cfg.items(): + if isinstance(op_config, OpTuningConfig) and quant_mode in op_config.op_quant_mode: + ops_lst.append(op_info) + return ops_lst + def _diagnosis(self): import logging logger = logging.getLogger("neural_compressor") diff --git a/neural_compressor/strategy/st_utils/__init__.py b/neural_compressor/strategy/utils/__init__.py similarity index 88% rename from neural_compressor/strategy/st_utils/__init__.py rename to neural_compressor/strategy/utils/__init__.py index e2fa444b0ba..db8d0fcfdf8 100644 --- a/neural_compressor/strategy/st_utils/__init__.py +++ b/neural_compressor/strategy/utils/__init__.py @@ -17,4 +17,5 @@ from .tuning_sampler import TuningSampler, OpWiseTuningSampler, OpTypeWiseTuningSampler, FallbackTuningSampler from .tuning_structs import OpTuningConfig -from .tuning_space import TuningItem, TuningSpace \ No newline at end of file +from .tuning_space import TuningItem, TuningSpace +from .helper import tuning_record_msg \ No newline at end of file diff --git a/neural_compressor/strategy/utils/helper.py b/neural_compressor/strategy/utils/helper.py new file mode 100644 index 00000000000..ce3ca6867bd --- /dev/null +++ b/neural_compressor/strategy/utils/helper.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +def tuning_record_msg(records): + records_str_lst = [[str(e) for e in record] for record in records] + record_msg = '\n'.join(','.join(record) for record in records_str_lst) + return record_msg \ No newline at end of file diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/utils/tuning_sampler.py similarity index 97% rename from neural_compressor/strategy/st_utils/tuning_sampler.py rename to neural_compressor/strategy/utils/tuning_sampler.py index fea140a9e4d..9b5eff7dc1b 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/utils/tuning_sampler.py @@ -254,16 +254,18 @@ def __init__(self, tuning_order_lst: List[TuningOrder], initial_op_tuning_cfg: Dict[tuple, Any], op_dtypes: Dict[str, str], - accumulate: bool + accumulate: bool, + skip_first: bool = True ): super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) self.op_dtypes = op_dtypes self.accumulate = accumulate + self.skip_first = skip_first pass def __iter__(self): new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) - skip_first = True + skip_first = self.skip_first for op_name_type, target_dtype in self.op_dtypes.items(): if not self.accumulate: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) diff --git a/neural_compressor/strategy/st_utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py similarity index 89% rename from neural_compressor/strategy/st_utils/tuning_space.py rename to neural_compressor/strategy/utils/tuning_space.py index b2cfddbdd38..a706f5837d2 100644 --- a/neural_compressor/strategy/st_utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -23,7 +23,7 @@ from copy import deepcopy from ...utils import logger -PRECISION_SET = {'bf16', 'fp16', 'fp32'} +PRECISION_SET = {'bf16', 'fp32'} QUANT_MODE_SET = {'static', 'dynamic'} QUNAT_BIT_SET = {'int8', 'uint8', 'int4', 'uint4'} @@ -148,7 +148,7 @@ def _merge_op_cfg(self, op_cap, 
op_user_cfg, fw_op_cap): } } Returns: - """ + """ for key in ['activation', 'weight']: if key in op_user_cfg and op_user_cfg[key] is not None: user_dtype_lst = op_user_cfg[key]['dtype'] if op_user_cfg[key]['dtype'] is not None else [] @@ -166,8 +166,16 @@ def _merge_op_cfg(self, op_cap, op_user_cfg, fw_op_cap): op_cap['quant'][quant_mode_flag] = deepcopy(fw_quant_cap) for item_name, item_options in op_user_cfg[key].items(): if item_options is not None and key in fw_quant_cap and item_name in fw_quant_cap[key]: - merged_options = [option for option in item_options if \ - option in fw_quant_cap[key][item_name]] + merged_options = [] + for option in item_options: + if option in fw_quant_cap[key][item_name]: + merged_options.append(option) + else: + logger.warning("By default, {1}: {2} is not supported for {0} ".format( + key, item_name, option) + "in Intel Neural Compressor") + logger.warning("Please visit the corresponding yaml file in " + + "neural_compressor/adaptor/ to enhance the default " + + "capability in Intel Neural Compressor") if len(merged_options) == 0: merged_options = fw_quant_cap[key][item_name] op_cap['quant'][quant_mode_flag][key][item_name] = merged_options @@ -207,19 +215,19 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): Here is an example: capability:{ - ('op1','type1'): { + ('op1','type1'): { 'item1': [item1_option1, item1_option2, item1_option3], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op2','type1'): { + ('op2','type1'): { 'item1': [item1_option1, item1_option2, item1_option3], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op3','type2'): { + ('op3','type2'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2], } - ('op4','type2'): { + ('op4','type2'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2], } @@ -234,26 +242,26 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): 'item1': [item1_option1, 
item1_option2] }} op-wise: { - ('op3','type2'): { + ('op3','type2'): { 'item2': [item2_option1] }} } # step1. merged with model-wise capability:{ - ('op1','type1'): { + ('op1','type1'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op2','type1'): { + ('op2','type1'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op3','type2'): { + ('op3','type2'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2], } - ('op4','type2'): { + ('op4','type2'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2], } @@ -261,19 +269,19 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): # step2. merged with optype-wise capability:{ - ('op1','type1'): { + ('op1','type1'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op2','type1'): { + ('op2','type1'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op3','type2'): { + ('op3','type2'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2], } - ('op4','type2'): { + ('op4','type2'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2], } @@ -281,19 +289,19 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): # step3. 
merged with op-wise capability:{ - ('op1','type1'): { + ('op1','type1'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op2','type1'): { + ('op2','type1'): { 'item1': [item1_option1, item1_option2], 'item2': [item2_option1, item2_option2, item2_option3], } - ('op3','type2'): { + ('op3','type2'): { 'item1': [item1_option1], 'item2': [item2_option1], } - ('op4','type2'): { + ('op4','type2'): { 'item1': [item1_option1], 'item2': [item2_option1, item2_option2], } @@ -391,7 +399,7 @@ def query_items_by_quant_mode(self, quant_mode): """ Collect all op items that support the specific quantization/precision mode Args: - quant_mode (str): fp32/bf16/fp16/dynamic/static + quant_mode (str): fp32/bf16/dynamic/static Returns: List: the list of op items @@ -422,16 +430,15 @@ def set_deafult_config(self, op_name_type, quant_mode): # set the first option as the default if the not support the required quant mode quant_mode_item = op_item.options[0] for quant_item in op_item.options: - if quant_mode == quant_item.name or quant_mode in quant_item.name: + if quant_mode == quant_item.name or (isinstance(quant_mode, str) and quant_mode in quant_item.name): quant_mode_item = quant_item break # set the first option as the default for each tuning item config = {item.name: item.options[0] for item in quant_mode_item.options} op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode_item.name, + quant_mode, self, config) return op_tuning_config - diff --git a/neural_compressor/strategy/st_utils/tuning_structs.py b/neural_compressor/strategy/utils/tuning_structs.py similarity index 100% rename from neural_compressor/strategy/st_utils/tuning_structs.py rename to neural_compressor/strategy/utils/tuning_structs.py diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 8f0dcecb57e..07494949308 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -35,22 +35,23 @@ 
class CompressionManager: examples: import neural_compressor.training.prepare_compression compression_manager = prepare_compression(conf, model) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model train_loop: - compression_manager.on_train_begin() for epoch in range(epochs): - compression_manager.on_epoch_begin(epoch) + compression_manager.callbacks.on_epoch_begin(epoch) for i, batch in enumerate(dataloader): - compression_manager.on_step_begin(i) + compression_manager.callbacks.on_step_begin(i) ...... - output = compression_manager.model(batch) + output = model(batch) loss = ...... - loss = compression_manager.on_after_compute_loss(batch, output, loss) + loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) loss.backward() - compression_manager.on_before_optimizer_step() + compression_manager.callbacks.on_before_optimizer_step() optimizer.step() - compression_manager.on_step_end() - compression_manager.on_epoch_end() - compression_manager.on_train_end() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() compression_manager.save("path_to_save") """ def __init__(self, component): @@ -115,29 +116,14 @@ def save(self, root=None): def export( self, save_path: str, - input, - target_model_type: str = 'ONNX', - quant_mode: str = 'QDQ', - opset_version: int = 14, - *args, - **kwargs + conf, ): """Convert the model to another type model, like `onnx` model and so on. Args: """ - if target_model_type == "ONNX": - if self.model.q_config is not None: - assert self.fp32_model is not None, "Can't deepcopy fp32 model, so we can't " \ - "export to onnx model now, this is a limitation, will remove in furture." 
- self.model.export_to_int8_onnx( - save_path, input, opset_version=opset_version, fp32_model=self.fp32_model - ) - else: - self.model.export_to_fp32_onnx(save_path, input, opset_version=opset_version) - else: - assert False, "Unsupport export for {} model".format(type(self.model)) + self.model.export(save_path, conf) def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs): @@ -179,17 +165,31 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) comps = [] for conf in confs: if isinstance(conf, QuantizationAwareTrainingConfig): - conf_ = Config(quantization=conf) + conf_ = Config(quantization=conf, + benchmark=None, + pruning=None, + distillation=None, + nas=None) com = Quantization(conf_) + com.model = model elif isinstance(conf, PruningConfig): - conf_ = Config(pruning=conf) + conf_ = Config(pruning=conf, + benchmark=None, + quantization=None, + distillation=None, + nas=None) com = Pruning(conf_) + com.model = model elif isinstance(conf, DistillationConfig): - conf_ = Config(distillation=conf) + conf_ = Config(distillation=conf, + benchmark=None, + quantization=None, + pruning=None, + nas=None) com = Distillation(conf_) - assert conf.teacher_model is not None, \ - "Please set teacher_model in DistillationConfig" - com.teacher_model = conf.teacher_model + com.model = model + if conf.teacher_model is not None: + com.teacher_model = conf.teacher_model else: assert False, "Unsupported configure: {}".format(type(conf)) @@ -202,17 +202,28 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) component = scheduler else: if isinstance(confs, QuantizationAwareTrainingConfig): - conf = Config(quantization=confs) + conf = Config(quantization=confs, + benchmark=None, + pruning=None, + distillation=None, + nas=None) component = Quantization(conf) elif type(confs) == PruningConfig: - conf = Config(pruning=confs) + conf = Config(pruning=confs, + benchmark=None, + quantization=None, + 
distillation=None, + nas=None) component = Pruning(conf) elif type(confs) == DistillationConfig: - conf = Config(distillation=confs) + conf = Config(distillation=confs, + benchmark=None, + quantization=None, + pruning=None, + nas=None) component = Distillation(conf) - assert confs.teacher_model is not None, \ - "Please set teacher_model in DistillationConfig" - component.teacher_model = confs.teacher_model + if confs.teacher_model is not None: + component.teacher_model = confs.teacher_model else: assert False, logger.error( "confs should be one of QuantizationAwareTrainingConfig, " @@ -222,6 +233,8 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) component.model = model if isinstance(confs, QuantizationAwareTrainingConfig): component.prepare_qat() + else: + component.prepare() compression_manager = CompressionManager(component) return compression_manager diff --git a/neural_compressor/utils/create_obj_from_config.py b/neural_compressor/utils/create_obj_from_config.py index 8d22945c522..61dfcd4ac01 100644 --- a/neural_compressor/utils/create_obj_from_config.py +++ b/neural_compressor/utils/create_obj_from_config.py @@ -16,7 +16,7 @@ # limitations under the License. 
from neural_compressor.experimental.metric import METRICS -from neural_compressor.experimental.data import DATASETS, TRANSFORMS, FILTERS, DATALOADERS +from neural_compressor.experimental.data import Datasets, TRANSFORMS, FILTERS, DATALOADERS from neural_compressor.experimental.common import Optimizers, Criterions from collections import OrderedDict import copy @@ -65,7 +65,7 @@ def create_dataset(framework, data_source, cfg_preprocess, cfg_filter): preprocesses = TRANSFORMS(framework, 'preprocess') preprocess = get_preprocess(preprocesses, cfg_preprocess) # even we can unify transform, how can we handle the IO, or we do the transform here - datasets = DATASETS(framework) + datasets = Datasets(framework) dataset_type = list(data_source.keys())[0] # generate framework and dataset specific filters filter = None diff --git a/neural_compressor/utils/options.py b/neural_compressor/utils/options.py index 4679bce23d5..fa5fe7b338d 100644 --- a/neural_compressor/utils/options.py +++ b/neural_compressor/utils/options.py @@ -32,7 +32,7 @@ class onnxrt: 'onnxrt_integerops': onnxrt, 'onnxrt_qlinearops': onnxrt, 'onnxrt_qdq': onnxrt, - 'onnxrt_qoperator': onnxrt, + 'onnxruntime': onnxrt, } diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index a6df7fa5751..31241a65500 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -280,10 +280,11 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): from torch.quantization.quantize_fx import prepare_fx, convert_fx, prepare_qat_fx # pragma: no cover - if version > Version("1.12.1") and tune_cfg['approach'] != "post_training_dynamic_quant": + if version.release >= Version( + "1.13.0").release and tune_cfg['approach'] != "post_training_dynamic_quant": from ..adaptor.pytorch import get_example_inputs - example_inputs = get_example_inputs(model, kwargs["dataloader"] if "dataloader" in - kwargs else None) + example_inputs = get_example_inputs( + model, 
kwargs["dataloader"] if "dataloader" in kwargs else None) else: example_inputs = None diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 5d213a3250d..16eea74e6f1 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -108,13 +108,6 @@ def _singleton(*args, **kw): return instances[cls] return _singleton -def set_backend(backend): - global __BACKEND - __BACKEND = backend - -def get_backend(): - global __BACKEND - return __BACKEND @contextmanager def time_limit(seconds): @@ -348,7 +341,7 @@ def recover(fp32_model, tuning_history_path, num, **kwargs): from neural_compressor.adaptor import FRAMEWORKS adaptor = FRAMEWORKS[framework](q_config['framework_specific_info']) - if 'onnxrt' in framework: + if 'onnx' in framework: from neural_compressor.experimental import common ox_fp32_model = common.Model(fp32_model) tune_index_qmodel = adaptor.recover(ox_fp32_model, q_config) @@ -487,3 +480,24 @@ def dump_data_to_local(data, path, filename): with open(file_path, 'wb') as fp: pickle.dump(data, fp) logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) + + + +def set_random_seed(seed: int): + from neural_compressor.config import options + options.random_seed = seed + + +def set_workspace(workspace: str): + from neural_compressor.config import options + options.workspace = workspace + + +def set_resume_from(resume_from: str): + from neural_compressor.config import options + options.resume_from = resume_from + + +def set_tensorboard(tensorboard: bool): + from neural_compressor.config import options + options.tensorboard = tensorboard diff --git a/neural_compressor/ux/components/model/model_type_getter.py b/neural_compressor/ux/components/model/model_type_getter.py index 1d0913181a0..0fd151692b3 100644 --- a/neural_compressor/ux/components/model/model_type_getter.py +++ b/neural_compressor/ux/components/model/model_type_getter.py @@ -14,7 +14,7 @@ # limitations under the License. 
"""Model type getter.""" -from neural_compressor.model.model import get_model_type as nc_get_model_type +from neural_compressor.model.tensorflow_model import get_model_type as nc_get_model_type from neural_compressor.ux.utils.expiring_dict import ExpiringDict model_type_cache = ExpiringDict(ttl=600) diff --git a/neural_compressor/ux/components/model/tensorflow/model.py b/neural_compressor/ux/components/model/tensorflow/model.py index 4f33b161c85..e3218634088 100644 --- a/neural_compressor/ux/components/model/tensorflow/model.py +++ b/neural_compressor/ux/components/model/tensorflow/model.py @@ -17,7 +17,7 @@ from typing import Any, List, Optional from neural_compressor.experimental.common.model import Model as NCModel -from neural_compressor.model.model import TensorflowBaseModel +from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.utils.logger import Logger from neural_compressor.ux.components.graph.graph import Graph from neural_compressor.ux.components.graph.reader.tensorflow_reader import TensorflowReader diff --git a/neural_compressor/ux/utils/configs/model_wise_params.json b/neural_compressor/ux/utils/configs/model_wise_params.json index 7de380f0bb4..6aac712cd1a 100644 --- a/neural_compressor/ux/utils/configs/model_wise_params.json +++ b/neural_compressor/ux/utils/configs/model_wise_params.json @@ -3,14 +3,14 @@ "weight": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": ["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax"], "bit": 7.0 }, "activation": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": ["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax", "kl"] } }, @@ -23,4 +23,4 @@ "algorithm": ["placeholder"] } } -} \ No newline at end of file +} diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py 
b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py index 7619df8c753..0457546e50a 100644 --- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py +++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py @@ -10,7 +10,7 @@ from onnx import onnx_pb as onnx_proto from onnx import helper, TensorProto, numpy_helper from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.data import DATASETS, DATALOADERS +from neural_compressor.data import Datasets, DATALOADERS from neural_compressor.experimental import Quantization, common from neural_compressor.experimental import Benchmark, common from neural_compressor import options @@ -22,7 +22,7 @@ def build_static_yaml(): fake_yaml = """ model: name: imagenet - framework: onnxrt_qoperator + framework: onnxrt_qlinearops quantization: approach: post_training_static_quant @@ -529,7 +529,7 @@ def build_gemm_model(): def build_benchmark(): seq = ''' from neural_compressor.experimental import Benchmark -from neural_compressor.data import DATASETS, DATALOADERS +from neural_compressor.data import Datasets, DATALOADERS from neural_compressor import conf from onnx import onnx_pb as onnx_proto from onnx import helper, TensorProto, numpy_helper @@ -555,7 +555,7 @@ def reverse_matrix(x): graph = helper.make_graph(nodes, 'test0', [input0], [output0]) model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) -datasets = DATASETS('onnxrt_qlinearops') +datasets = Datasets('onnxrt_qlinearops') ext_dataset = datasets['dummy'](shape=(10, 2), low=0., high=1., label=True) ext_dataloader = DATALOADERS['onnxrt_qlinearops'](ext_dataset) @@ -590,26 +590,26 @@ class TestAdaptorONNXRT(unittest.TestCase): rn50_export_path = "rn50.onnx" rn50_model = torchvision.models.resnet50() - datasets = DATASETS('onnxrt_qlinearops') + datasets = Datasets('onnxrt_qlinearops') cv_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) cv_dataloader = DATALOADERS['onnxrt_qlinearops'](cv_dataset) 
ir3_dataset = datasets['dummy'](shape=(10, 2048), low=0., high=1., label=True) ir3_dataloader = DATALOADERS['onnxrt_qlinearops'](ir3_dataset) - gather_dataset = DATASETS('onnxrt_qlinearops')['dummy'](shape=(5, 100, 4), label=True) + gather_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(5, 100, 4), label=True) gather_dataloader = DATALOADERS['onnxrt_qlinearops'](gather_dataset) ext_dataset = datasets['dummy'](shape=(10, 2), low=0., high=1., label=True) ext_dataloader = DATALOADERS['onnxrt_qlinearops'](ext_dataset) - rename_dataset = DATASETS('onnxrt_qlinearops')['dummy'](shape=(5, 1, 200), label=True) + rename_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(5, 1, 200), label=True) rename_dataloader = DATALOADERS['onnxrt_qlinearops'](rename_dataset) matmul_dataset = MatmulDataset() matmul_dataloader = DATALOADERS['onnxrt_qlinearops'](matmul_dataset) - conv_dataset = DATASETS('onnxrt_qlinearops')['dummy'](shape=(10, 3, 1, 3), label=True) + conv_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(10, 3, 1, 3), label=True) conv_dataloader = DATALOADERS['onnxrt_qlinearops'](conv_dataset) @classmethod @@ -679,7 +679,8 @@ def test_inspect_tensor(self): "approach": "post_training_static_quant", "random_seed": 1234, "q_dataloader": None, - "backend": "qlinearops", + "backend": "default", + "format": "default", "graph_optimization": options.onnxrt.graph_optimization, "workspace_path": './nc_workspace/{}/{}/'.format( 'onnxrt', @@ -763,7 +764,8 @@ def test_set_tensor(self): "approach": "post_training_static_quant", "random_seed": 1234, "q_dataloader": None, - "backend": "qlinearops", + "backend": "default", + "format": "default", "graph_optimization": options.onnxrt.graph_optimization, "workspace_path": './nc_workspace/{}/{}/'.format( 'onnxrt', @@ -893,7 +895,8 @@ def test_adaptor(self): "approach": "post_training_static_quant", "random_seed": 1234, "q_dataloader": None, - "backend": "qlinearops", + "backend": "default", + "format": "default", 
"graph_optimization": options.onnxrt.graph_optimization, "workspace_path": './nc_workspace/{}/{}/'.format( 'onnxrt', @@ -910,29 +913,6 @@ def test_adaptor(self): adaptor.quantize(tune_cfg, common.Model(self.gather_model), self.gather_dataloader) self.assertTrue(len(adaptor.quantizable_ops), 2) - framework_specific_info['device'] = 'gpu' - adaptor = FRAMEWORKS[framework](framework_specific_info) - tune_cfg = {'calib_iteration': 1, - 'op': {('gather', 'Gather'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}, - ('add', 'Add'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}, - ('squeeze', 'Squeeze'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}}} - model = adaptor.quantize(tune_cfg, common.Model(self.gather_model), self.gather_dataloader) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Cast']), 0) - - tune_cfg = {'calib_iteration': 1, - 'op': {('Matmul', 'MatMul'): {'activation': {'dtype': ['uint8'], 'quant_mode': 'static'}, - 'weight': {'dtype': ['int8']}}, - ('add', 'Add'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}, - ('add2', 'Add'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}}} - adaptor = FRAMEWORKS[framework](framework_specific_info) - model = adaptor.quantize(tune_cfg, common.Model(self.matmul_model), self.matmul_dataloader) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Cast']), 0) - for fake_yaml in ["gather.yaml"]: quantizer = Quantization(fake_yaml) quantizer.model = self.gather_model @@ -1054,6 +1034,41 @@ def eval(model): self.assertTrue('add' in node_names) self.assertTrue('add2' in node_names) + def test_new_API(self): + import time + result = [0.1] + def sub_eval(model, result): + time.sleep(0.001 * len(result)) + return result[0] + + def eval(model): + return sub_eval(model, 
result) + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach='static', quant_format='QDQ') + q_model = quantization.fit(self.matmul_model, config, + calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue('QLinearMatMul' not in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach='static') + q_model = quantization.fit(self.matmul_model, config, + calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue('QLinearMatMul' in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach='dynamic') + q_model = quantization.fit(self.matmul_model, config, + calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue('MatMulInteger' in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach='dynamic', quant_format='QDQ') + q_model = quantization.fit(self.matmul_model, config, + calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue('MatMulInteger' in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach='static', backend='onnxrt_trt_ep') + q_model = quantization.fit(self.matmul_model, config, + calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue('QLinearMatMul' not in [i.op_type for i in q_model.nodes()]) + def test_multi_metrics(self): conf.model.framework = 'onnxrt_qlinearops' conf.quantization.approach = 'post_training_static_quant' diff --git a/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py b/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py index 769fd7353a1..05b83bd17d8 100644 --- a/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py +++ b/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py @@ -11,7 +11,7 @@ from neural_compressor.experimental.data.datasets.dataset import Dataset from neural_compressor.adaptor.ox_utils.calibration import ONNXRTAugment from neural_compressor.model.onnx_model import 
ONNXModel -from neural_compressor.data import DATASETS, DATALOADERS +from neural_compressor.data import Datasets, DATALOADERS def generate_input_initializer(tensor_shape, tensor_dtype, input_name): ''' @@ -55,7 +55,7 @@ def create_nlp_session(): node = onnx.helper.make_node('Gather', ['D', 'B'], ['C'], name='gather') graph = helper.make_graph([squeeze, node], 'test_graph_1', [A], [C], [B_init]) model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) - datasets = DATASETS('onnxrt_qlinearops') + datasets = Datasets('onnxrt_qlinearops') dataset = datasets['dummy_v2'](input_shape=(100, 4), label_shape=(1,)) dataloader = DATALOADERS['onnxrt_qlinearops'](dataset) @@ -154,7 +154,7 @@ def test_dump_calibration(self): dataloader, ["Conv", "Relu"], iterations=[0]) - calib_params = augment.dump_calibration() + calib_params = augment.dump_calibration({}) assert "A" in calib_params and "B" in calib_params and "D" in calib_params and "C" in calib_params def test_augment_graph(self): @@ -502,7 +502,7 @@ def test_quant_param_calculation(self): #test calculation of quantization params #TO_DO: check rmin/rmax - quantization_params_dict = augment.dump_calibration() + quantization_params_dict = augment.dump_calibration({}) node_output_names, output_dicts_list = augment.get_intermediate_outputs('naive') dict_for_quantization = augment._map_calibration(node_output_names, output_dicts_list) #check the size of the quantization dictionary diff --git a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py index d27c6bf8ecf..3852793d7dd 100644 --- a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py +++ b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py @@ -745,7 +745,7 @@ def test_relu(self): sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED sess_options.optimized_model_filepath = "./onnxrt_test/optimized_model.onnx" - session = 
ort.InferenceSession(model.SerializeToString(), sess_options) + session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) q_config = {"Conv": self.static_q_config, "Relu": self.static_q_config} @@ -758,7 +758,7 @@ def test_relu(self): self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types) sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC - session = ort.InferenceSession(model.SerializeToString(), sess_options) + session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) self.qlinear_test(tmp_model, q_config, quantize_params, quantizable_op_types) self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types) @@ -766,7 +766,7 @@ def test_relu(self): graph = helper.make_graph([conv_node, relu_node, add_node], 'test_graph_2', [A, B, E], [F]) model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC - session = ort.InferenceSession(model.SerializeToString(), sess_options) + session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) self.qlinear_test(tmp_model, q_config, quantize_params, quantizable_op_types) self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types) @@ -786,7 +786,7 @@ def test_clip(self): sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED sess_options.optimized_model_filepath = "./onnxrt_test/optimized_model.onnx" - session = ort.InferenceSession(model.SerializeToString(), sess_options) + session = ort.InferenceSession(model.SerializeToString(), 
sess_options, providers=ort.get_available_providers()) model = onnx.load(sess_options.optimized_model_filepath) q_config = {"Conv": self.static_q_config, "Clip": self.static_q_config} diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py index effd890bdd7..4bbd97e2e8c 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py @@ -849,8 +849,7 @@ def test_fx_quant(self): **{'prepare_custom_config_dict': \ {'non_traceable_module_name': ['a']}, 'convert_custom_config_dict': \ - {'preserved_attributes': []}, \ - 'dataloader': quantizer.calib_dataloader + {'preserved_attributes': []} }) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) @@ -860,8 +859,7 @@ def test_fx_quant(self): **{'prepare_custom_config_dict': {'non_traceable_module_name': ['a']}, 'convert_custom_config_dict': - {'preserved_attributes': []}, - 'dataloader': quantizer.calib_dataloader + {'preserved_attributes': []} }) self.assertEqual(model_fx.code, model_fx_recover.code) shutil.rmtree('./saved', ignore_errors=True) @@ -978,7 +976,7 @@ def q_func(model): self.assertTrue('quantize' in str(type(q_model.model.rnn))) def test_fx_sub_module_quant(self): - for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml', 'fx_dynamic_yaml.yaml']: + for fake_yaml in ['fx_qat_yaml.yaml', 'fx_dynamic_yaml.yaml', 'fx_ptq_yaml.yaml']: model_origin = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) @@ -1000,8 +998,7 @@ def test_fx_sub_module_quant(self): **{'prepare_custom_config_dict': \ {'non_traceable_module_name': ['a']}, 'convert_custom_config_dict': \ - {'preserved_attributes': []}, \ - 'dataloader': quantizer.calib_dataloader + {'preserved_attributes': []} }) self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) @@ -1011,8 +1008,7 @@ def 
test_fx_sub_module_quant(self): **{'prepare_custom_config_dict': \ {'non_traceable_module_name': ['a']}, 'convert_custom_config_dict': \ - {'preserved_attributes': []}, \ - 'dataloader': quantizer.calib_dataloader + {'preserved_attributes': []} }) self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) shutil.rmtree('./saved', ignore_errors=True) diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py index 3bea3e28673..9bb5a122fa0 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -1,20 +1,14 @@ import copy import neural_compressor.adaptor.pytorch as nc_torch -import numpy as np import os import shutil import torch import torch.nn as nn -import torch.nn.quantized as nnq import unittest import os -from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig -from neural_compressor.config import set_tensorboard, set_workspace -from neural_compressor.data import DATASETS, DATALOADERS -from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.model import MODELS -from neural_compressor.experimental import Quantization, common -from neural_compressor.experimental.data.datasets.dataset import DATASETS +from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, set_workspace +from neural_compressor.data import Datasets, DATALOADERS, DataLoader +from neural_compressor.experimental.data.datasets.dataset import Datasets from neural_compressor import quantization from neural_compressor.training import prepare_compression from neural_compressor.utils.pytorch import load @@ -26,7 +20,6 @@ # improve lazy import UT coverage resnet18 = LazyImport("torchvision.models.resnet18") -q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") PT_VERSION = nc_torch.get_torch_version().release if PT_VERSION >= Version("1.8.0").release: @@ -35,63 
+28,6 @@ FX_MODE = False -dyn_op_name_list = {"decoder": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}} - -ptq_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, -} - ptq_fx_op_name_list = { "layer1.0.conv1": { "activation": { @@ -285,32 +221,6 @@ def forward(self, x): return x -class PartialQuantModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(3, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.conv1 = nn.Conv2d(1, 1, 1) - self.bn1 = nn.BatchNorm2d(1) - self.conv2 = nn.Conv2d(1, 1, 1) - self.linear = nn.Linear(224 * 224, 1) - self.dequant = DeQuantStub() - self.sub = SubModel(bypass=False) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.conv1(x) - x = self.bn1(x) - x = self.sub(x) - x = self.quant(x) - x = self.conv2(x) - x = x.view(1, -1) - x = self.linear(x) - x = self.dequant(x) - return x - class DynamicControlModel(torch.nn.Module): def __init__(self): super().__init__() @@ -373,8 +283,7 @@ def eval_func(model): return 0.0 -def train_func(compression_manager, model, dataloader=None): - 
compression_manager.callbacks.on_train_begin(dataloader=dataloader) +def train_func(model): optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) # switch to evaluate mode model.train() @@ -385,104 +294,9 @@ def train_func(compression_manager, model, dataloader=None): optimizer.zero_grad() loss.backward() optimizer.step() - compression_manager.callbacks.on_train_end() return model -def q_func(model): - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - # switch to evaluate mode - model.train() - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - loss = output.mean() - optimizer.zero_grad() - loss.backward() - optimizer.step() - return model - - -class TestPytorchAdaptor(unittest.TestCase): - model = q_resnet18() - - @classmethod - def tearDownClass(self): - shutil.rmtree("./saved", ignore_errors=True) - shutil.rmtree("runs", ignore_errors=True) - - def test_quantization_new_API(self): - for fake_yaml in ["dynamic", "qat", "static"]: - model = M() - if fake_yaml == "qat": - quant_conf = QuantizationAwareTrainingConfig(op_name_list=qat_op_name_list) - compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) - q_model = train_func(compression_manager, compression_manager.model) - else: - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = DATALOADERS["pytorch"](dataset) - if fake_yaml == "dynamic": - quant_conf = PostTrainingQuantConfig(approach="dynamic", - op_name_list=dyn_op_name_list) - elif fake_yaml == "static": - quant_conf = PostTrainingQuantConfig(approach="static", - op_name_list=ptq_op_name_list) - q_model = quantization.fit( - model, - quant_conf, - calib_dataloader=dataloader if fake_yaml == "static" else None) - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - shutil.rmtree("./saved", ignore_errors=True) - - def test_auto_quant(self): - def eval_func(model): - return 1 - - model_origin = 
LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, - ) - # run fx_quant in neural_compressor and save the quantized GraphModule - quant_conf = PostTrainingQuantConfig(approach="auto") - set_workspace("./saved") - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = common.DataLoader(dataset) - q_model = quantization.fit(model_origin, - quant_conf, - calib_dataloader=dataloader, - eval_func=eval_func) - q_model.save("./saved") - model = common.Model(model_origin) - model.workspace_path = "./saved" - self.assertNotEqual(q_model, None) - self.assertEqual(type(q_model._model.decoder), - type(model._model.decoder)) - shutil.rmtree("./saved", ignore_errors=True) - - def test_tensorboard(self): - model = copy.deepcopy(self.model) - model.eval().fuse_model() - quant_conf = PostTrainingQuantConfig(approach="static", - backend="pytorch") - set_tensorboard(True) - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = common.DataLoader(dataset) - quantization.fit( - model, quant_conf, calib_dataloader=dataloader, eval_func=eval_func - ) - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - quantization.fit(model, - quant_conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader) - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - set_tensorboard(False) - - @unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") class TestPytorchFXAdaptor(unittest.TestCase): @classmethod @@ -491,53 +305,60 @@ def tearDownClass(self): shutil.rmtree("runs", ignore_errors=True) def test_fx_quant(self): - for fake_yaml in ["qat", "static"]: + for approach in ["qat", "static"]: model_origin = resnet18() - dataset = DATASETS("pytorch")["dummy"]((10, 3, 224, 224), label=True) + dataset = Datasets("pytorch")["dummy"]((10, 3, 224, 224), label=True) dataloader = DATALOADERS["pytorch"](dataset) - if fake_yaml == "qat": + if approach == "qat": + model = 
copy.deepcopy(model_origin) conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) + op_name_list=qat_op_name_list) + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model + q_model = train_func(model) + compression_manager.callbacks.on_train_end() + compression_manager.save("./saved") else: conf = PostTrainingQuantConfig( - op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" - ) + op_name_list=ptq_fx_op_name_list) set_workspace("./saved") q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func) - q_model.save("./saved") + q_model.save("./saved") # Load configure and weights with neural_compressor.utils model_fx = load("./saved", model_origin, **{"dataloader": torch.utils.data.DataLoader(dataset)}) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - if fake_yaml != "qat": + if approach != "qat": # recover int8 model with only tune_cfg history_file = "./saved/history.snapshot" model_fx_recover = recover(model_origin, history_file, 0, **{"dataloader": dataloader}) self.assertEqual(model_fx.code, model_fx_recover.code) shutil.rmtree("./saved", ignore_errors=True) - for fake_yaml in ["qat", "static"]: + for approach in ["qat", "static"]: model_origin = M() # run fx_quant in neural_compressor and save the quantized GraphModule - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224), label=True) + dataset = Datasets("pytorch")["dummy"]((100, 3, 224, 224), label=True) dataloader = DATALOADERS["pytorch"](dataset) - if fake_yaml == "qat": + if approach == "qat": + model = copy.deepcopy(model_origin) conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" + 
op_name_list=qat_op_name_list ) - compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model + q_model = train_func(model) + compression_manager.callbacks.on_train_end() compression_manager.save("./saved") else: conf = PostTrainingQuantConfig( - op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" + op_name_list=ptq_fx_op_name_list ) q_model = quantization.fit(model_origin, conf, @@ -560,9 +381,7 @@ def test_fx_dynamic_quant(self): ) # run fx_quant in neural_compressor and save the quantized GraphModule origin_model.eval() - conf = PostTrainingQuantConfig(approach="dynamic", - op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" - ) + conf = PostTrainingQuantConfig(approach="dynamic", op_name_list=ptq_fx_op_name_list) set_workspace("./saved") q_model = quantization.fit(origin_model, conf) q_model.save("./saved") @@ -593,26 +412,30 @@ def eval_func(model): return 1 # Model Definition - for fake_yaml in ["qat", "auto"]: + for approach in ["qat", "auto"]: model_origin = LSTMModel( ntoken = 10, ninp = 512, nhid = 256, nlayers = 2, ) - dataset = DATASETS("pytorch")["dummy"]((3, 10)) + dataset = Datasets("pytorch")["dummy"]((3, 10)) dataloader = DATALOADERS["pytorch"](dataset) # run fx_quant in neural_compressor and save the quantized GraphModule - if fake_yaml == "qat": + if approach == "qat": + model = copy.deepcopy(model_origin) conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" + op_name_list=qat_op_name_list ) - compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader=dataloader) - self.assertTrue("quantize" in str(type(q_model.model.encoder))) - self.assertTrue("quantize" in 
str(type(q_model.model.rnn))) + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model.model + train_func(model) + compression_manager.callbacks.on_train_end() + self.assertTrue("quantize" in str(type(model.encoder))) + self.assertTrue("quantize" in str(type(model.rnn))) else: - conf = PostTrainingQuantConfig(backend="pytorch_fx") + conf = PostTrainingQuantConfig(approach="auto") q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader) @@ -620,32 +443,35 @@ def eval_func(model): self.assertTrue("quantize" in str(type(q_model.model.rnn))) def test_fx_sub_module_quant(self): - for fake_yaml in ["qat", "static"]: + for approach in ["qat", "static"]: model_origin = DynamicControlModel() - dataset = DATASETS("pytorch")["dummy"]((1, 3, 224, 224)) + dataset = Datasets("pytorch")["dummy"]((1, 3, 224, 224)) dataloader = DATALOADERS["pytorch"](dataset) # run fx_quant in neural_compressor and save the quantized GraphModule - if fake_yaml == "qat": - conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) + if approach == "qat": + model = copy.deepcopy(model_origin) + conf = QuantizationAwareTrainingConfig(op_name_list=qat_op_name_list) + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model + q_model = train_func(model) + compression_manager.callbacks.on_train_end() + compression_manager.save("./saved") else: set_workspace("./saved") - conf = PostTrainingQuantConfig(backend="pytorch_fx") + conf = PostTrainingQuantConfig() q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader) - q_model.save("./saved") + q_model.save("./saved") # Load configure and weights with 
neural_compressor.utils model_fx = load("./saved/best_model.pt", model_origin, **{"dataloader": torch.utils.data.DataLoader(dataset) }) self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) - if fake_yaml != "qat": - # recover int8 model with only tune_cfg + if approach != "qat": + # recover int8 model with only tune_cfg history_file = "./saved/history.snapshot" model_fx_recover = recover(model_origin, history_file, 0, **{"dataloader": torch.utils.data.DataLoader(dataset) @@ -658,10 +484,10 @@ def test_fx_sub_module_quant(self): def test_mix_precision(self): model_origin = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = DATALOADERS["pytorch"](dataset) + dataset = Datasets("pytorch")["dummy"]((100, 3, 224, 224)) + dataloader = DataLoader("pytorch", dataset) set_workspace=("./saved") - conf = PostTrainingQuantConfig(op_name_list=ptq_fx_op_name_list, backend="pytorch_fx") + conf = PostTrainingQuantConfig(op_name_list=ptq_fx_op_name_list) q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, diff --git a/test/adaptor/pytorch_adaptor/test_torch2onnx.py b/test/adaptor/pytorch_adaptor/test_torch2onnx.py index 8977b1a1dd4..67b71df76d6 100644 --- a/test/adaptor/pytorch_adaptor/test_torch2onnx.py +++ b/test/adaptor/pytorch_adaptor/test_torch2onnx.py @@ -9,7 +9,7 @@ import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor.experimental.data.datasets.dataset import DATASETS +from neural_compressor.experimental.data.datasets.dataset import Datasets from packaging.version import Version from torch.quantization import QuantStub, DeQuantStub @@ -81,7 +81,7 @@ def build_pytorch_yaml(): f.write(fake_dyn_yaml) fake_qat_yaml = fake_ptq_yaml.replace( - 'post_training_static_quant', + 
'post_training_static_quant', 'quant_aware_training', ) with open('qat_yaml.yaml', 'w', encoding="utf-8") as f: @@ -205,14 +205,11 @@ def tearDownClass(self): os.remove('int8-model.onnx') def test_fx_quant(self): - for fake_yaml in ['dynamic', 'static']: + for approach in ['dynamic', 'static']: model = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule - conf = PostTrainingQuantConfig( - approach=fake_yaml, - backend="pytorch_fx" - ) - dataset = DATASETS("pytorch")['dummy']((100, 3, 224, 224)) + conf = PostTrainingQuantConfig(approach=approach) + dataset = Datasets("pytorch")['dummy']((100, 3, 224, 224)) dataloader = torch.utils.data.DataLoader(dataset) q_model = quantization.fit(model, conf, @@ -222,7 +219,7 @@ def test_fx_quant(self): int8_jit_model = q_model.export_to_jit(example_inputs) # INC will keep fallbacked fp32 modules when exporting onnx model - if fake_yaml == 'static': + if approach == 'static': calib_dataloader = dataloader else: calib_dataloader = None diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py new file mode 100644 index 00000000000..5a9c5af6c0e --- /dev/null +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py @@ -0,0 +1,136 @@ +import os +import shutil +import unittest +import tensorflow as tf +import numpy as np + +def build_msev2_yaml(): + mse_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse_v2 + accuracy_criterion: + relative: 0.01 + exit_policy: + max_trials: 10 + timeout: 3600 + ''' + with open('mse_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(mse_yaml) + +def build_fake_model(): + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = 
tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) + last_identity = tf.identity(op2, name='op2_to_store') + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') + last_identity = tf.identity(op2, name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +class TestGetOutputTensor(unittest.TestCase): + @classmethod + def setUpClass(self): + build_msev2_yaml() + self.model = build_fake_model() + + @classmethod + def tearDownClass(self): + os.remove('mse_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + + def test_get_output_op_names(self): + from 
neural_compressor.experimental import Quantization, common + + quantizer = Quantization('mse_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.model + qmodel = quantizer.fit() + + self.assertEqual( + quantizer.strategy.adaptor.get_output_op_names(qmodel), + ["Conv2D_dummy_biasadd"]) + + + def test_calculate_op_sensitivity(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization("mse_yaml.yaml") + quantizer.model = self.model + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.pre_process() + + dataloader = quantizer._calib_dataloader + strategy = quantizer.strategy + adaptor = strategy.adaptor + tune_cfg_generator = strategy.next_tune_cfg() + tune_cfg = strategy._tune_cfg_converter(next(tune_cfg_generator)) + output_op_names = ["Conv2D_dummy_biasadd"] + + op_sensitivity = adaptor.calculate_op_sensitivity( + model=quantizer.model, + dataloader=dataloader, + tune_cfg=tune_cfg, + output_op_names=output_op_names, + confidence_batches=1, + fallback=True) + self.assertIn(('op_to_store', 'conv2d'), op_sensitivity) + self.assertIn(('Conv2D', 'conv2d'), op_sensitivity) + + tune_cfg['op'][('op_to_store', 'conv2d')] = { + 'activation': {'dtype': 'fp32', 'quant_mode': 'fp32'}, + 'weight': {'dtype': 'fp32'}} + + op_sensitivity = adaptor.calculate_op_sensitivity( + model=quantizer.model, + dataloader=dataloader, + tune_cfg=tune_cfg, + output_op_names=output_op_names, + confidence_batches=1, + fallback=True) + self.assertNotIn(('op_to_store', 'conv2d'), op_sensitivity) + self.assertIn(('Conv2D', 'conv2d'), op_sensitivity) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git 
a/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py index 247b0871ebb..934f0622f1b 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py @@ -77,7 +77,7 @@ def test_quantize_input(self): framework_specific_info = {'device': 'cpu', 'approach': 'post_training_static_quant', \ 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'workspace_path': 'saved'} + 'workspace_path': 'saved', 'format': 'default', 'backend': 'default'} quantize_input_graph, _ = TensorFlowAdaptor(framework_specific_info).quantize_input(q_model.graph) Not_found_QuantizedV2 = True diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py index 54266a5a948..4c79a225be7 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py @@ -73,7 +73,8 @@ def test_fp32bias(self): framework_specific_info = {'device': 'cpu', 'workspace_path': 'saved',\ 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'approach': 'post_training_static_quant'} + 'approach': 'post_training_static_quant', 'format': 'default', + 'backend': 'default'} adaptor = TensorFlowAdaptor(framework_specific_info) adaptor.set_tensor(q_model, {'bias': np.random.random(16)}) @@ -136,7 +137,8 @@ def test_int32bias(self): framework_specific_info = {'device': 'cpu', 'workspace_path': 'saved',\ 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'approach': 'post_training_static_quant'} + 'approach': 'post_training_static_quant', 'format': 'default', + 'backend': 'default'} adaptor = TensorFlowAdaptor(framework_specific_info) adaptor.set_tensor(q_model, {'bias1': np.random.randint(6,size=2, dtype='int32')}) from tensorflow.core.framework import 
attr_value_pb2 diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py index 37aef1ca500..1a0450b4425 100644 --- a/test/benchmark/test_benchmark.py +++ b/test/benchmark/test_benchmark.py @@ -45,8 +45,8 @@ def build_benchmark(): arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') args = arg_parser.parse_args() -from neural_compressor.data import DATASETS -dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +from neural_compressor.data import Datasets +dataset = Datasets('tensorflow')['dummy']((100, 32, 32, 1), label=True) from neural_compressor.experimental import Benchmark, common from neural_compressor.conf.config import BenchmarkConf benchmarker = Benchmark('fake_yaml.yaml') @@ -60,8 +60,8 @@ def build_benchmark(): arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') args = arg_parser.parse_args() -from neural_compressor.data import DATASETS -dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +from neural_compressor.data import Datasets +dataset = Datasets('tensorflow')['dummy']((100, 32, 32, 1), label=True) from neural_compressor.experimental import Benchmark, common from neural_compressor.conf.config import BenchmarkConf conf = BenchmarkConf('fake_yaml.yaml') @@ -94,8 +94,8 @@ def build_benchmark2(): "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", "args = arg_parser.parse_args()\n", - "from neural_compressor.data import DATASETS\n", - "dataset = DATASETS('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", + "from neural_compressor.data import Datasets\n", + "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", "from neural_compressor.experimental import Benchmark, common\n", "benchmarker = Benchmark()\n", 
diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index fe5b0d0d710..720210ddf23 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -2,7 +2,6 @@ import psutil import unittest import os -import yaml import numpy as np import tensorflow as tf import tempfile @@ -18,9 +17,9 @@ def build_benchmark(): args = arg_parser.parse_args() from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental import common -dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +dataset = Datasets('tensorflow')['dummy']((100, 32, 32, 1), label=True) b_dataloader = common.DataLoader(dataset, batch_size=10) conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) fit(args.input_model, conf, b_dataloader=b_dataloader) @@ -33,8 +32,8 @@ def build_benchmark(): args = arg_parser.parse_args() from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig -from neural_compressor.data import DATASETS -dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +from neural_compressor.data import Datasets +dataset = Datasets('tensorflow')['dummy']((100, 32, 32, 1), label=True) from neural_compressor.experimental import common conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) b_dataloader = common.DataLoader(dataset, batch_size=10) @@ -64,8 +63,8 @@ def build_benchmark2(): "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", "args = arg_parser.parse_args()\n", "from neural_compressor.benchmark import fit\n" - "from neural_compressor.data import DATASETS\n", - "dataset = DATASETS('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", + "from neural_compressor.data import 
Datasets\n", + "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", "from neural_compressor.experimental import common\n", "b_dataloader = common.DataLoader(dataset)\n", diff --git a/test/config/test_pythonic_config.py b/test/config/test_pythonic_config.py index d755daaea08..f72686982b5 100644 --- a/test/config/test_pythonic_config.py +++ b/test/config/test_pythonic_config.py @@ -19,7 +19,7 @@ from torch import nn from neural_compressor.conf.pythonic_config import OpQuantConf, ActivationConf, WeightConf -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental import Quantization, Distillation, Pruning, NAS, common from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor import FRAMEWORKS @@ -140,7 +140,6 @@ def tearDownClass(self): def test_config_setting(self): config.quantization.inputs = ['image'] config.quantization.outputs = ['out'] - config.quantization.backend = 'onnxrt_integerops' config.quantization.approach = 'post_training_dynamic_quant' config.quantization.device = 'gpu' config.quantization.op_type_list = {'Conv': {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}}} @@ -154,7 +153,6 @@ def test_config_setting(self): self.assertEqual(config.quantization.inputs, ['image']) self.assertEqual(config.quantization.outputs, ['out']) - self.assertEqual(config.quantization.backend, 'onnxrt_integerops') self.assertEqual(config.quantization.approach, 'post_training_dynamic_quant') self.assertEqual(config.quantization.device, 'gpu') self.assertEqual(config.quantization.op_type_list, @@ -181,7 +179,6 @@ def test_config_setting(self): def test_quantization(self): - config.quantization.backend = 'onnxrt_integerops' q = Quantization(config) q.model = build_matmul_model() q_model = q() @@ -194,7 +191,7 @@ def test_quantization(self): self.assertTrue(all([not i.name.endswith('_quant') for i in 
q_model.nodes()])) def test_distillation(self): - config.quantization.backend = 'pytorch' + config.quantization.device = 'cpu' distiller = Distillation(config) model = ConvNet(16, 32) origin_weight = copy.deepcopy(model.out.weight) @@ -202,7 +199,7 @@ def test_distillation(self): distiller.teacher_model = ConvNet(16, 32) # Customized train, evaluation - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) def train_func(model): @@ -238,14 +235,13 @@ def eval_func(model): self.assertTrue(torch.any(weight != origin_weight)) def test_pruning(self): - config.quantization.backend = 'pytorch' prune = Pruning(config) model = ConvNet(16, 32) origin_weight = copy.deepcopy(model.out.weight) prune.model = model # Customized train, evaluation - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) def train_func(model): @@ -285,7 +281,6 @@ def eval_func(model): def test_use_bf16(self): config.quantization.device = 'cpu' - config.quantization.backend = 'pytorch' config.quantization.approach = 'post_training_dynamic_quant' config.quantization.use_bf16 = False q = Quantization(config) @@ -297,7 +292,7 @@ def test_use_bf16(self): def test_quantization_pytorch(self): config.quantization.device = 'cpu' - config.quantization.backend = 'pytorch' + config.quantization.backend = 'default' config.quantization.approach = 'post_training_dynamic_quant' config.quantization.use_bf16 = False q = Quantization(config) @@ -314,7 +309,6 @@ def tearDownClass(self): def test_tf_quantization(self): config.quantization.inputs = ['input'] config.quantization.outputs = ['out'] - config.quantization.backend = 'tensorflow' config.quantization.approach = 'post_training_static_quant' config.quantization.device = 
'cpu' config.quantization.strategy = 'basic' @@ -324,9 +318,9 @@ def test_tf_quantization(self): config.quantization.reduce_range = False q = Quantization(config) + q.model = build_conv2d_model() dataset = q.dataset('dummy', shape=(1, 224, 224, 3), label=True) q.calib_dataloader = common.DataLoader(dataset) - q.model = build_conv2d_model() q_model = q() self.assertTrue(any([i.name.endswith('_requantize') for i in q_model.graph_def.node])) diff --git a/test/data/test_dataloader.py b/test/data/test_dataloader.py index de9017b4e8c..d62bb8c5488 100644 --- a/test/data/test_dataloader.py +++ b/test/data/test_dataloader.py @@ -6,7 +6,7 @@ import shutil from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader from neural_compressor.data.dataloaders.dataloader import DataLoader -from neural_compressor.data import DATASETS, DATALOADERS, TRANSFORMS +from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS from PIL import Image class TestBuiltinDataloader(unittest.TestCase): @@ -1069,7 +1069,7 @@ def test_pytorch_bert_dataset(self): self.assertEqual(5, len(ds[0][0])) def test_tensorflow_dummy(self): - datasets = DATASETS('tensorflow') + datasets = Datasets('tensorflow') dataset = datasets['dummy'](shape=(4, 256, 256, 3)) data_loader = DATALOADERS['tensorflow'](dataset) @@ -1092,7 +1092,7 @@ def test_tensorflow_dummy(self): dataset = datasets['dummy'](shape=(4, 256, 256, 3), dtype=['float32', 'int8']) def test_tensorflow_dummy_v2(self): - datasets = DATASETS('tensorflow') + datasets = Datasets('tensorflow') # test with label dataset = datasets['dummy_v2'](\ input_shape=(256, 256, 3), label_shape=(1,)) @@ -1131,7 +1131,7 @@ def test_tensorflow_dummy_v2(self): input_shape=(256, 256, 3), dtype=['float32', 'int8']) def test_tensorflow_sparse_dummy_v2(self): - datasets = DATASETS('tensorflow') + datasets = Datasets('tensorflow') # test with label dataset = datasets['sparse_dummy_v2'](\ dense_shape=[[10, 20], [5, 3]], label_shape=[[1]], 
sparse_ratio=[0.98, 0.8]) @@ -1184,7 +1184,7 @@ def test_style_transfer_dataset(self): im = Image.fromarray(random_array) im.save('test.jpg') - datasets = DATASETS('tensorflow') + datasets = Datasets('tensorflow') dataset = datasets['style_transfer'](content_folder='./', style_folder='./') length = len(dataset) image, label = dataset[0] @@ -1223,7 +1223,7 @@ def test_tensorflow_list_dict(self): # self.assertEqual(data[0][1], 2) def test_pytorch_dummy(self): - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') transform = TRANSFORMS('pytorch', 'preprocess')['Resize'](**{'size':100}) dataset = datasets['dummy'](shape=[(4, 256, 256, 3), (4, 1)], \ high=[10., 10.], low=[0., 0.], transform=transform) @@ -1240,7 +1240,7 @@ def test_pytorch_dummy(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_dummy(self): - datasets = DATASETS('mxnet') + datasets = Datasets('mxnet') transform = TRANSFORMS('mxnet', 'preprocess')['Resize'](**{'size':100}) dataset = datasets['dummy'](shape=(4, 256, 256, 3), transform=transform) @@ -1258,7 +1258,7 @@ def test_mxnet_dummy(self): self.assertEqual(dataset[0][1], 0) def test_onnxrt_qlinear_dummy(self): - datasets = DATASETS('onnxrt_qlinearops') + datasets = Datasets('onnxrt_qlinearops') transform = TRANSFORMS('onnxrt_qlinearops', 'preprocess')['Resize'](**{'size':100}) dataset = datasets['dummy'](shape=(4, 256, 256, 3), transform=transform) @@ -1283,7 +1283,7 @@ def test_onnxrt_qlinear_dummy(self): shape=[(4, 256, 256, 3), (4, 256, 256, 3)], dtype=['float32', 'int8', 'int8']) def test_onnx_integer_dummy(self): - datasets = DATASETS('onnxrt_integerops') + datasets = Datasets('onnxrt_integerops') dataset = datasets['dummy'](shape=(4, 256, 256, 3)) data_loader = DATALOADERS['onnxrt_integerops'](dataset) @@ -1321,7 +1321,7 @@ def test_onnx_bert(self): tsv_w.writerow(['Quality', '#1 ID', '#2 ID', '#1 String', '#2 String']) tsv_w.writerow(['1', '702876', '702977', 
"""Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .""", """Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence ."""]) - datasets = DATASETS('onnxrt_integerops') + datasets = Datasets('onnxrt_integerops') args = {'GLUE': {'data_dir': './MRPC', 'model_name_or_path': 'bert-base-uncased', diff --git a/test/data/test_filter.py b/test/data/test_filter.py index 5b86781de68..830f8e7cf1c 100644 --- a/test/data/test_filter.py +++ b/test/data/test_filter.py @@ -4,7 +4,7 @@ import json import shutil from PIL import Image -from neural_compressor.data import FILTERS, TRANSFORMS, DATASETS, DATALOADERS +from neural_compressor.data import FILTERS, TRANSFORMS, Datasets, DATALOADERS from neural_compressor.utils.create_obj_from_config import create_dataset, get_preprocess, create_dataloader import tensorflow as tf @@ -60,7 +60,7 @@ def testLabelBalanceCOCORecord(self): preprocesses = TRANSFORMS('tensorflow', 'preprocess') filters = FILTERS('tensorflow') filter = filters['LabelBalanceCOCORecord'](2) - datasets = DATASETS('tensorflow') + datasets = Datasets('tensorflow') dataset = datasets['COCORecord']('test.record', \ transform=None, filter=filter) dataloader = DATALOADERS['tensorflow'](dataset=dataset, batch_size=1) @@ -146,7 +146,7 @@ def testLabelBalanceCOCORaw(self): filters = FILTERS('onnxrt_qlinearops') filter = filters['LabelBalanceCOCORaw'](1) - datasets = DATASETS('onnxrt_qlinearops') + datasets = Datasets('onnxrt_qlinearops') dataset = datasets['COCORaw']('./', transform=None, filter=filter) dataloader = DATALOADERS['onnxrt_qlinearops'](dataset=dataset, batch_size=1) for (inputs, labels) in dataloader: diff --git a/test/distillation/test_distillation.py b/test/distillation/test_distillation.py index a5a993f2fdf..672adf86d5c 100644 --- a/test/distillation/test_distillation.py +++ b/test/distillation/test_distillation.py @@ -6,10 +6,10 @@ import torchvision import 
torch.nn as nn import tensorflow as tf -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader - +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 def build_fake_yaml(): fake_yaml = """ @@ -216,7 +216,7 @@ def test_distillation_external(self): def test_distillation_external_new_API(self): from neural_compressor.training import prepare_compression - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) @@ -252,7 +252,7 @@ def test_distillation_external_new_API(self): stat = torch.load('./saved/best_model.pt') opt_model = self.student_model.load_state_dict(stat) - @unittest.skipIf(tf.version.VERSION < '2.3.0', " keras requires higher version than tf-2.3.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), " keras requires higher version than tf-2.3.0") def test_tf_distillation(self): from neural_compressor.experimental import Distillation from neural_compressor.conf.config import DistillationConf diff --git a/test/distillation/test_self_distillation.py b/test/distillation/test_self_distillation.py index e05a40ae56e..20a695ac211 100644 --- a/test/distillation/test_self_distillation.py +++ b/test/distillation/test_self_distillation.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torchvision -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ PyTorchDataLoader @@ -84,7 +84,7 @@ def test_self_distillation(self): from neural_compressor.config import DistillationConfig, \ SelfKnowledgeDistillationLossConfig - datasets = 
DATASETS("pytorch") + datasets = Datasets("pytorch") dummy_dataset = datasets["dummy"]( shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True ) diff --git a/test/distributed/test_distributed_pt_train.py b/test/distributed/test_distributed_pt_train.py index 672bdbb5ce8..c46d8fa03ec 100644 --- a/test/distributed/test_distributed_pt_train.py +++ b/test/distributed/test_distributed_pt_train.py @@ -8,7 +8,7 @@ import torch.nn as nn import horovod.torch as hvd -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader def build_fake_py(): @@ -22,7 +22,7 @@ def build_fake_py(): import torch.nn as nn import horovod.torch as hvd -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader diff --git a/test/distributed/test_distributed_tf_dataloader.py b/test/distributed/test_distributed_tf_dataloader.py index 9f4b1d69ba6..88f249ba522 100644 --- a/test/distributed/test_distributed_tf_dataloader.py +++ b/test/distributed/test_distributed_tf_dataloader.py @@ -11,7 +11,7 @@ from neural_compressor import data from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader from neural_compressor.data.dataloaders.dataloader import DataLoader -from neural_compressor.data import DATASETS, DATALOADERS, TRANSFORMS +from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS from neural_compressor.utils import logger from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 diff --git a/test/export/test_onnx_qlieanr_to_qdq.py b/test/export/test_onnx_qlieanr_to_qdq.py new file mode 100644 index 00000000000..63018f12b4a --- /dev/null +++ b/test/export/test_onnx_qlieanr_to_qdq.py @@ -0,0 +1,650 @@ +import os +import shutil +import unittest +import copy +import onnx +import numpy as np 
+from onnx import helper, TensorProto, numpy_helper, onnx_pb +from neural_compressor.adaptor.ox_utils.quantizer import Quantizer +from neural_compressor.adaptor.ox_utils.util import QuantizedInitializer, QuantizedValue, QuantizationMode +import onnxruntime as ort +from neural_compressor import options +from neural_compressor.config import ONNXQlinear2QDQConfig +from neural_compressor.experimental.common import Model + +def build_model(): + initializers = [] + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 3, 15, 15]) + output = helper.make_tensor_value_info('reshape_output', TensorProto.FLOAT, [88, 11]) + + add_node = onnx.helper.make_node('Add', ['input', 'add_init'], ['add_out'], name='add') + + conv1_weight_initializer = numpy_helper.from_array( + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv1_weight') + conv1_node = helper.make_node('Conv', ['add_out', 'conv1_weight'], ['conv1_output'], name='conv1') + + conv2_weight_initializer = numpy_helper.from_array( + np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name='conv2_weight') + conv2_node = helper.make_node('Conv', ['add_out', 'conv2_weight'], ['conv2_output'], name='conv2') + + # 1, 8, 13, 13 + concat_node = helper.make_node('Concat', ['conv1_output', 'conv2_output'], [ + 'concat_output'], name='Concat', axis=1) + # 1, 8, 11, 11 + avg_args = {'kernel_shape': [3, 3]} + avgpool_node = helper.make_node('AveragePool', ['concat_output'], ['avg_output'], name='AveragePool', **avg_args) + reshape_node = onnx.helper.make_node('Reshape', ['avg_output', 'shape'], ['reshape_output'], name='Reshape') + + initializers = [conv1_weight_initializer, conv2_weight_initializer] + initializers.append(onnx.numpy_helper.from_array(np.array([88, 11], dtype=np.int64), name='shape')) + initializers.append(onnx.numpy_helper.from_array(np.zeros((1, 3, 15, 15), dtype=np.float32), name='add_init')) + graph = helper.make_graph([conv1_node, conv2_node, concat_node, avgpool_node, 
reshape_node, add_node], + 'test', [input], [output], initializer=initializers) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + return model + +class TestAdaptorONNXRT(unittest.TestCase): + + qlinear_backend = QuantizationMode.QLinearOps + qdq_backend = 'qdqops' + integer_backend = QuantizationMode.IntegerOps + static_q_config = {"weight":{'dtype': 3, + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'static'} + } + dynamic_q_config = {"weight":{'dtype': 3, + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'dynamic'}} + config = ONNXQlinear2QDQConfig() + + @classmethod + def setUpClass(cls): + os.makedirs('./onnxrt_test') + + @classmethod + def tearDownClass(cls): + shutil.rmtree("./onnxrt_test", ignore_errors=True) + os.remove("test.onnx") + + def qlinear_test(self, model, q_config, quantize_params, quantizable_op_types): + quantizer = Quantizer(copy.deepcopy(model), + q_config, + self.qlinear_backend, + True, + quantize_params, + quantizable_op_types) + model = quantizer.quantize_model() + return Model(model) + + def dynamic_test(self, model, q_config, quantize_params, quantizable_op_types): + quantizer = Quantizer(copy.deepcopy(model), + q_config, + self.integer_backend, + False, + quantize_params, + quantizable_op_types) + quantizer.quantize_model() + return Model(model) + + def test_argmax(self): + input_name = "input" + output_name = "output" + input_shape = [1, 256, 128, 128] + output_shape = [1, 32, 128] + initializers = [] + + # make Conv node + conv_weight_name = "conv_weight" + conv_weight_arr = np.random.randint(-1, 2, [32, 256, 1, 1]).astype(np.float32) + conv_weight_initializer = onnx.numpy_helper.from_array(conv_weight_arr, 
name=conv_weight_name) + conv_output_name = "conv_output" + conv_inputs = [input_name, conv_weight_name] + conv_outputs = [conv_output_name] + conv_name = "conv_node" + conv_node = onnx.helper.make_node( + "Conv", + conv_inputs, + conv_outputs, + dilations=[1, 1], + kernel_shape=[1, 1], + pads=[0, 0, 0, 0], + strides=[1, 1], + name=conv_name, + ) + + # make ArgMax node + argmax_inputs = [conv_output_name] + argmax_outputs = [output_name] + argmax_name = "argmax_node" + argmax_node = onnx.helper.make_node( + "ArgMax", + argmax_inputs, + argmax_outputs, + axis=3, + keepdims=0, + name=argmax_name, + ) + + initializers = [conv_weight_initializer] + + # make graph + input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape) + output_tensor = helper.make_tensor_value_info(output_name, TensorProto.INT64, output_shape) + graph_name = "ArgMax_Quant_Test" + graph = helper.make_graph( + [conv_node, argmax_node], + graph_name, + [input_tensor], + [output_tensor], + initializer=initializers, + ) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = 7 # use stable onnx ir version + q_config = {'conv_node': self.static_q_config, + 'argmax_node': self.static_q_config} + quantize_params = {'input': [np.uint8(0), np.float32(10.)], + 'conv_weight': [np.uint8(0), np.float32(10.)], + 'conv_output': [np.uint8(0), np.float32(10.)], + 'output': [np.uint8(0), np.float32(10.)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ['Conv', 'ArgMax']) + q_model.export('./test.onnx', self.config) + + def test_gemm(self): + input_name = "input" + output_name = "output" + initializers = [] + weight_shape = [100, 10] + weight_name = "linear1.weight" + bias_shape = [100] + bias_name = "linear1.bias" + node_name = "gemm" + + weight_data = np.random.normal(0, 0.1, weight_shape).astype(np.float32) + initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) + + bias_data = 
np.random.normal(0, 0.1, bias_shape).astype(np.float32) + initializers.append(onnx.numpy_helper.from_array(bias_data, name=bias_name)) + + gemm1_node = onnx.helper.make_node( + "Gemm", + [input_name, weight_name, bias_name], + [output_name], + alpha=1.0, + beta=1.0, + transB=1, + name=node_name + ) + + gemm1_output_name = "gemm1_output" + input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, [-1, 10]) + output_tensor = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [-1, 100]) + graph_name = "gemm_test" + graph = helper.make_graph( + [gemm1_node], + graph_name, + [input_tensor], + [output_tensor], + initializer=initializers, + ) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = 7 # use stable onnx ir version + q_config = {'gemm': self.static_q_config} + quantize_params = {'input': [np.uint8(0), np.float32(10.)], + 'linear1.weight': [np.uint8(0), np.float32(10.)], + 'linear1.bias': [np.uint8(0), np.float32(10.)], + 'output': [np.uint8(0), np.float32(10.)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) + q_model.export('./test.onnx', self.config) + + bias_tensor = helper.make_tensor_value_info(bias_name, TensorProto.FLOAT, [100]) + gemm2_node = onnx.helper.make_node( + "Gemm", + [input_name, weight_name, bias_name], + [output_name], + alpha=1.0, + beta=1.0, + transB=1, + name=node_name + ) + initializers = [] + initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) + graph_name = "gemm_test" + graph = helper.make_graph( + [gemm2_node], + graph_name, + [input_tensor, bias_tensor], + [output_tensor], + initializer=initializers, + ) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = 7 + q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) + q_model.export('./test.onnx', self.config) + + def test_embed(self): + input_ids_shape = [1, 4] + input_ids_tensor = 
helper.make_tensor_value_info('input_ids', TensorProto.INT32, input_ids_shape) + + segment_ids_shape = [1, 4] + segment_ids_tensor = helper.make_tensor_value_info('segment_ids', TensorProto.INT32, segment_ids_shape) + + mask_shape = [1, 4] + mask_tensor = helper.make_tensor_value_info('mask', TensorProto.INT32, input_ids_shape) + + # EmbedLayerNormalization Node Constants and Weights: + word_embed_shape = [32, 4] + word_embed_weights = np.random.random_sample(word_embed_shape).astype(dtype='float32') + word_embed_initializer = onnx.numpy_helper.from_array(word_embed_weights, name='word_embed') + + pos_embed_shape = [16, 4] + pos_embed_weights = np.random.random_sample(pos_embed_shape).astype(dtype='float32') + pos_embed_initializer = onnx.numpy_helper.from_array(pos_embed_weights, name='pos_embed') + + seg_embed_shape = [2, 4] + seg_embed_weights = np.random.random_sample(seg_embed_shape).astype(dtype='float32') + seg_embed_initializer = onnx.numpy_helper.from_array(seg_embed_weights, name='seg_embed') + + gamma_shape = [4] + gamma = np.random.random_sample(gamma_shape).astype(dtype='float32') + gamma_initializer = onnx.numpy_helper.from_array(gamma, name='gamma') + + beta_shape = [4] + beta = np.random.random_sample(beta_shape).astype(dtype='float32') + beta_initializer = onnx.numpy_helper.from_array(beta, name='beta') + + # EmbedLayerNormalization Outputs: + layernorm_out_shape = [1, 4, 4] + layernorm_out_tensor = helper.make_tensor_value_info('layernorm_out', TensorProto.FLOAT, layernorm_out_shape) + + mask_index_out_shape = [1] + mask_index_out_tensor = helper.make_tensor_value_info('mask_index_out', TensorProto.INT32, mask_index_out_shape) + + # EmbedLayerNormalization Node: + embed_layer_norm_inputs = [ + 'input_ids', 'segment_ids', 'word_embed', 'pos_embed', 'seg_embed', 'gamma', 'beta', 'mask' + ] + embed_layer_norm_outputs = ['layernorm_out', 'mask_index_out'] + embed_layer_norm_node = helper.make_node('EmbedLayerNormalization', + embed_layer_norm_inputs, 
+ embed_layer_norm_outputs, + domain='com.microsoft', + name='Embed') + + # Construct the Graph and Model: + nodes = [embed_layer_norm_node] + graph_name = 'embed_layernorm_graph' + inputs = [input_ids_tensor, segment_ids_tensor, mask_tensor] + outputs = [layernorm_out_tensor, mask_index_out_tensor] + initializers = [ + word_embed_initializer, pos_embed_initializer, seg_embed_initializer, gamma_initializer, beta_initializer + ] + + graph = helper.make_graph(nodes, graph_name, inputs, outputs, initializer=initializers) + model = helper.make_model(graph, + opset_imports=[helper.make_opsetid("com.microsoft", 1), helper.make_opsetid("ai.onnx", 12)]) + model.ir_version = 7 # use stable onnx ir version + + q_config = {'Embed': self.static_q_config} + quantize_params = {'word_embed': [np.uint8(10.), np.float32(0)], + 'pos_embed': [np.uint8(10.), np.float32(0)], + 'seg_embed': [np.uint8(10.), np.float32(0)], + 'gamma': [np.uint8(10.), np.float32(0)], + 'beta': [np.uint8(10.), np.float32(0)], + 'layernorm_out': [np.uint8(10.), np.float32(0)], + 'mask_index_out': [np.uint8(10.), np.float32(0)], + 'input_ids': [np.uint8(10.), np.float32(0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ['EmbedLayerNormalization']) + q_model.export('./test.onnx', self.config) + + def test_concat_reshape_pooling(self): + model = build_model() + options.onnxrt.qdq_setting.DedicatedQDQPair = True + + q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ + 'Concat':self.static_q_config, 'AveragePool':self.static_q_config, 'add':self.static_q_config} + quantize_params = {'input': [np.uint8(10.), np.float32(0)], + 'conv1_weight': [np.uint8(10.), np.float32(0)], + 'conv1_output': [np.uint8(10.), np.float32(0)], + 'conv2_weight': [np.uint8(10.), np.float32(0)], + 'conv2_output': [np.uint8(10.), np.float32(0)], + 'concat_output': [np.uint8(10.), np.float32(0)], + 'avg_output': [np.uint8(10.), np.float32(0)], + 'add_out': 
[np.uint8(10.), np.float32(0)], + 'add_init': [np.uint8(10.), np.float32(0)], + 'shape': [np.uint8(10.), np.float32(0)], + 'reshape_output': [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ['Reshape', 'Conv', 'Concat', 'AveragePool', 'Add'] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + options.onnxrt.qdq_setting.DedicatedQDQPair = False + + q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':self.static_q_config, \ + 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':'fp32', \ + 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ + 'Concat':self.static_q_config, 'AveragePool':'fp32'} + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + quantize_params = {'input': [np.uint8(10.), np.float32(0)], + 'conv1_weight': [np.uint8(10.), np.float32(0)], + 'conv1_output': [np.uint8(10.), np.float32(0)], + 'conv2_weight': [np.uint8(10.), np.float32(0)], + 'conv2_output': [np.uint8(10.), np.float32(0)], + 'concat_output': [np.uint8(10.), np.float32(0)], + 'avg_output': [np.uint8(10.), np.float32(0)], + 'shape': [np.uint8(10.), np.float32(0)], + 'add_out': [np.uint8(10.), np.float32(0)], + 'add_init': [np.uint8(10.), np.float32(0)], + 'reshape_output': [np.uint8(10.), np.float32(0)]} + q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ + 'Concat':self.static_q_config, 
'AveragePool':self.static_q_config} + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + def test_conv(self): + for op in ['Conv']: + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 3, 3, 1]) + C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 5, 1]) + D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 1]) + conv_node = onnx.helper.make_node(op, ['A', 'B', 'C'], ['D'], + name=op, + kernel_shape=[3, 3], + pads=[1, 1, 1, 1]) + graph = helper.make_graph([conv_node], 'test_graph_1', [A, B, C], [D]) + model = helper.make_model(graph) + q_config = {op: self.static_q_config}, + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)], + "D": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = [op] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + def test_matmul(self): + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 1]) + C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) + matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') + graph = helper.make_graph([matmul_node], 'test_graph_1', [A, B], [C]) + model = helper.make_model(graph) + q_config = {"Matmul": self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ["Matmul"] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + q_config = {"Matmul": self.dynamic_q_config} + q_model = self.dynamic_test(model, q_config, 
None, quantizable_op_types) + q_model.export('./test.onnx', self.config) + quantize_params = {"A": [np.float32(10.)], + "B": [np.float32(10.)], + "C": [np.float32(10.)]} + + q_config = {"Matmul": {"weight":{'dtype': 3, + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'dynamic'}}} + quantize_params = {} + q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + def test_attention(self): + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 128, 768]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [768, 2304]) + C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [2304]) + D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 128, 768]) + mask = helper.make_tensor_value_info('mask', TensorProto.INT32, [1, 128]) + + node = onnx.helper.make_node('Attention', ['A', 'B', 'C', 'mask'], ['D'], name='Attention', num_heads=1) + graph = helper.make_graph([node], 'test_graph_1', [A, B, C, mask], [D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + q_config = {"Attention": self.static_q_config} + quantize_params = {"A": [np.uint8(0), np.float32(0.5)], + "B": [np.uint8(0), np.float32(0.5)], + "C": [np.uint8(0), np.float32(0.5)], + "D": [np.uint8(0), np.float32(0.5)]} + quantizable_op_types = ["Attention"] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + q_config = {"Attention": self.dynamic_q_config} + + def test_gather(self): + a_value = np.random.randn(100, 4).astype(np.float32) + A_init = helper.make_tensor('A', TensorProto.FLOAT, [100, 4], + a_value.reshape(400).tolist()) + b_value = np.random.randint(2, size=(1, 10)).astype(np.int32) + B_init = helper.make_tensor('B', TensorProto.INT32, [1, 10], + 
b_value.reshape(10).tolist()) + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [100, 4]) + B = helper.make_tensor_value_info('B', TensorProto.INT32, [1, 10]) + C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 10, 4]) + node = onnx.helper.make_node('Gather', ['A', 'B'], ['C'], name='Gather') + graph = helper.make_graph([node], 'test_graph_1', [A, B], [C], [A_init, B_init]) + model = helper.make_model(graph) + q_config = {'Gather': {"weight":{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'static'} + }} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ["Gather"] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + q_config = {'Gather': {"weight":{'dtype': 3, + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'dynamic'} + }} + q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + graph = helper.make_graph([node], 'test_graph_1', [A, B], [C]) + model = helper.make_model(graph) + q_config = {'Gather': {"weight":{'dtype': 3, + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'dynamic'} + }} + quantize_params = {} + q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + def test_binary(self): + for op in ['Mul', 'Add']: + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) + B = 
helper.make_tensor_value_info('B', TensorProto.FLOAT, [1]) + C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ['A', 'B'], ['C'], name=op) + graph = helper.make_graph([node], 'test_graph_1', [A, B], [C]) + model = helper.make_model(graph) + q_config = {op: self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = [op] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + q_model = self.qlinear_test(model, q_config, {}, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + def test_activation(self): + config = {"weight":{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity': 'per_tensor'}, + 'activation':{'dtype': 2, + 'algorithm': 'minmax', + 'scheme':'asym', + 'granularity':'per_tensor', + 'quant_mode': 'static'} + } + + for op in ["Relu", "LeakyRelu", "Sigmoid"]: + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 10]) + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ['A'], ['B'], name=op) + graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + model = helper.make_model(graph) + q_config = {op: config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = [op] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + a_value = np.random.randn(1, 10).astype(np.float32) + A_init = helper.make_tensor('A', TensorProto.FLOAT, [1, 10], + a_value.reshape(10).tolist()) + graph = helper.make_graph([node], 'test_graph_1', [A], [B], [A_init]) + model = helper.make_model(graph) + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + 
q_model.export('./test.onnx', self.config) + + def test_pooling(self): + op = "MaxPool" + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 5, 1]) + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) + node = onnx.helper.make_node(op, ['A'], ['B'], + name=op, + kernel_shape=[3, 3], + pads=[1, 1, 1, 1]) + graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + q_config = {op: self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = [op] + for opset_version in [12, 13]: + opset = onnx.OperatorSetIdProto() + opset.version = opset_version + model = helper.make_model(graph, opset_imports=[opset]) + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], + name='Conv', + kernel_shape=[3, 3], + pads=[1, 1, 1, 1]) + pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op, kernel_shape=[1, 1]) + graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + model = helper.make_model(graph) + + q_config = {"Conv": self.static_q_config, op: self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)], + "D": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ["Conv", op] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + op = "GlobalAveragePool" + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 1, 1]) + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) + 
node = onnx.helper.make_node(op, ['A'], ['B'], + name=op) + graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + q_config = {op: self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = [op] + for opset_version in [12, 13]: + opset = onnx.OperatorSetIdProto() + opset.version = opset_version + model = helper.make_model(graph, opset_imports=[opset]) + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 1, 1]) + conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], + name='Conv', + kernel_shape=[3, 3], + pads=[1, 1, 1, 1]) + pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op) + graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + model = helper.make_model(graph) + + q_config = {"Conv": self.static_q_config, op: self.static_q_config} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)], + "D": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ["Conv", op] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + + + def test_exclude_node(self): + A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) + B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [3, 3, 1, 1]) + D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 3, 5, 1]) + conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], + name='Conv', + kernel_shape=[3, 3], + pads=[1, 1, 1, 1]) + pool_node = onnx.helper.make_node("MaxPool", ['C'], ['D'], name="MaxPool", kernel_shape=[1, 1]) + 
graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + model = helper.make_model(graph) + + q_config = {"Conv": self.static_q_config, "MaxPool": "fp32"} + quantize_params = {"A": [np.uint8(10.), np.float32(0)], + "B": [np.uint8(10.), np.float32(0)], + "C": [np.uint8(10.), np.float32(0)], + "D": [np.uint8(10.), np.float32(0)]} + quantizable_op_types = ["Conv", "MaxPool"] + self.config.exclude_output_quantization = ['Conv'] + q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) + q_model.export('./test.onnx', self.config) + +if __name__ == "__main__": + unittest.main() diff --git a/test/export/test_torch2onnx.py b/test/export/test_torch2onnx.py index 01410ff0952..21183a7697e 100644 --- a/test/export/test_torch2onnx.py +++ b/test/export/test_torch2onnx.py @@ -7,10 +7,10 @@ from neural_compressor import quantization from neural_compressor.experimental.common import Model from neural_compressor.config import Torch2ONNXConfig -from neural_compressor.experimental.data.datasets.dataset import DATASETS +from neural_compressor.experimental.data.datasets.dataset import Datasets from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig from neural_compressor.training import prepare_compression -from neural_compressor.data import DATASETS, DATALOADERS +from neural_compressor.data import Datasets, DATALOADERS from transformers import AutoModelForSequenceClassification, AutoTokenizer import torch.utils.data as data @@ -79,7 +79,7 @@ class TestPytorch2ONNX(unittest.TestCase): def setUpClass(self): from torchvision.models.quantization import resnet18 self.cv_model = resnet18() - self.cv_dataset = DATASETS("pytorch")["dummy"]((10, 3, 224, 224)) + self.cv_dataset = Datasets("pytorch")["dummy"]((10, 3, 224, 224)) self.cv_dataloader = DATALOADERS["pytorch"](self.cv_dataset) self.nlp_model = AutoModelForSequenceClassification.from_pretrained( "distilbert-base-uncased-finetuned-sst-2-english" @@ -93,12 
+93,13 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('runs', ignore_errors=True) - # os.remove('fp32-cv-model.onnx') - # os.remove('int8-cv-model.onnx') - # os.remove('fp32-nlp-model.onnx') - # os.remove('int8-nlp-model.onnx') - shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + os.remove('fp32-cv-model.onnx') + os.remove('int8-cv-qdq-model.onnx') + os.remove('int8-cv-qlinear-model.onnx') + os.remove('fp32-nlp-model.onnx') + os.remove('int8-nlp-qdq-model.onnx') + os.remove('int8-nlp-qlinear-model.onnx') def test_fp32_CV_models(self): model = self.cv_model @@ -115,47 +116,167 @@ def test_fp32_CV_models(self): check_CV_onnx('fp32-cv-model.onnx', self.cv_dataloader) def test_int8_CV_models(self): - for fake_yaml in ["dynamic", "qat", "static"]: + for fake_yaml in ["dynamic", "static", "qat"]: model = self.cv_model if fake_yaml == "qat": - quant_conf = QuantizationAwareTrainingConfig(backend='pytorch_fx') + quant_conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) q_model = train_func_cv(compression_manager, compression_manager.model) else: if fake_yaml == "dynamic": quant_conf = PostTrainingQuantConfig(approach="dynamic") elif fake_yaml == "static": - quant_conf = PostTrainingQuantConfig(approach="static", backend='pytorch_fx') + # Random fallback one op to test + fallback_op= { + "conv1": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + op_name_list=fallback_op, + ) q_model = quantization.fit( model, quant_conf, calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None) - if fake_yaml != "dynamic": - int8_onnx_config = Torch2ONNXConfig( - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: 
"batch_size"}, - "output": {0: "batch_size"}}, - calib_dataloader=self.cv_dataloader, - ) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + ) + q_model.export('int8-cv-qdq-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qdq-model.onnx', self.cv_dataloader) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + ) + q_model.export('int8-cv-qlinear-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qlinear-model.onnx', self.cv_dataloader) + + def test_int8_CV_models_recipe2(self): + for fake_yaml in ["dynamic", "static", "qat"]: + model = self.cv_model + if fake_yaml == "qat": + quant_conf = QuantizationAwareTrainingConfig() + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_cv(compression_manager, compression_manager.model) else: - int8_onnx_config = Torch2ONNXConfig( - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, - ) - q_model.export('int8-cv-model.onnx', int8_onnx_config) - check_CV_onnx('int8-cv-model.onnx', self.cv_dataloader) + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + # Random fallback one op to test + fallback_op= { + "conv1": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + op_name_list=fallback_op, + ) + 
q_model = quantization.fit( + model, + quant_conf, + calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + recipe='QDQ_OP_INT32_BIAS', + ) + q_model.export('int8-cv-qdq-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qdq-model.onnx', self.cv_dataloader) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + recipe='QDQ_OP_INT32_BIAS', + ) + q_model.export('int8-cv-qlinear-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qlinear-model.onnx', self.cv_dataloader) + + def test_int8_CV_models_recipe3(self): + for fake_yaml in ["dynamic", "static", "qat"]: + model = self.cv_model + if fake_yaml == "qat": + quant_conf = QuantizationAwareTrainingConfig() + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_cv(compression_manager, compression_manager.model) + else: + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + # Random fallback one op to test + fallback_op= { + "conv1": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + op_name_list=fallback_op, + ) + q_model = quantization.fit( + model, + quant_conf, + calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + 
output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + recipe='QDQ_OP_FP32_BIAS_QDQ', + ) + q_model.export('int8-cv-qdq-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qdq-model.onnx', self.cv_dataloader) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + recipe='QDQ_OP_FP32_BIAS_QDQ', + ) + q_model.export('int8-cv-qlinear-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-qlinear-model.onnx', self.cv_dataloader) def test_fp32_NLP_models(self): symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} @@ -180,7 +301,7 @@ def test_int8_NLP_models(self): for fake_yaml in ["dynamic", "static", "qat"]: model = self.nlp_model if fake_yaml == "qat": - quant_conf = QuantizationAwareTrainingConfig(backend='pytorch_fx') + quant_conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) q_model = train_func_nlp( compression_manager, @@ -191,35 +312,165 @@ def test_int8_NLP_models(self): if fake_yaml == "dynamic": quant_conf = PostTrainingQuantConfig(approach="dynamic") elif fake_yaml == "static": - quant_conf = PostTrainingQuantConfig(approach="static", backend='pytorch_fx') + # Random fallback one op to test + fallback_op= { + "distilbert.transformer.layer.5.ffn.lin2": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + op_name_list=fallback_op, + ) q_model = quantization.fit( model, quant_conf, calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None) - if fake_yaml != "dynamic": - int8_onnx_config = Torch2ONNXConfig( - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=tuple(self.nlp_input.values()), - 
input_names=list(self.nlp_input.keys()), - output_names=['labels'], - dynamic_axes=dynamic_axes, - calib_dataloader=self.nlp_dataloader, + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qdq-model.onnx', self.nlp_input) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qlinear-model.onnx', self.nlp_input) + + def test_int8_NLP_models_recipe2(self): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} + + for fake_yaml in ["dynamic", "static", "qat"]: + model = self.nlp_model + if fake_yaml == "qat": + quant_conf = QuantizationAwareTrainingConfig() + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_nlp( + compression_manager, + compression_manager.model, + self.nlp_input ) else: - int8_onnx_config = Torch2ONNXConfig( - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=tuple(self.nlp_input.values()), - input_names=list(self.nlp_input.keys()), - output_names=['labels'], - dynamic_axes=dynamic_axes, + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + # Random fallback one op to test + fallback_op= { + "distilbert.transformer.layer.5.ffn.lin2": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + 
op_name_list=fallback_op, + ) + q_model = quantization.fit( + model, + quant_conf, + calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + recipe='QDQ_OP_INT32_BIAS', + ) + q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qdq-model.onnx', self.nlp_input) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + recipe='QDQ_OP_INT32_BIAS', + ) + q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qlinear-model.onnx', self.nlp_input) + + def test_int8_NLP_models_recipe3(self): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} + + for fake_yaml in ["dynamic", "static", "qat"]: + model = self.nlp_model + if fake_yaml == "qat": + quant_conf = QuantizationAwareTrainingConfig() + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_nlp( + compression_manager, + compression_manager.model, + self.nlp_input ) - q_model.export('int8-nlp-model.onnx', int8_onnx_config) - check_NLP_onnx('int8-nlp-model.onnx', self.nlp_input) + else: + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + # Random fallback one op to test + fallback_op= { + "distilbert.transformer.layer.5.ffn.lin2": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]} + } + } + quant_conf = PostTrainingQuantConfig( + approach="static", + op_name_list=fallback_op, + ) + q_model = 
quantization.fit( + model, + quant_conf, + calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + recipe='QDQ_OP_FP32_BIAS_QDQ', + ) + q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qdq-model.onnx', self.nlp_input) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + recipe='QDQ_OP_FP32_BIAS_QDQ', + ) + q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-qlinear-model.onnx', self.nlp_input) if __name__ == "__main__": unittest.main() diff --git a/test/ipex/test_adaptor_ipex.py b/test/ipex/test_adaptor_ipex.py index ee9a7e6b4c6..5053d06ba72 100644 --- a/test/ipex/test_adaptor_ipex.py +++ b/test/ipex/test_adaptor_ipex.py @@ -48,7 +48,7 @@ def calib_func(model): class TestPytorchIPEX_1_10_Adaptor(unittest.TestCase): @classmethod def setUpClass(self): - config.quantization.backend = 'pytorch_ipex' + config.quantization.backend = 'ipex' config.quantization.approach = 'post_training_static_quant' config.quantization.use_bf16 = False @@ -61,10 +61,10 @@ def test_tuning_ipex(self): from neural_compressor.experimental import Quantization model = M() quantizer = Quantization(config) + quantizer.model = model quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) - quantizer.model = model quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader nc_model = quantizer.fit() @@ -82,7 +82,7 @@ 
def test_tuning_ipex(self): class TestPytorchIPEX_1_12_Adaptor(unittest.TestCase): @classmethod def setUpClass(self): - config.quantization.backend = 'pytorch_ipex' + config.quantization.backend = 'ipex' config.quantization.approach = 'post_training_static_quant' config.quantization.use_bf16 = False @@ -95,10 +95,10 @@ def test_tuning_ipex(self): from neural_compressor.experimental import Quantization model = M() quantizer = Quantization(config) + quantizer.model = model quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) - quantizer.model = model quantizer.calib_dataloader = dataloader quantizer.calib_func = calib_func quantizer.eval_dataloader = dataloader @@ -119,11 +119,11 @@ def test_tuning_ipex_for_ipex_autotune_func(self): qconfig = ipex.quantization.default_static_qconfig prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False) quantizer = Quantization(config) + quantizer.model = prepared_model quantizer.conf.usr_cfg.tuning.exit_policy['max_trials'] = 5 quantizer.conf.usr_cfg.tuning.exit_policy['timeout'] = 100 dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) - quantizer.model = prepared_model quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader nc_model = quantizer.fit() @@ -144,9 +144,9 @@ def test_bf16(self): config.quantization.use_bf16 = True config.quantization.performance_only = True quantizer = Quantization(config) + quantizer.model = model dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) - quantizer.model = model quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader nc_model = quantizer.fit() diff --git a/test/itex/test_keras_in_keras_out.py 
b/test/itex/test_keras_in_keras_out.py new file mode 100644 index 00000000000..aa776d1d6fd --- /dev/null +++ b/test/itex/test_keras_in_keras_out.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import time +import shutil +import numpy as np +import tensorflow as tf +from tensorflow import keras +from neural_compressor.utils import logger + +test_mode = 'accuracy' + +def build_model(): + # Load MNIST dataset + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
+ model = keras.Sequential([ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10) + ]) + # Train the digit classification model + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['accuracy']) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate( + test_images, test_labels, verbose=0) + + print('Baseline test accuracy:', baseline_model_accuracy) + model.save("baseline_model") + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, 10) + y_test = keras.utils.to_categorical(y_test, 10) + return x_train, y_train, x_test, y_test + +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + start = time.time() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + end = time.time() + + if test_mode == 'performance': + latency = end - start + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} data/sec".format(1. 
/ latency)) + return score[1] + +class Dataset(object): + def __init__(self, batch_size=100): + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + def __len__(self): + return len(self.test_images) + + def __getitem__(self, idx): + return self.test_images[idx], self.test_labels[idx] + + +class TestKerasInKerasOut(unittest.TestCase): + @classmethod + def setUpClass(self): + os.environ["ITEX_ONEDNN_GRAPH"] = '1' + + @classmethod + def tearDownClass(self): + shutil.rmtree('baseline_model',ignore_errors=True) + shutil.rmtree('itex_qdq_keras_model',ignore_errors=True) + + def test_keras_in_keras_out(self): + logger.info("Run test_keras_in_keras_out case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + logger.info("=================Run Quantization...") + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + model = keras.models.load_model('./itex_qdq_keras_model') + model.summary() + found_quantize = False + found_dequantize = False + for layer in model.layers: + if 'quantize' in layer.name: + found_quantize = True + if 'de_quantize' in layer.name: + found_dequantize = True + self.assertEqual(found_quantize, True) + self.assertEqual(found_dequantize, True) + + from neural_compressor.benchmark import fit + from neural_compressor.config import 
BenchmarkConfig + conf = BenchmarkConfig(backend='itex', iteration=100, cores_per_instance=1, num_of_instance=1) + logger.info("=================Run BenchMark...") + test_mode = 'performance' + fit(model, conf, b_func=eval_func) + + def test_keras_model_interface(self): + logger.info("Run test_keras_model_interface case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + self.assertEqual(q_model.framework(), 'keras') + + framework_config = { + 'framework': 'keras', + 'approach': 'post_training_static_quant' + } + q_model.q_config = framework_config + self.assertEqual(q_model.q_config['framework'], 'keras') + self.assertEqual(q_model.graph_info, None) + self.assertEqual(q_model.framework(), 'keras') + self.assertEqual(isinstance(q_model.model, tf.keras.Model), True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/itex/test_tensorflow_itex_basic.py b/test/itex/test_tensorflow_itex_basic.py index 6fc3e9a518a..34e4aafb34d 100644 --- a/test/itex/test_tensorflow_itex_basic.py +++ b/test/itex/test_tensorflow_itex_basic.py @@ -5,14 +5,10 @@ import os import shutil import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer -from 
neural_compressor.adaptor.tensorflow import TensorflowQuery +import platform +from tensorflow.python.platform import gfile from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.experimental import Quantization, common -from neural_compressor.utils.utility import CpuInfo +from neural_compressor.experimental import Quantization, Benchmark, common from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 import tensorflow as tf @@ -23,10 +19,11 @@ def build_fake_yaml(fake_yaml, save_path, **kwargs): with open(file=save_path, mode=kwargs['mode'], encoding=kwargs['encoding']) as f: yaml.dump(y, f) -@unittest.skipIf(tf.version.VERSION.find('up') == -1 and tf.version.VERSION < '2.0', "Only supports tf 1.15.up2/up3 and 2.x") +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") class TestItexEnabling(unittest.TestCase): @classmethod def setUpClass(self): + os.system("rm *.log") fake_yaml_1 = ''' model: name: fake_model_cpu @@ -72,6 +69,12 @@ def setUpClass(self): accuracy: metric: topk: 1 + performance: + warmup: 10 + iteration: 100 + configs: + cores_per_instance: 1 + num_of_instance: 1 tuning: strategy: name: basic @@ -217,5 +220,61 @@ def test_depthwiseconv2d_case(self): reshape_counter += 1 self.assertEqual(reshape_counter, 2) + @disable_random() + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0') or \ + platform.system().lower() == "windows", "Only supports tf greater 2.7.0 and Linux") + def test_itex_benchmark_gpu(self): + x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") + top_relu = tf.nn.relu(x) + paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) + x_pad = tf.pad(top_relu, paddings, "CONSTANT") + conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") 
+ normed = tf.compat.v1.layers.batch_normalization(conv) + conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") + normed2 = tf.compat.v1.layers.batch_normalization(conv2) + add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + relu = tf.nn.relu(add) + relu6 = tf.nn.relu6(relu, name='op_to_store') + out_name = relu6.name.split(':')[0] + num_of_instance = 1 + cores_per_instance = 1 + log_file = '' + with tf.compat.v1.Session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_graph_def = graph_util.convert_variables_to_constants( + sess=sess, + input_graph_def=sess.graph_def, + output_node_names=[out_name]) + + quantizer = Quantization('fake_yaml_2.yaml') + dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = output_graph_def + output_graph = quantizer.fit() + + evaluator = Benchmark('fake_yaml_2.yaml') + evaluator.b_dataloader = common.DataLoader(dataset) + num_of_instance = evaluator.conf.usr_cfg.evaluation.performance.configs.num_of_instance + cores_per_instance = evaluator.conf.usr_cfg.evaluation.performance.configs.cores_per_instance + log_file = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, 0) + if gfile.Exists(log_file): + os.remove(log_file) + evaluator.model = output_graph + evaluator('performance') + + found_multi_instance_log = False + for file_name in os.listdir(os.getcwd()): + if file_name == log_file: + found_multi_instance_log = True + break + + self.assertEqual(found_multi_instance_log, False) + + if __name__ == '__main__': unittest.main() diff --git a/test/itex/test_tensorflow_itex_new_api.py b/test/itex/test_tensorflow_itex_new_api.py new file mode 100644 index 00000000000..7046a4ff2a6 --- /dev/null 
+++ b/test/itex/test_tensorflow_itex_new_api.py @@ -0,0 +1,80 @@ +# +# -*- coding: utf-8 -*- +# +import unittest + +from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.experimental import common +from neural_compressor.quantization import fit +from neural_compressor.config import PostTrainingQuantConfig +from neural_compressor.utils.utility import set_random_seed +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 + +import tensorflow as tf +from tensorflow.python.framework import graph_util + +class TestItexNewAPI(unittest.TestCase): + @classmethod + def setUpClass(self): + pass + + @classmethod + def tearDownClass(self): + pass + + @disable_random() + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + def test_itex_new_api(self): + x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") + top_relu = tf.nn.relu(x) + paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) + x_pad = tf.pad(top_relu, paddings, "CONSTANT") + conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") + normed = tf.compat.v1.layers.batch_normalization(conv) + # relu = tf.nn.relu(normed) + + conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") + normed2 = tf.compat.v1.layers.batch_normalization(conv2) + # relu2 = tf.nn.relu(normed2) + add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + relu = tf.nn.relu(add) + relu6 = tf.nn.relu6(relu, name='op_to_store') + + out_name = relu6.name.split(':')[0] + with tf.compat.v1.Session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_graph_def = graph_util.convert_variables_to_constants( + 
sess=sess, + input_graph_def=sess.graph_def, + output_node_names=[out_name]) + + set_random_seed(9527) + config = PostTrainingQuantConfig( + backend="itex", + quant_format="QDQ", + calibration_sampling_size=[200]) + + from neural_compressor.data import Datasets + dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) + output_graph = fit( + model=output_graph_def, + conf=config, + calib_dataloader=common.DataLoader(dataset=dataset, batch_size=1)) + + dequant_count = 0 + quantize_count = 0 + for i in output_graph.graph_def.node: + if i.op == 'Dequantize': + dequant_count += 1 + if i.op == 'QuantizeV2': + quantize_count += 1 + + self.assertEqual(dequant_count, 5) + self.assertEqual(quantize_count, 4) + +if __name__ == "__main__": + unittest.main() diff --git a/test/metric/test_metrics_2.x.py b/test/metric/test_metrics_2.x.py new file mode 100644 index 00000000000..b638ff47a59 --- /dev/null +++ b/test/metric/test_metrics_2.x.py @@ -0,0 +1,1061 @@ +"""Tests for the metrics module.""" +import numpy as np +import unittest +import platform +from neural_compressor.metric import METRICS +from neural_compressor.metric.f1 import evaluate +from neural_compressor.metric.evaluate_squad import evaluate as evaluate_squad + +class InCorrectMetric: + def __init__(self): + self.item = None + +class CorrectMetric: + def __init__(self): + self.item = [] + + def update(self, samples): + self.item.append(samples) + + def result(self): + return 0 + + def reset(self): + self.item = [] + +class TestMetrics(unittest.TestCase): + def testUserMetric(self): + from neural_compressor.experimental import common, Quantization, Benchmark, \ + Graph_Optimization + for i in [Quantization(), Benchmark(), Graph_Optimization()]: + item = i + with self.assertRaises(AssertionError): + item.metric = InCorrectMetric() + item.framework = 'tensorflow' + item.metric = common.Metric(CorrectMetric, str(i)) + + def testmIOU(self): + metrics = METRICS('tensorflow') + miou = metrics['mIOU']() 
+ preds = np.array([0, 0, 1, 1]) + labels = np.array([0, 1, 0, 1]) + miou.update(preds, labels) + self.assertAlmostEqual(miou.result(), 0.33333334) + + miou.reset() + preds = np.array([0, 0, 1, 1]) + labels = np.array([0, 1, 1, 1]) + miou.update(preds, labels) + self.assertAlmostEqual(miou.result(), 0.58333333) + + def testBLEU(self): + metrics = METRICS('tensorflow') + bleu = metrics['BLEU']() + preds = ['Gutach: Mehr Sicherheit für Fußgänger'] + labels = ('Gutach: Noch mehr Sicherheit für Fußgänger',) + bleu.update(preds, labels) + self.assertAlmostEqual(bleu.result(), 51.1507809) + bleu.reset() + + preds = ['Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt.'] + labels = ('Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.',) + bleu.update(preds, labels) + self.assertAlmostEqual(bleu.result(), 16.108992695) + with self.assertRaises(ValueError): + bleu.update(['a','b'], ('c',)) + + def test_onnxrt_GLUE(self): + metrics = METRICS('onnxrt_qlinearops') + glue = metrics['GLUE']('mrpc') + preds = [np.array( + [[-3.2443411, 3.0909934], + [2.0500996, -2.3100944], + [1.870293 , -2.0741048], + [-2.8377204, 2.617834], + [2.008347 , -2.0215416], + [-2.9693947, 2.7782154], + [-2.9949608, 2.7887983], + [-3.0623112, 2.8748074]]) + ] + labels = [np.array([1, 0, 0, 1, 0, 1, 0, 1])] + glue.update(preds, labels) + self.assertEqual(glue.result(), 0.875) + preds_2 = [np.array( + [[-3.1296735, 2.8356276], + [-3.172515 , 2.9173899], + [-3.220131 , 3.0916846], + [2.1452675, -1.9398905], + [1.5475761, -1.9101546], + [-2.9797182, 2.721741], + [-3.2052834, 2.9934788], + [-2.7451005, 2.622343]]) + ] + labels_2 = [np.array([1, 1, 1, 0, 0, 1, 1, 1])] + glue.update(preds_2, labels_2) + self.assertEqual(glue.result(), 0.9375) + + glue.reset() + glue.update(preds, labels) + self.assertEqual(glue.result(), 0.875) + + def test_tensorflow_F1(self): + metrics = METRICS('tensorflow') + F1 = metrics['F1']() + preds = [1, 1, 1, 1] + labels = [0, 1, 1, 0] + + 
F1.update(preds, labels) + self.assertEqual(F1.result(), 0.5) + + def test_squad_evaluate(self): + label = [{'paragraphs':\ + [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ + {'answer_start': 177, 'text': 'Denver Broncos'}, \ + {'answer_start': 177, 'text': 'Denver Broncos'}], \ + 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ + 'id': '56be4db0acb8001400a502ec'}]}]}] + preds = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + f1 = evaluate(preds, label) + self.assertEqual(f1, 100.) + dataset = [{'paragraphs':\ + [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ + {'answer_start': 177, 'text': 'Denver Broncos'}, \ + {'answer_start': 177, 'text': 'Denver Broncos'}], \ + 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ + 'id': '56be4db0acb8001400a502ec'}]}]}] + predictions = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + f1_squad = evaluate_squad(dataset,predictions) + self.assertEqual(f1_squad['f1'], 100.) + self.assertEqual(f1_squad['exact_match'], 100.) 
+ + + def test_pytorch_F1(self): + metrics = METRICS('pytorch') + F1 = metrics['F1']() + F1.reset() + preds = [1, 1] + labels = [2, 1, 1] + + F1.update(preds, labels) + self.assertEqual(F1.result(), 0.8) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") + def test_mxnet_F1(self): + metrics = METRICS('mxnet') + F1 = metrics['F1']() + preds = [0, 1, 1, 1, 1, 0] + labels = [0, 1, 1, 1] + + F1.update(preds, labels) + self.assertEqual(F1.result(), 0.8) + + def test_onnx_topk(self): + metrics = METRICS('onnxrt_qlinearops') + top1 = metrics['topk']() + top1.reset() + self.assertEqual(top1.result(), 0) + self.assertEqual(top1.result(), 0) + top2 = metrics['topk'](k=2) + top3 = metrics['topk'](k=3) + + predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] + single_predict = [0, 0.2, 0.9, 0.3] + + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] + sparse_labels = [2, 2] + single_label = 2 + + # test functionality of one-hot label + top1.update(predicts, labels) + top2.update(predicts, labels) + top3.update(predicts, labels) + self.assertEqual(top1.result(), 0.0) + self.assertEqual(top2.result(), 0.5) + self.assertEqual(top3.result(), 1) + + # test functionality of sparse label + top1.update(predicts, sparse_labels) + top2.update(predicts, sparse_labels) + top3.update(predicts, sparse_labels) + self.assertEqual(top1.result(), 0.25) + self.assertEqual(top2.result(), 0.75) + self.assertEqual(top3.result(), 1) + + # test functionality of single label + top1.update(single_predict, single_label) + top2.update(single_predict, single_label) + top3.update(single_predict, single_label) + self.assertEqual(top1.result(), 0.4) + self.assertEqual(top2.result(), 0.8) + self.assertEqual(top3.result(), 1) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") + def test_mxnet_topk(self): + metrics = METRICS('mxnet') + top1 = metrics['topk']() + top1.reset() + self.assertEqual(top1.result(), 0) + top2 = 
metrics['topk'](k=2) + top3 = metrics['topk'](k=3) + + predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] + single_predict = [0, 0.2, 0.9, 0.3] + + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] + sparse_labels = [2, 2] + single_label = 2 + + # test functionality of one-hot label + top1.update(predicts, labels) + top2.update(predicts, labels) + top3.update(predicts, labels) + self.assertEqual(top1.result(), 0.0) + self.assertEqual(top2.result(), 0.5) + self.assertEqual(top3.result(), 1) + + # test functionality of sparse label + top1.update(predicts, sparse_labels) + top2.update(predicts, sparse_labels) + top3.update(predicts, sparse_labels) + self.assertEqual(top1.result(), 0.25) + self.assertEqual(top2.result(), 0.75) + self.assertEqual(top3.result(), 1) + + # test functionality of single label + top1.update(single_predict, single_label) + top2.update(single_predict, single_label) + top3.update(single_predict, single_label) + self.assertEqual(top1.result(), 0.4) + self.assertEqual(top2.result(), 0.8) + self.assertEqual(top3.result(), 1) + + def test_tensorflow_topk(self): + metrics = METRICS('tensorflow') + top1 = metrics['topk']() + top1.reset() + self.assertEqual(top1.result(), 0) + top2 = metrics['topk'](k=2) + top3 = metrics['topk'](k=3) + + predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] + single_predict = [0, 0.2, 0.9, 0.3] + + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] + sparse_labels = [2, 2] + single_label = 2 + + # test functionality of one-hot label + top1.update(predicts, labels) + top2.update(predicts, labels) + top3.update(predicts, labels) + self.assertEqual(top1.result(), 0.0) + self.assertEqual(top2.result(), 0.5) + self.assertEqual(top3.result(), 1) + + # test functionality of sparse label + top1.update(predicts, sparse_labels) + top2.update(predicts, sparse_labels) + top3.update(predicts, sparse_labels) + self.assertEqual(top1.result(), 0.25) + self.assertEqual(top2.result(), 0.75) + self.assertEqual(top3.result(), 1) + + # test functionality of single 
label + top1.update(single_predict, single_label) + top2.update(single_predict, single_label) + top3.update(single_predict, single_label) + self.assertEqual(top1.result(), 0.4) + self.assertEqual(top2.result(), 0.8) + self.assertEqual(top3.result(), 1) + + def test_tensorflow_mAP(self): + import json + import os + metrics = METRICS('tensorflow') + fake_dict = 'dog: 1' + with open('anno.yaml', 'w', encoding="utf-8") as f: + f.write(fake_dict) + mAP = metrics['mAP']('anno.yaml') + self.assertEqual(mAP.category_map_reverse['dog'], 1) + detection = [ + np.array([[5]]), + np.array([[5]]), + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([['a', 'b']]), + np.array([[]]), + np.array([b'000000397133.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth) + + detection = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787]]), + np.array([[ 1., 1.]]) + ] + ground_truth = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[b'dog', b'dog']]), + np.array([[]]), + np.array([b'000000397133.jpg']) + ] + mAP.update(detection, ground_truth) + mAP.result() + self.assertEqual(format(mAP.result(), '.5f'), + '1.00000') + + detection = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. 
, 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + detection_2 = [ + np.array([[8]]), + np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], + [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], + [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], + [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], + [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], + [0.7258591 , 0.08907133, 1. , 0.86224866], + [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], + [0.32005906, 0.84334356, 1. , 1. ]]]), + np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ + 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), + np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.56262296, 0.0015625 , 1. , 0.5431719 ], + [0.16374707, 0.60728127, 0.813911 , 0.77823436], + [0.5841452 , 0.21182813, 0.65156907, 0.24670312], + [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], + [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], + [0.3848478 , 0.002125 , 0.61522245, 0.303 ], + [0.61548007, 0. , 0.7015925 , 0.097125 ], + [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], + [0.6274239 , 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375 ], + [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], + [0.7137705 , 0.15429688, 0.726815 , 0.17114063], + [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], + [0.68845433, 0.13501562, 0.714637 , 0.17245312], + [0.69358313, 0.10959375, 0.7043091 , 0.12409375], + [0.493911 , 0. 
, 0.72571427, 0.299 ], + [0.69576114, 0.15107812, 0.70714283, 0.16332813], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([[]]), + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ + 56, 50, 56, 56, 79, 57, 81]]), + np.array([b'000000397133.jpg']) + ] + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.9358696 , 0.07528409, 0.99891305, 0.25 ], + [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], + [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], + [0.77795655, 0.6268466 , 0.89930433, 0.73434657], + [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], + [0.58473915, 0.75661933, 0.5998261 , 0.83579546], + [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], + [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], + [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], + [0.8130869 , 0.58292615, 0.90526086, 0.62551135], + [0.7844348 , 0.68735796, 0.98182607, 0.83329546], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), + np.array([b'000000037777.jpg']) + ] + + mAP = metrics['mAP']() + + self.assertEqual(mAP.result(), 0) + + mAP.update(detection, ground_truth) + + mAP.update(detection, ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.18182') + + mAP.update(detection_2, ground_truth_2) + self.assertEqual(format(mAP.result(), '.5f'), + '0.20347') + mAP.reset() + mAP.update(detection, ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.18182') + + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[[64, 62]]]), + np.array([b'000000037777.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), 
+ np.array([[]]), + np.array([[64]]), + np.array([b'000000037700.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) + detection_1 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000011.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000012.jpg']) + ] + detection_2 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) + os.remove('anno.yaml') + + + def test_tensorflow_VOCmAP(self): + import os + metrics = METRICS('tensorflow') + fake_dict = 'dog: 1' + with open('anno.yaml', 'w', encoding="utf-8") as f: + f.write(fake_dict) + mAP = metrics['VOCmAP']('anno.yaml') + self.assertEqual(mAP.iou_thrs, 0.5) + self.assertEqual(mAP.map_points, 0) + self.assertEqual(mAP.category_map_reverse['dog'], 1) + detection = [ + np.array([[5]]), + np.array([[5]]), + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. 
, 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([['a', 'b']]), + np.array([[]]), + np.array([b'000000397133.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth) + + os.remove('anno.yaml') + + mAP = metrics['VOCmAP']() + detection = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + detection_2 = [ + np.array([[8]]), + np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], + [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], + [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], + [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], + [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], + [0.7258591 , 0.08907133, 1. , 0.86224866], + [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], + [0.32005906, 0.84334356, 1. , 1. ]]]), + np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ + 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), + np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.56262296, 0.0015625 , 1. 
, 0.5431719 ], + [0.16374707, 0.60728127, 0.813911 , 0.77823436], + [0.5841452 , 0.21182813, 0.65156907, 0.24670312], + [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], + [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], + [0.3848478 , 0.002125 , 0.61522245, 0.303 ], + [0.61548007, 0. , 0.7015925 , 0.097125 ], + [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], + [0.6274239 , 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375 ], + [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], + [0.7137705 , 0.15429688, 0.726815 , 0.17114063], + [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], + [0.68845433, 0.13501562, 0.714637 , 0.17245312], + [0.69358313, 0.10959375, 0.7043091 , 0.12409375], + [0.493911 , 0. , 0.72571427, 0.299 ], + [0.69576114, 0.15107812, 0.70714283, 0.16332813], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([[]]), + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ + 56, 50, 56, 56, 79, 57, 81]]), + np.array([b'000000397133.jpg']) + ] + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.9358696 , 0.07528409, 0.99891305, 0.25 ], + [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], + [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], + [0.77795655, 0.6268466 , 0.89930433, 0.73434657], + [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], + [0.58473915, 0.75661933, 0.5998261 , 0.83579546], + [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], + [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], + [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], + [0.8130869 , 0.58292615, 0.90526086, 0.62551135], + [0.7844348 , 0.68735796, 0.98182607, 0.83329546], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), + np.array([b'000000037777.jpg']) + ] + + self.assertEqual(mAP.result(), 0) + + mAP.update(detection, ground_truth) + + mAP.update(detection, 
ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.18182') + + mAP.update(detection_2, ground_truth_2) + self.assertEqual(format(mAP.result(), '.5f'), + '0.20347') + mAP.reset() + mAP.update(detection, ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.18182') + + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[[64, 62]]]), + np.array([b'000000037777.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64]]), + np.array([b'000000037700.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) + detection_1 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000011.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000012.jpg']) + ] + detection_2 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) + + + def test_tensorflow_COCOmAP(self): + import os + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + metrics = METRICS('tensorflow') + fake_dict = 'dog: 1' + with open('anno.yaml', 'w', encoding="utf-8") as f: + f.write(fake_dict) + mAP = metrics['COCOmAP']('anno.yaml') + mAP2 = metrics['COCOmAPv2']('anno.yaml', output_index_mapping=output_index_mapping) + self.assertEqual(mAP.category_map_reverse['dog'], 1) + self.assertEqual(mAP2.category_map_reverse['dog'], 1) + detection = [ + np.array([[5]]), + np.array([[5]]), + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([['a', 'b']]), + np.array([[]]), + np.array([b'000000397133.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth) + + os.remove('anno.yaml') + + mAP = metrics['COCOmAP']() + mAP2 = metrics['COCOmAPv2']() + detection = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. 
, 0.98301625, 0.520178 ], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762], + [0.40032804, 0.01218696, 0.6924763 , 0.30341768], + [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + detection_2 = [ + np.array([[8]]), + np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], + [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], + [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], + [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], + [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], + [0.7258591 , 0.08907133, 1. , 0.86224866], + [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], + [0.32005906, 0.84334356, 1. , 1. ]]]), + np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ + 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), + np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + ] + ground_truth = [ + np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], + [0.56262296, 0.0015625 , 1. , 0.5431719 ], + [0.16374707, 0.60728127, 0.813911 , 0.77823436], + [0.5841452 , 0.21182813, 0.65156907, 0.24670312], + [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], + [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], + [0.3848478 , 0.002125 , 0.61522245, 0.303 ], + [0.61548007, 0. , 0.7015925 , 0.097125 ], + [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], + [0.6274239 , 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375 ], + [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], + [0.7137705 , 0.15429688, 0.726815 , 0.17114063], + [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], + [0.68845433, 0.13501562, 0.714637 , 0.17245312], + [0.69358313, 0.10959375, 0.7043091 , 0.12409375], + [0.493911 , 0. 
, 0.72571427, 0.299 ], + [0.69576114, 0.15107812, 0.70714283, 0.16332813], + [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + np.array([[]]), + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ + 56, 50, 56, 56, 79, 57, 81]]), + np.array([b'000000397133.jpg']) + ] + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.9358696 , 0.07528409, 0.99891305, 0.25 ], + [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], + [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], + [0.77795655, 0.6268466 , 0.89930433, 0.73434657], + [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], + [0.58473915, 0.75661933, 0.5998261 , 0.83579546], + [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], + [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], + [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], + [0.8130869 , 0.58292615, 0.90526086, 0.62551135], + [0.7844348 , 0.68735796, 0.98182607, 0.83329546], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), + np.array([b'000000037777.jpg']) + ] + + self.assertEqual(mAP.result(), 0) + self.assertEqual(mAP2.result(), 0) + + mAP.update(detection, ground_truth) + + mAP.update(detection, ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.14149') + + mAP.update(detection_2, ground_truth_2) + self.assertEqual(format(mAP.result(), '.5f'), + '0.13366') + mAP.reset() + mAP.update(detection, ground_truth) + self.assertEqual(format(mAP.result(), '.5f'), + '0.14149') + + mAP2.update(detection, ground_truth) + + mAP2.update(detection, ground_truth) + self.assertEqual(format(mAP2.result(), '.5f'), + '0.14149') + + mAP2 = metrics['COCOmAPv2'](output_index_mapping=output_index_mapping) + + mAP2.update(detection_2, ground_truth_2) + self.assertEqual(format(mAP2.result(), '.5f'), + '0.20520') + mAP2.reset() + mAP2.update(detection_2, ground_truth_2) + 
self.assertEqual(format(mAP2.result(), '.5f'), + '0.20520') + + mAP2 = metrics['COCOmAPv2']() + + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[[64, 62]]]), + np.array([b'000000037777.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) + self.assertRaises(ValueError, mAP2.update, detection, ground_truth_1) + + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64]]), + np.array([b'000000037700.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) + self.assertRaises(ValueError, mAP2.update, detection, ground_truth_2) + + detection_1 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + ground_truth_1 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000011.jpg']) + ] + self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) + self.assertRaises(ValueError, mAP2.update, detection_1, ground_truth_1) + + ground_truth_2 = [ + np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], + [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[]]), + np.array([[64, 62]]), + np.array([b'000000012.jpg']) + ] + detection_2 = [ + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], + [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), + np.array([[0.9267181 , 0.8510787]]), + np.array([[ 1., 67., 51., 79., 47.]]) + ] + self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) + self.assertRaises(ValueError, mAP2.update, detection_2, ground_truth_2) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") + def test__accuracy(self): + predicts1 = [1, 0, 1, 1] + labels1 = [0, 1, 1, 1] + + predicts2 = [[0, 0], [0, 0]] + labels2 = [[0, 1], [1, 1]] + + predicts3 = [[[0, 1], [0, 0], [0, 1]], [[0, 1], [0, 1], [0, 1]]] + labels3 = [[[0, 1], [0, 1], [1, 0]], [[1, 0], [1, 0], [1, 0]]] + + predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] #1,1,1,1 + labels4 = [0, 1, 0, 0] + + metrics = METRICS('pytorch') + acc = metrics['Accuracy']() + acc.update(predicts1, labels1) + acc_result = acc.result() + self.assertEqual(acc_result, 0.5) + acc.reset() + acc.update(predicts2, labels2) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts3, labels3) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts4, labels4) + self.assertEqual(acc.result(), 0.25) + + metrics = METRICS('mxnet') + acc = metrics['Accuracy']() + acc.update(predicts1, labels1) + acc_result = acc.result() + self.assertEqual(acc_result, 0.5) + acc.reset() + acc.update(predicts2, labels2) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts3, labels3) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts4, labels4) + self.assertEqual(acc.result(), 0.25) + + metrics = METRICS('onnxrt_qlinearops') + acc = metrics['Accuracy']() + acc.update(predicts1, labels1) + acc_result = acc.result() + self.assertEqual(acc_result, 0.5) + acc.reset() + acc.update(predicts2, labels2) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts3, labels3) + self.assertEqual(acc.result(), 0.25) + acc.reset() + acc.update(predicts4, labels4) + self.assertEqual(acc.result(), 0.25) + + 
acc.reset() + acc.update(1, 1) + self.assertEqual(acc.result(), 1.0) + + wrong_predictions = [1, 0, 0] + wrong_labels = [[0, 1, 1]] + self.assertRaises(ValueError, acc.update, wrong_predictions, wrong_labels) + + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") + def test_mxnet_accuracy(self): + metrics = METRICS('mxnet') + acc = metrics['Accuracy']() + predicts = [1, 0, 1, 1] + labels = [0, 1, 1, 1] + acc.update(predicts, labels) + acc_result = acc.result() + self.assertEqual(acc_result, 0.5) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") + def test_mse(self): + predicts1 = [1, 0, 0, 1] + labels1 = [0, 1, 0, 0] + predicts2 = [1, 1, 1, 1] + labels2 = [0, 1, 1, 0] + + metrics = METRICS('onnxrt_qlinearops') + mse = metrics['MSE'](compare_label=False) + mse.update(predicts1, labels1) + mse_result = mse.result() + self.assertEqual(mse_result, 0.75) + mse.update(predicts2, labels2) + mse_result = mse.result() + self.assertEqual(mse_result, 0.625) + + metrics = METRICS('tensorflow') + mse = metrics['MSE'](compare_label=False) + mse.update(predicts1, labels1) + mse_result = mse.result() + self.assertEqual(mse_result, 0.75) + mse.update(predicts2, labels2) + mse_result = mse.result() + self.assertEqual(mse_result, 0.625) + + + metrics = METRICS('mxnet') + mse = metrics['MSE']() + mse.update(predicts1, labels1) + mse_result = mse.result() + self.assertEqual(mse_result, 0.75) + mse.update(predicts2, labels2) + mse_result = mse.result() + self.assertEqual(mse_result, 0.625) + + metrics = METRICS('pytorch') + mse = metrics['MSE']() + mse.update(predicts1, labels1) + mse_result = mse.result() + self.assertEqual(mse_result, 0.75) + mse.update(predicts2, labels2) + mse_result = mse.result() + self.assertEqual(mse_result, 0.625) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") + def test_mae(self): + predicts1 = [1, 0, 0, 1] + labels1 = 
[0, 1, 0, 0] + predicts2 = [1, 1, 1, 1] + labels2 = [1, 1, 1, 0] + + metrics = METRICS('tensorflow') + mae = metrics['MAE']() + mae.update(predicts1, labels1) + mae_result = mae.result() + self.assertEqual(mae_result, 0.75) + mae.update(0, 1) + mae_result = mae.result() + self.assertEqual(mae_result, 0.8) + mae.reset() + mae.update(predicts2, labels2) + mae_result = mae.result() + self.assertEqual(mae_result, 0.25) + + metrics = METRICS('pytorch') + mae = metrics['MAE']() + mae.update(predicts1, labels1) + mae_result = mae.result() + self.assertEqual(mae_result, 0.75) + mae.update(predicts2, labels2) + mae_result = mae.result() + self.assertEqual(mae_result, 0.5) + + metrics = METRICS('mxnet') + mae = metrics['MAE']() + mae.update(predicts1, labels1) + mae_result = mae.result() + self.assertEqual(mae_result, 0.75) + mae.update(predicts2, labels2) + mae_result = mae.result() + self.assertEqual(mae_result, 0.5) + + metrics = METRICS('onnxrt_qlinearops') + mae = metrics['MAE']() + mae.update(predicts1, labels1) + mae_result = mae.result() + self.assertEqual(mae_result, 0.75) + mae.update(predicts2, labels2) + mae_result = mae.result() + self.assertEqual(mae_result, 0.5) + + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) + self.assertRaises(AssertionError, mae.update, 1, [1,2]) + self.assertRaises(AssertionError, mae.update, [1, 2], [1]) + self.assertRaises(AssertionError, mae.update, 1, np.array([1,2])) + + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") + def test_rmse(self): + predicts1 = [1, 0, 0, 1] + labels1 = [1, 0, 0, 0] + predicts2 = [1, 1, 1, 1] + labels2 = [1, 0, 0, 0] + + metrics = METRICS('tensorflow') + rmse = metrics['RMSE']() + rmse.update(predicts1, labels1) + rmse_result = rmse.result() + self.assertEqual(rmse_result, 0.5) + rmse.reset() + rmse.update(predicts2, labels2) + rmse_result = rmse.result() + self.assertAlmostEqual(rmse_result, np.sqrt(0.75)) + + metrics = METRICS('pytorch') + rmse = 
metrics['RMSE']() + rmse.update(predicts1, labels1) + rmse_result = rmse.result() + self.assertEqual(rmse_result, 0.5) + rmse.update(predicts2, labels2) + rmse_result = rmse.result() + self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) + + metrics = METRICS('mxnet') + rmse = metrics['RMSE']() + rmse.update(predicts1, labels1) + rmse_result = rmse.result() + self.assertEqual(rmse_result, 0.5) + rmse.update(predicts2, labels2) + rmse_result = rmse.result() + self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) + + metrics = METRICS('onnxrt_qlinearops') + rmse = metrics['RMSE']() + rmse.update(predicts1, labels1) + rmse_result = rmse.result() + self.assertEqual(rmse_result, 0.5) + rmse.update(predicts2, labels2) + rmse_result = rmse.result() + self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) + + def test_loss(self): + metrics = METRICS('pytorch') + loss = metrics['Loss']() + predicts = [1, 0, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + loss_result = loss.result() + self.assertEqual(loss_result, 0.5) + predicts = [1, 1, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + loss_result = loss.result() + self.assertEqual(loss_result, 0.625) + loss.reset() + predicts = [1, 0, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + self.assertEqual(loss.result(), 0.5) + + + metrics = METRICS('onnxrt_qlinearops') + loss = metrics['Loss']() + predicts = [1, 0, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + loss_result = loss.result() + self.assertEqual(loss_result, 0.5) + predicts = [1, 1, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + loss_result = loss.result() + self.assertEqual(loss_result, 0.625) + loss.reset() + predicts = [1, 0, 0, 1] + labels = [0, 1, 0, 0] + loss.update(predicts, labels) + self.assertEqual(loss.result(), 0.5) + +if __name__ == "__main__": + unittest.main() diff --git a/test/metric/test_mse_metric.py b/test/metric/test_mse_metric.py index c7a6d01a3e5..21a510121db 100644 --- 
a/test/metric/test_mse_metric.py +++ b/test/metric/test_mse_metric.py @@ -1,7 +1,5 @@ import torch -import torch.nn.quantized as nnq -from torch.quantization import QuantStub, DeQuantStub import torchvision import unittest import os @@ -217,8 +215,8 @@ def test_fx_dynamic_quantization_saved(self): fake_yaml = 'fx_dynamic_yaml.yaml' model = torchvision.models.resnet18() quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True) quantizer.model = model + dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() diff --git a/test/metric/test_register_metric_transform.py b/test/metric/test_register_metric_transform.py index e8695006688..ff42df870a5 100644 --- a/test/metric/test_register_metric_transform.py +++ b/test/metric/test_register_metric_transform.py @@ -42,7 +42,7 @@ def test_register_metric_postprocess(self): resize_image = resize_image - mean images = np.expand_dims(resize_image, axis=0) labels = [768] - from neural_compressor import Benchmark, Quantization + from neural_compressor import Benchmark from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift from neural_compressor.experimental.metric.metric import TensorflowTopK os.environ['NC_ENV_CONF'] = 'True' @@ -53,9 +53,6 @@ def test_register_metric_postprocess(self): dataloader = evaluator.dataloader(dataset=list(zip(images, labels))) evaluator(self.pb_path, b_dataloader=dataloader) - quantizer = Quantization('fake_yaml.yaml') - quantizer.postprocess('label_quantize', LabelShift, label_shift=1) - quantizer.metric('topk_quantize', TensorflowTopK) evaluator = Benchmark('fake_yaml.yaml') evaluator.metric('topk_second', TensorflowTopK) dataloader = evaluator.dataloader(dataset=list(zip(images, labels))) diff --git a/test/mixed_precision/test_mixed_precision.py 
b/test/mixed_precision/test_mixed_precision.py index a05a3e25e5c..26255e5a4ef 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -10,7 +10,7 @@ from neural_compressor import mix_precision from neural_compressor.utils.utility import LazyImport, CpuInfo from neural_compressor.adaptor.torch_utils.bf16_convert import BF16ModuleWrapper -from neural_compressor.config import MixedPrecisionConfig, set_workspace, TuningCriterion +from neural_compressor.config import MixedPrecisionConfig, TuningCriterion from onnx import helper, TensorProto from packaging.version import Version from tensorflow.core.framework import attr_value_pb2 @@ -200,31 +200,6 @@ def build_pt_model(): resnet18 = LazyImport("torchvision.models.resnet18") return resnet18() - -def build_yaml(): - fake_yaml = """ - model: - name: test - framework: onnxrt_qlinearops - - mixed_precision: - precisions: fp16 - - evaluation: - accuracy: - metric: - MSE: - compare_label: False - dataloader: - dataset: - dummy: - shape: [[5,1,5,5], [5,1,5,1]] - label: True - """ - with open("test.yaml", "w", encoding="utf-8") as f: - f.write(fake_yaml) - - class MatmulDataset: def __init__(self): self.data = [] @@ -260,33 +235,20 @@ def setUpClass(self): self.onnx_model = build_matmul_model() self.tf_model = build_tf_graph() - def test_on_non_enabled_host(self): - # test onnx - conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops") - with self.assertRaises(SystemExit) as cm: - output_model = mix_precision.fit(self.onnx_model, conf) - self.assertEqual(cm.exception.code, 0) - @unittest.skipIf(CpuInfo().bf16, 'skip since hardware support bf16') def test_on_non_enabled_host_tf(self): - conf = MixedPrecisionConfig(extra_precisions=["bf16"], backend="tensorflow") + conf = MixedPrecisionConfig() with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.tf_model, conf) self.assertEqual(cm.exception.code, 0) def 
test_on_non_enabled_dtype(self): # test onnx - conf = MixedPrecisionConfig(extra_precisions=["bf16"], backend="onnxrt_qlinearops") + conf = MixedPrecisionConfig() with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.onnx_model, conf) self.assertEqual(cm.exception.code, 0) - conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="tensorflow") - with self.assertRaises(SystemExit) as cm: - output_model = mix_precision.fit(self.tf_model, conf) - self.assertEqual(cm.exception.code, 0) - - class TestMixedPrecision(unittest.TestCase): @classmethod def setUpClass(self): @@ -296,7 +258,6 @@ def setUpClass(self): self.matmul_dataset = MatmulDataset() self.tf_model = build_tf_graph() self.pt_model = build_pt_model() - build_yaml() @classmethod def tearDownClass(self): @@ -304,35 +265,6 @@ def tearDownClass(self): del os.environ['FORCE_BF16'] shutil.rmtree("./saved", ignore_errors=True) shutil.rmtree("./nc_workspace", ignore_errors=True) - os.remove("test.yaml") - - def test_mixed_precision_with_evaluation(self): - from neural_compressor.experimental import common - from neural_compressor.experimental.metric.metric import ONNXRT_QL_METRICS - # test onnx - conf = MixedPrecisionConfig(extra_precisions=["fp16"], - backend="onnxrt_qlinearops") - set_workspace("./saved") - output_model = mix_precision.fit(self.onnx_model, conf) - self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - - tuning_criterion = TuningCriterion(max_trials=3, timeout=50) - conf = MixedPrecisionConfig(extra_precisions=["fp16"], - backend="onnxrt_qlinearops", - tuning_criterion=tuning_criterion) - - output_model = mix_precision.fit(self.onnx_model, - conf, - eval_dataloader=common.DataLoader(self.matmul_dataset), - eval_metric=ONNXRT_QL_METRICS["MSE"]()) - self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - - from neural_compressor.conf.config import MixedPrecision_Conf - from neural_compressor.experimental import MixedPrecision 
- converter = MixedPrecision(MixedPrecision_Conf('test.yaml')) - converter.model = self.onnx_model - output_model = converter.fit() - self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) def test_mixed_precision_with_eval_func(self): def eval(model): @@ -344,30 +276,10 @@ def eval2(model): del result[0] return result[0] - from neural_compressor.experimental import MixedPrecision, common from neural_compressor import conf - my_metric = Metric() - conf = MixedPrecisionConfig(extra_precisions=["fp16"], - backend="onnxrt_qlinearops") - - output_model = mix_precision.fit(self.onnx_model, - conf, - eval_dataloader=common.DataLoader(self.matmul_dataset), - eval_metric=my_metric) - self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - conf = MixedPrecisionConfig(extra_precisions=["fp16"], - backend="onnxrt_qlinearops") - - output_model = mix_precision.fit(self.onnx_model, - conf, - eval_dataloader=common.DataLoader(self.matmul_dataset), - eval_metric=common.Metric(Metric)) - self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - conf = MixedPrecisionConfig( inputs="input", outputs="final", - extra_precisions=["bf16", "fp32"], ) output_model = mix_precision.fit( @@ -376,30 +288,22 @@ def eval2(model): eval_func=eval, ) self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) - self.assertEqual(conf.extra_precisions, ['bf16', 'fp32']) self.assertEqual(conf.inputs, 'input') self.assertEqual(conf.outputs, 'final') tuning_criterion = TuningCriterion(max_trials=4, timeout=500) - conf = MixedPrecisionConfig( - backend="tensorflow", - tuning_criterion=tuning_criterion, - extra_precisions=["bf16"], - ) + conf = MixedPrecisionConfig(tuning_criterion=tuning_criterion) output_model = mix_precision.fit( - common.Model(self.tf_model), + self.tf_model, conf, eval_func=eval2, ) self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) tuning_criterion = TuningCriterion(max_trials=1, 
timeout=100) - conf = MixedPrecisionConfig( - inputs="input", - outputs="final, test", - tuning_criterion=tuning_criterion, - extra_precisions=["bf16", "fp32"], - ) + conf = MixedPrecisionConfig(inputs="input", + outputs="final, test", + tuning_criterion=tuning_criterion) output_model = mix_precision.fit( self.tf_model, conf, @@ -413,10 +317,7 @@ def test_mixed_precision_with_eval_func_pt(self): def eval(model): return 0.5 - conf = MixedPrecisionConfig( - extra_precisions=["bf16"], - backend="pytorch" - ) + conf = MixedPrecisionConfig() output_model = mix_precision.fit( self.pt_model, conf, diff --git a/test/model/test_model.py b/test/model/test_model.py index 8508105f193..ae322d92e09 100644 --- a/test/model/test_model.py +++ b/test/model/test_model.py @@ -4,7 +4,8 @@ import os import platform from neural_compressor.model import MODELS -import neural_compressor.model.model as NCModel +from neural_compressor.model.onnx_model import ONNXModel +from neural_compressor.model.mxnet_model import MXNetModel from neural_compressor.model.model import get_model_fwk_name from neural_compressor.experimental.common.model import Model @@ -134,7 +135,7 @@ def test_graph(self): self.assertEqual(True, isinstance(model.graph_def, tf.compat.v1.GraphDef)) def test_validate_graph_node(self): - from neural_compressor.model.model import validate_graph_node + from neural_compressor.model.tensorflow_model import validate_graph_node graph = build_graph() self.assertEqual(False, validate_graph_node(graph.as_graph_def(), [])) self.assertEqual(False, validate_graph_node(graph.as_graph_def(), ['test'])) @@ -247,6 +248,27 @@ def test_keras_saved_model(self): os.system('rm -rf simple_model') os.system('rm -rf keras_model') + def test_tf_qat_model(self): + if tf.version.VERSION < '2.3.0': + return + keras_model = build_keras() + self.assertEqual('tensorflow', get_model_fwk_name(keras_model)) + + from neural_compressor.model.tensorflow_model import TensorflowQATModel + model = 
TensorflowQATModel(keras_model) + assert isinstance(model.model, tf.keras.Model) + keras_model.save('./simple_model') + # load from path + model = TensorflowQATModel('./simple_model') + assert isinstance(model.model, tf.keras.Model) + + + os.makedirs('./keras_model', exist_ok=True) + model.save('./keras_model') + load_model = tf.keras.models.load_model('./keras_model') + os.system('rm -rf simple_model') + os.system('rm -rf keras_model') + @unittest.skipIf(tf.version.VERSION < '2.4.0' or platform.system().lower() == "windows", "Only supports tf 2.4.0 or above") def test_saved_model(self): ssd_resnet50_ckpt_url = 'http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz' @@ -302,7 +324,7 @@ def test_saved_model(self): def test_tensorflow(self): - from neural_compressor.model.model import TensorflowBaseModel + from neural_compressor.model.tensorflow_model import TensorflowBaseModel ori_model = build_graph() self.assertEqual('tensorflow', get_model_fwk_name(ori_model)) self.assertEqual('tensorflow', get_model_fwk_name(TensorflowBaseModel(ori_model))) @@ -346,7 +368,7 @@ def tearDownClass(self): def test_model(self): self.assertEqual('onnxruntime', get_model_fwk_name(self.cnn_export_path)) model = MODELS['onnxruntime'](self.cnn_model) - self.assertEqual(True, isinstance(model, NCModel.ONNXModel)) + self.assertEqual(True, isinstance(model, ONNXModel)) self.assertEqual(True, isinstance(model.model, onnx.ModelProto)) model.save('test.onnx') @@ -356,7 +378,7 @@ def test_model(self): class TestPyTorchModel(unittest.TestCase): def testPyTorch(self): import torchvision - from neural_compressor.model.model import PyTorchModel, PyTorchIpexModel, PyTorchFXModel + from neural_compressor.model.torch_model import PyTorchModel, IPEXModel, PyTorchFXModel ori_model = torchvision.models.mobilenet_v2() self.assertEqual('pytorch', get_model_fwk_name(ori_model)) pt_model = PyTorchModel(ori_model) @@ -365,7 +387,7 
@@ def testPyTorch(self): with self.assertRaises(AssertionError): pt_model.workspace_path = './pytorch' - ipex_model = PyTorchIpexModel(ori_model) + ipex_model = IPEXModel(ori_model) self.assertTrue(ipex_model.model) ipex_model.model = ori_model ipex_model = PyTorchModel(torchvision.models.mobilenet_v2()) @@ -374,7 +396,7 @@ def testPyTorch(self): ipex_model.save('./') self.assertEqual('pytorch', get_model_fwk_name(PyTorchModel(ori_model))) - self.assertEqual('pytorch', get_model_fwk_name(PyTorchIpexModel(ori_model))) + self.assertEqual('pytorch', get_model_fwk_name(IPEXModel(ori_model))) self.assertEqual('pytorch', get_model_fwk_name(PyTorchFXModel(ori_model))) def load_mxnet_model(symbol_file, param_file): @@ -417,7 +439,7 @@ def test_model(self): import mxnet as mx self.assertEqual('mxnet', get_model_fwk_name(self.net)) model = MODELS['mxnet'](self.net) - self.assertEqual(True, isinstance(model, NCModel.MXNetModel)) + self.assertEqual(True, isinstance(model, MXNetModel)) self.assertEqual(True, isinstance(model.model, mx.gluon.HybridBlock)) model.save('./test') diff --git a/test/model/test_model_pytorch.py b/test/model/test_model_pytorch.py index a6bae08fa77..f2c62be5714 100644 --- a/test/model/test_model_pytorch.py +++ b/test/model/test_model_pytorch.py @@ -2,7 +2,8 @@ import torchvision import unittest import neural_compressor.adaptor.pytorch as nc_torch -from neural_compressor.model import MODELS +from neural_compressor.model import MODELS, Model +from neural_compressor.model.torch_model import PyTorchModel from packaging.version import Version try: @@ -23,6 +24,11 @@ class TestPytorchModel(unittest.TestCase): model = torchvision.models.quantization.resnet18() lpot_model = MODELS['pytorch'](model) + def test_Model(self): + model = torchvision.models.quantization.resnet18() + inc_model = Model(model) + self.assertTrue(isinstance(inc_model, PyTorchModel)) + def test_get_all_weight_name(self): assert len(list(self.lpot_model.get_all_weight_names())) == 62 diff 
--git a/test/model/test_tensorflow_auto_input_output.py b/test/model/test_tensorflow_auto_input_output.py index 28eeff06b1e..2e9776a875f 100644 --- a/test/model/test_tensorflow_auto_input_output.py +++ b/test/model/test_tensorflow_auto_input_output.py @@ -6,7 +6,7 @@ import platform from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from neural_compressor.experimental.common.model import Model as TensorflowModel -from neural_compressor.model.model import validate_graph_node +from neural_compressor.model.tensorflow_model import validate_graph_node class TestTFAutoDetectInputOutput(unittest.TestCase): mb_model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb' diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4d22673d578..10673939388 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -1,15 +1,17 @@ -from multiprocessing.spawn import import_main_path import os import shutil import unittest +from pathlib import Path + import numpy as np import torch from neural_compressor.conf.config import NASConfig -from neural_compressor.data import DATASETS -from neural_compressor.experimental import common, NAS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.nas.dynas import DyNAS +from neural_compressor.data import Datasets +from neural_compressor.experimental import NAS, common +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ + PyTorchDataLoader + def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -143,7 +145,7 @@ def test_basic_nas(self): self.assertTrue(len(best_model_archs) > 0) # Customized train, evaluation - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) def 
train_func(model): @@ -197,6 +199,8 @@ def test_dynas(self): config.dynas.batch_size = 64 nas_agent = NAS(config) best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + nas_agent.acc_predictor.get_parameters() nas_agent.acc_predictor.save('tmp.pickle') nas_agent.acc_predictor.load('tmp.pickle') @@ -206,11 +210,19 @@ def test_dynas(self): nas_agent.runner_validate.measure_latency(subnet_cfg) nas_agent.validation_interface.clear_csv() os.remove('tmp.pickle') - from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference - reference = TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) - reference.validate_macs() - reference.measure_latency() - self.assertTrue(len(best_model_archs) > 0) + + def test_vision_reference(self): + from neural_compressor.experimental.nas.dynast.dynas_utils import \ + TorchVisionReference + reference = TorchVisionReference('ofa_mbv3', dataset_path=None, batch_size=1) + macs = reference.validate_macs() + + self.assertEqual(macs, 217234208) + + reference.measure_latency( + warmup_steps=1, + measure_steps=1, + ) if __name__ == "__main__": diff --git a/test/objective/test_objective.py b/test/objective/test_objective.py index e21aa0e57bc..cd4cea35464 100644 --- a/test/objective/test_objective.py +++ b/test/objective/test_objective.py @@ -225,8 +225,8 @@ def tearDownClass(self): shutil.rmtree('./saved', ignore_errors=True) def test_performance(self): - from neural_compressor.data import DATASETS - dataset = DATASETS('tensorflow')['dummy']((100, 256, 256, 1), label=True) + from neural_compressor.data import Datasets + dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) from neural_compressor.experimental import Quantization, common from neural_compressor.utils.utility import get_size @@ -245,8 +245,8 @@ def test_performance(self): def test_model_size(self): from neural_compressor.experimental import Benchmark, common - from neural_compressor.data 
import DATASETS - dataset = DATASETS('tensorflow')['dummy']((100, 256, 256, 1), label=True) + from neural_compressor.data import Datasets + dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) benchmarker = Benchmark('fake_yaml_model_size.yaml') benchmarker.b_dataloader = common.DataLoader(dataset) @@ -255,8 +255,8 @@ def test_model_size(self): def test_footprint(self): from neural_compressor.experimental import Benchmark, common - from neural_compressor.data import DATASETS - dataset = DATASETS('tensorflow')['dummy']((100, 256, 256, 1), label=True) + from neural_compressor.data import Datasets + dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) benchmarker = Benchmark('fake_yaml_footprint.yaml') benchmarker.b_dataloader = common.DataLoader(dataset) diff --git a/test/pruning/test_pruning.py b/test/pruning/test_pruning.py index 3e1290e6bb7..57fdb9fd604 100644 --- a/test/pruning/test_pruning.py +++ b/test/pruning/test_pruning.py @@ -5,127 +5,71 @@ import torch import torchvision import torch.nn as nn - -from neural_compressor.config import Pruner, PruningConfig -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.training import prepare_compression - - -def build_fake_yaml(): - fake_yaml = """ - model: - name: imagenet_prune - framework: pytorch - - pruning: - approach: - weight_compression: - initial_sparsity: 0.0 - target_sparsity: 0.97 - start_epoch: 0 - end_epoch: 2 - pruners: - - !Pruner - start_epoch: 1 - end_epoch: 2 - prune_type: basic_magnitude - names: ['layer1.0.conv1.weight'] - - - !Pruner - target_sparsity: 0.6 - prune_type: basic_magnitude - update_frequency: 2 - names: ['layer1.0.conv2.weight'] - """ - with open('fake.yaml', 'w', encoding="utf-8") as f: - f.write(fake_yaml) +from neural_compressor.pruning import Pruning, WeightPruningConfig class 
TestPruning(unittest.TestCase): - model = torchvision.models.resnet18() - @classmethod - def setUpClass(cls): - build_fake_yaml() - - @classmethod - def tearDownClass(cls): - os.remove('fake.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - - def test_pruning(self): - pruner1 = Pruner(start_epoch=1, end_epoch=2, names=['layer1.0.conv1.weight']) - pruner2 = Pruner(target_sparsity=0.6, update_frequency=2, names=['layer1.0.conv2.weight']) - conf = PruningConfig(pruners=[pruner1, pruner2], end_epoch=2) - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - compression_manager = prepare_compression(self.model, conf) - model = compression_manager.model + def test_pruning_basic(self): + local_configs = [ + { + "op_names": ['layer1.*'], + 'target_sparsity': 0.5, + "pattern": '8x2', + "pruning_type": "magnitude_progressive" + }, + { + "op_names": ['layer2.*'], + 'target_sparsity': 0.5, + 'pattern': '2:4' + }, + { + "op_names": ['layer3.*'], + 'target_sparsity': 0.7, + 'pattern': '5x1', + "pruning_type": "snip_progressive" + } + ] + config = WeightPruningConfig( + local_configs, + target_sparsity=0.8 + ) + prune = Pruning(config) + prune.update_config(start_step=1, end_step=10) + prune.model = self.model - epochs = 2 - iters = 3 criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - for nepoch in range(epochs): - model.train() - cnt = 0 - compression_manager.callbacks.on_epoch_begin(nepoch) - for image, target in dummy_dataloader: - compression_manager.callbacks.on_step_begin(cnt) - print('.', end='') - cnt += 1 - output = model(image) - loss = criterion(output, target) - optimizer.zero_grad() - loss.backward() - optimizer.step() - compression_manager.callbacks.on_step_end() - if cnt >= iters: - break - compression_manager.callbacks.on_epoch_end() - - 
model.save("./saved") - - def test_pruning_external(self): - from neural_compressor.experimental import common - from neural_compressor import Pruning - from neural_compressor.conf.config import PruningConf - pruners = [Pruner(1,3,names=['layer1.0.conv1.weight']), - Pruner(target_sparsity=0.6,update_frequency=2,names=['layer1.0.conv2.weight'])] - conf = PruningConfig(pruners) - - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) - compression_manager = prepare_compression(self.model, conf) - model = compression_manager.model - epochs = 2 - iters = 3 - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - for nepoch in range(epochs): - model.train() - cnt = 0 - compression_manager.callbacks.on_epoch_begin(nepoch) + prune.on_train_begin() + prune.update_config(pruning_frequency=4) + for epoch in range(2): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 for image, target in dummy_dataloader: - compression_manager.callbacks.on_step_begin(cnt) - print('.', end='') - cnt += 1 - output = model(image) + prune.on_step_begin(local_step) + output = self.model(image) loss = criterion(output, target) optimizer.zero_grad() loss.backward() + prune.on_before_optimizer_step() optimizer.step() - compression_manager.callbacks.on_step_end() - if cnt >= iters: - break - compression_manager.callbacks.on_epoch_end() - model.save("./saved") + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() if __name__ == "__main__": diff --git a/test/pruning/test_pruning_config.py 
b/test/pruning/test_pruning_config.py new file mode 100644 index 00000000000..4430affbb49 --- /dev/null +++ b/test/pruning/test_pruning_config.py @@ -0,0 +1,80 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + + +class TestPytorchPruning(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_class_config(self): + local_configs = [ + { + "op_names": ['layer1.*', 'layer2.*'], + "excluded_op_names": ['downsample.*'], + 'target_sparsity': 0.6, + "pattern": 'channelx1', + "pruning_type": "snip_progressive", + "pruning_scope": "local", + "start_step": 0, + "end_step": 10 + }, + { + "op_names": ['layer3.*'], + "pruning_type": "pattern_lock" + } + ] + config = WeightPruningConfig( + local_configs, + pruning_frequency=2, + target_sparsity=0.8, + ) + prune = Pruning(config) + prune.model = self.model + + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(12, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + + prune.on_train_begin() + prune.update_config(pruning_frequency=4) + assert prune.pruners[0].config['pruning_frequency'] == 4 + assert prune.pruners[0].config['target_sparsity'] == 0.6 + assert prune.pruners[1].config['target_sparsity'] == 0.8 + assert prune.pruners[0].config['pattern'] == "channelx1" + assert prune.pruners[1].config['pruning_type'] == 'pattern_lock' + + for epoch in range(1): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + 
optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_criteria.py b/test/pruning/test_pruning_criteria.py new file mode 100644 index 00000000000..03a54d60d7c --- /dev/null +++ b/test/pruning/test_pruning_criteria.py @@ -0,0 +1,87 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + + +class TestPruningCriteria(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_criteria(self): + local_configs = [ + { + "op_names": ['layer1.*'], + 'target_sparsity': 0.4, + "pattern": '8x2', + "pruning_type": "magnitude_progressive", + "pruning_scope": "local", + "sparsity_decay_type": "cube" + }, + { + "op_names": ['layer2.*'], + 'target_sparsity': 0.45, + 'pattern': '2:4', + "pruning_type": "snip", + 'start_step': 6, + 'end_step': 6 + }, + { + "op_names": ['layer3.*'], + 'excluded_op_names': ['downsample.*'], + 'target_sparsity': 0.7, + 'pattern': '4x1', + "pruning_type": "snip_momentum_progressive", + "pruning_frequency": 4, + "min_sparsity_ratio_per_op": 0.5, + "max_sparsity_ratio_per_op": 0.8, + } + ] + config = WeightPruningConfig( + local_configs, + target_sparsity=0.8, + sparsity_decay_type="cube" + ) + prune = Pruning(config) + prune.update_config(start_step=1, end_step=10) + prune.model = self.model + + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = 
datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + + prune.on_train_begin() + prune.update_config(pruning_frequency=4) + for epoch in range(2): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_patterns.py b/test/pruning/test_pruning_patterns.py new file mode 100644 index 00000000000..f5f6db91f34 --- /dev/null +++ b/test/pruning/test_pruning_patterns.py @@ -0,0 +1,83 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + + +class TestPruningPatterns(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_pattern(self): + local_configs = [ + { + "op_names": ['layer1.*'], + 'target_sparsity': 0.5, + "pattern": '5:8', + "pruning_type": "magnitude" + }, + { + "op_names": ['layer2.*'], + "pattern": '1xchannel', + "pruning_scope": "global" + }, + { + "start_step": 2, + "end_step": 20, + "op_names": ['layer3.*'], + 'target_sparsity': 0.666666, + 'pattern': '4x2', + "pruning_type": "snip_progressive", + "pruning_frequency": 5 + } + ] + config = WeightPruningConfig( + local_configs, + target_sparsity=0.8, + sparsity_decay_type="cos", + excluded_op_names=["downsample.*"], + 
pruning_scope="local", + min_sparsity_ratio_per_op=0.1 + ) + prune = Pruning(config) + prune.update_config(start_step=1, end_step=10) + prune.model = self.model + + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + + prune.on_train_begin() + for epoch in range(5): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_regs.py b/test/pruning/test_pruning_regs.py new file mode 100644 index 00000000000..7da5f44852f --- /dev/null +++ b/test/pruning/test_pruning_regs.py @@ -0,0 +1,98 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + +local_regs_config = [ + { + "start_step": 0, + "end_step": 10, + "pruning_type": "magnitude", + "op_names": ['layer1.*'], + "excluded_op_names": ['layer2.*'], + "pruning_scope": "global", + "target_sparsity": 0.5, + "pattern": "4x1", + "reg_type": "group_lasso", + "parameters": {'reg_coeff': 0.2} + }, + { + "start_step": 1, + "end_step": 1, + "target_sparsity": 0.5, + "pruning_type": "snip_momentum", + "pruning_frequency": 
2, + "op_names": ['layer2.*'], + "pruning_scope": "local", + "target_sparsity": 0.75, + "pattern": "1x1", + "sparsity_decay_type": "exp", + "reg_type": "group_lasso", + "parameters": {'reg_coeff': 0.1} + }, + { + "start_step": 2, + "end_step": 8, + "target_sparsity": 0.1, + "pruning_type": "gradient", + "pruning_frequency": 2, + "op_names": ['fc'], + "pruning_scope": "local", + "target_sparsity": 0.75, + "pattern": "1x1", + "sparsity_decay_type": "cube", + "reg_type": "group_lasso", + "parameters": {'reg_coeff': 0.0} + } +] + +fake_snip_config = WeightPruningConfig(local_regs_config, target_sparsity=0.9, start_step=0, \ + end_step=10, pruning_frequency=1, sparsity_decay_type="exp") + + +class TestPruningRegs(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_regs(self): + prune = Pruning(fake_snip_config) + prune.update_config(start_step=1) + prune.model = self.model + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + prune.on_train_begin() + prune.update_config(pruning_frequency=1) + for epoch in range(2): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_schedulers.py b/test/pruning/test_pruning_schedulers.py new file mode 100644 index 00000000000..272b766f661 --- /dev/null +++ 
b/test/pruning/test_pruning_schedulers.py @@ -0,0 +1,81 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + +local_schedulers_config = [ + { + "start_step": 0, + "end_step": 2, + "pruning_type": "magnitude", + "op_names": ['layer1.*'], + "excluded_op_names": ['layer2.*'], + "pruning_scope": "global", + "target_sparsity": 0.5, + "pattern": "4x1" + }, + { + "start_step": 1, + "end_step": 10, + "target_sparsity": 0.5, + "pruning_type": "snip_momentum", + "pruning_frequency": 2, + "op_names": ['layer2.*'], + "pruning_scope": "local", + "target_sparsity": 0.75, + "pattern": "32x1", + "sparsity_decay_type": "exp" + } +] + +fake_snip_config = WeightPruningConfig(local_schedulers_config, target_sparsity=0.9, start_step=0, \ + end_step=10, pruning_frequency=1, sparsity_decay_type="exp") + + +class TestPruningCriteria(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_schedulers(self): + + prune = Pruning(fake_snip_config) + prune.update_config(start_step=1) + prune.model = self.model + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + prune.on_train_begin() + prune.update_config(pruning_frequency=1) + for epoch in range(2): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + 
prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_types.py b/test/pruning/test_pruning_types.py new file mode 100644 index 00000000000..3adbc78452e --- /dev/null +++ b/test/pruning/test_pruning_types.py @@ -0,0 +1,87 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.pruning import Pruning, WeightPruningConfig + +local_types_config = [ + { + "start_step": 0, + "end_step": 0, + "pruning_type": "pattern_lock", + "op_names": ['layer1.*'], + "excluded_op_names": ['layer2.*'], + "pruning_scope": "global" + }, + { + "start_step": 1, + "end_step": 1, + "target_sparsity": 0.5, + "pruning_type": "snip_momentum_progressive", + "pruning_frequency": 2, + "op_names": ['layer2.*'], + "pruning_scope": "local", + "pattern": "4x1", + "sparsity_decay_type": "exp" + }, + { + "start_step": 2, + "end_step": 8, + "target_sparsity": 0.8, + "pruning_type": "snip_progressive", + "pruning_frequency": 1, + "op_names": ['layer3.*'], + "pruning_scope": "local", + "pattern": "16x1", + "sparsity_decay_type": "cube" + } +] + +fake_snip_config = WeightPruningConfig(local_types_config, target_sparsity=0.9, start_step=0, \ + end_step=10, pruning_frequency=3, sparsity_decay_type="exp") + + +class TestPruningTypes(unittest.TestCase): + model = torchvision.models.resnet18() + + def test_pruning_types(self): + prune = Pruning(fake_snip_config) + prune.model = self.model + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., 
label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + prune.on_train_begin() + prune.update_config(pruning_frequency=1) + for epoch in range(2): + self.model.train() + prune.on_epoch_begin(epoch) + local_step = 0 + for image, target in dummy_dataloader: + prune.on_step_begin(local_step) + output = self.model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + prune.on_before_optimizer_step() + optimizer.step() + prune.on_after_optimizer_step() + prune.on_step_end() + local_step += 1 + + prune.on_epoch_end() + prune.get_sparsity_ratio() + prune.on_train_end() + prune.on_before_eval() + prune.on_after_eval() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pytorch_pruning.py b/test/pruning/test_pytorch_pruning.py deleted file mode 100644 index 73739de75ab..00000000000 --- a/test/pruning/test_pytorch_pruning.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import shutil -import unittest - -import torch -import torchvision -import torch.nn as nn - -from neural_compressor.data import DATASETS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader - - -def build_fake_yaml_basic(): - fake_snip_yaml = """ - model: - name: imagenet_prune - framework: pytorch - - pruning: - approach: - weight_compression_pytorch: - initial_sparsity: 0.0 - target_sparsity: 0.9 - start_step: 0 - end_step: 10 - excluded_names: ["classifier"] - - update_frequency_on_step: 1 - sparsity_decay_type: "exp" - pruners: - - !Pruner - start_step: 0 - sparsity_decay_type: "cos" - end_step: 10 - prune_type: "magnitude" - names: ['layer1.*'] - extra_excluded_names: ['layer2.*'] - prune_domain: "global" - pattern: "tile_pattern_4x1" - - - !Pruner - start_step: 1 - end_step: 1 - target_sparsity: 0.5 - prune_type: "snip_momentum" - update_frequency: 2 - names: ['layer2.*'] - prune_domain: local - pattern: "tile_pattern_2:4" - - - !Pruner - start_step: 2 - end_step: 8 - target_sparsity: 0.8 
- prune_type: "snip" - names: ['layer3.*'] - prune_domain: "local" - pattern: "tile_pattern_16x1" - sparsity_decay_type: "cube" - - """ - with open('fake_snip.yaml', 'w', encoding="utf-8") as f: - f.write(fake_snip_yaml) - -def build_fake_yaml_channel(): - fake_channel_pruning_yaml = """ - model: - name: imagenet_prune - framework: pytorch - - pruning: - approach: - weight_compression_pytorch: - initial_sparsity: 0.0 - target_sparsity: 0.9 - start_step: 0 - end_step: 10 - excluded_names: ["classifier"] - - update_frequency_on_step: 1 - sparsity_decay_type: "exp" - pruners: - - !Pruner - start_step: 5 - end_step: 5 - prune_type: "pattern_lock" - names: ['layer1.*'] - extra_excluded_names: ['layer2.*'] - prune_domain: "global" - pattern: "channelx1" - - - !Pruner - start_step: 1 - end_step: 1 - target_sparsity: 0.5 - prune_type: "pattern_lock" - update_frequency: 2 - names: ['layer2.*'] - prune_domain: local - pattern: "2:4" - - - !Pruner - start_step: 2 - end_step: 8 - target_sparsity: 0.8 - prune_type: "snip" - names: ['layer3.*'] - prune_domain: "local" - pattern: "1xchannel" - sparsity_decay_type: "cube" - - """ - - with open('fake_channel_pruning.yaml', 'w', encoding="utf-8") as f: - f.write(fake_channel_pruning_yaml) - - -class TestPytorchPruning(unittest.TestCase): - - model = torchvision.models.resnet18() - - @classmethod - def setUpClass(cls): - build_fake_yaml_basic() - build_fake_yaml_channel() - - - @classmethod - def tearDownClass(cls): - os.remove('fake_channel_pruning.yaml') - os.remove('fake_snip.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - - def test_pytorch_pruning_basic(self): - from neural_compressor.experimental.pytorch_pruner.pruning import Pruning - - prune = Pruning("fake_snip.yaml") - ##prune.generate_pruners() - prune.update_items_for_all_pruners(start_step=1) - prune.model = self.model - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) 
- datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - prune.on_train_begin() - prune.update_items_for_all_pruners(update_frequency_on_step=1) - for epoch in range(2): - self.model.train() - prune.on_epoch_begin(epoch) - local_step = 0 - for image, target in dummy_dataloader: - prune.on_step_begin(local_step) - output = self.model(image) - loss = criterion(output, target) - optimizer.zero_grad() - loss.backward() - prune.on_before_optimizer_step() - optimizer.step() - prune.on_after_optimizer_step() - prune.on_step_end() - local_step += 1 - - prune.on_epoch_end() - prune.get_sparsity_ratio() - prune.on_train_end() - prune.on_before_eval() - prune.on_after_eval() - - def test_pytorch_pruner_channel_pruning(self): - from neural_compressor.experimental.pytorch_pruner.pruning import Pruning - prune = Pruning("fake_channel_pruning.yaml") - ##prune.generate_pruners() - prune.model = self.model - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - prune.on_train_begin() - for epoch in range(2): - self.model.train() - prune.on_epoch_begin(epoch) - local_step = 0 - for image, target in dummy_dataloader: - prune.on_step_begin(local_step) - output = self.model(image) - loss = criterion(output, target) - optimizer.zero_grad() - loss.backward() - prune.on_before_optimizer_step() - optimizer.step() - prune.on_after_optimizer_step() - prune.on_step_end() - local_step += 1 - - prune.on_epoch_end() - -if __name__ == "__main__": - unittest.main() - - diff --git a/test/pruning/test_gradient_sensitivity.py b/test/pruning_v1/test_gradient_sensitivity.py similarity index 99% rename from test/pruning/test_gradient_sensitivity.py rename to 
test/pruning_v1/test_gradient_sensitivity.py index 999fdcccc28..6d411e35b0f 100644 --- a/test/pruning/test_gradient_sensitivity.py +++ b/test/pruning_v1/test_gradient_sensitivity.py @@ -2,7 +2,7 @@ import shutil import unittest from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets import torch import torchvision @@ -206,7 +206,7 @@ def tearDownClass(cls): def test_unstructured_pruning(self): from neural_compressor.experimental import Pruning, common prune_cv = Pruning('fake_unstructured.yaml') - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) diff --git a/test/pruning/test_pattern_lock.py b/test/pruning_v1/test_pattern_lock.py similarity index 100% rename from test/pruning/test_pattern_lock.py rename to test/pruning_v1/test_pattern_lock.py diff --git a/test/pruning_v1/test_pruning.py b/test/pruning_v1/test_pruning.py new file mode 100644 index 00000000000..5871f6bcc34 --- /dev/null +++ b/test/pruning_v1/test_pruning.py @@ -0,0 +1,132 @@ +import os +import shutil +import unittest + +import torch +import torchvision +import torch.nn as nn + +from neural_compressor.config import Pruner, PruningConfig +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.training import prepare_compression + + +def build_fake_yaml(): + fake_yaml = """ + model: + name: imagenet_prune + framework: pytorch + + pruning: + approach: + weight_compression: + initial_sparsity: 0.0 + target_sparsity: 0.97 + start_epoch: 0 + end_epoch: 2 + pruners: + - !Pruner + start_epoch: 1 + end_epoch: 2 + prune_type: basic_magnitude + names: ['layer1.0.conv1.weight'] + + - !Pruner + target_sparsity: 0.6 + 
prune_type: basic_magnitude + update_frequency: 2 + names: ['layer1.0.conv2.weight'] + """ + with open('fake.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + + +class TestPruning(unittest.TestCase): + + model = torchvision.models.resnet18() + + @classmethod + def setUpClass(cls): + build_fake_yaml() + + @classmethod + def tearDownClass(cls): + os.remove('fake.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + + def test_pruning(self): + pruner1 = Pruner(start_epoch=1, end_epoch=2, names=['layer1.0.conv1.weight']) + pruner2 = Pruner(target_sparsity=0.6, update_frequency=2, names=['layer1.0.conv2.weight']) + conf = PruningConfig(pruners=[pruner1, pruner2], end_epoch=2) + datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + compression_manager = prepare_compression(self.model, conf) + model = compression_manager.model + + epochs = 2 + iters = 3 + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + for nepoch in range(epochs): + model.train() + cnt = 0 + compression_manager.callbacks.on_epoch_begin(nepoch) + for image, target in dummy_dataloader: + compression_manager.callbacks.on_step_begin(cnt) + print('.', end='') + cnt += 1 + output = model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + optimizer.step() + compression_manager.callbacks.on_step_end() + if cnt >= iters: + break + compression_manager.callbacks.on_epoch_end() + + model.save("./saved") + + def test_pruning_external(self): + from neural_compressor.experimental import common + from neural_compressor import Pruning + from neural_compressor.conf.config import PruningConf + pruners = [Pruner(1,3,names=['layer1.0.conv1.weight']), + Pruner(target_sparsity=0.6,update_frequency=2,names=['layer1.0.conv2.weight'])] + conf = PruningConfig(pruners) + + 
datasets = Datasets('pytorch') + dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + compression_manager = prepare_compression(self.model, conf) + model = compression_manager.model + + epochs = 2 + iters = 3 + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + for nepoch in range(epochs): + model.train() + cnt = 0 + compression_manager.callbacks.on_epoch_begin(nepoch) + for image, target in dummy_dataloader: + compression_manager.callbacks.on_step_begin(cnt) + print('.', end='') + cnt += 1 + output = model(image) + loss = criterion(output, target) + optimizer.zero_grad() + loss.backward() + optimizer.step() + compression_manager.callbacks.on_step_end() + if cnt >= iters: + break + compression_manager.callbacks.on_epoch_end() + model.save("./saved") + + +if __name__ == "__main__": + unittest.main() diff --git a/test/pruning/test_pruning_group_lasso.py b/test/pruning_v1/test_pruning_group_lasso.py similarity index 98% rename from test/pruning/test_pruning_group_lasso.py rename to test/pruning_v1/test_pruning_group_lasso.py index 0f129874e95..73c9c4d70b6 100644 --- a/test/pruning/test_pruning_group_lasso.py +++ b/test/pruning_v1/test_pruning_group_lasso.py @@ -6,7 +6,7 @@ import torchvision import torch.nn as nn -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader def build_fake_yaml(): diff --git a/test/pruning/test_pruning_pattern.py b/test/pruning_v1/test_pruning_pattern.py similarity index 100% rename from test/pruning/test_pruning_pattern.py rename to test/pruning_v1/test_pruning_pattern.py diff --git a/test/pruning/test_pruning_pure_yaml.py b/test/pruning_v1/test_pruning_pure_yaml.py similarity index 98% rename from test/pruning/test_pruning_pure_yaml.py rename to 
test/pruning_v1/test_pruning_pure_yaml.py index 312b1d8cefb..b8b19dd36db 100644 --- a/test/pruning/test_pruning_pure_yaml.py +++ b/test/pruning_v1/test_pruning_pure_yaml.py @@ -6,7 +6,7 @@ import torchvision import torch.nn as nn -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader def build_fake_yaml(): diff --git a/test/pruning/test_tensorflow_distributed_pruning.py b/test/pruning_v1/test_tensorflow_distributed_pruning.py similarity index 100% rename from test/pruning/test_tensorflow_distributed_pruning.py rename to test/pruning_v1/test_tensorflow_distributed_pruning.py diff --git a/test/pruning/test_tensorflow_pruning.py b/test/pruning_v1/test_tensorflow_pruning.py similarity index 97% rename from test/pruning/test_tensorflow_pruning.py rename to test/pruning_v1/test_tensorflow_pruning.py index 5b16065df10..0afdecd66c0 100644 --- a/test/pruning/test_tensorflow_pruning.py +++ b/test/pruning_v1/test_tensorflow_pruning.py @@ -378,7 +378,9 @@ def test_create_train_func1(self): 'workspace_path': './nc_workspace/', 'q_dataloader': None, 'inputs': [], - 'outputs': []}) + 'outputs': [], + 'format': 'default', + 'backend': 'default'}) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) @@ -400,7 +402,9 @@ def test_create_train_func2(self): 'workspace_path': './nc_workspace/', 'q_dataloader': None, 'inputs': [], - 'outputs': []}) + 'outputs': [], + 'format': 'default', + 'backend': 'default'}) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) diff --git a/test/pruning/test_tensorflow_pruning_utility.py b/test/pruning_v1/test_tensorflow_pruning_utility.py similarity index 100% rename from test/pruning/test_tensorflow_pruning_utility.py rename to test/pruning_v1/test_tensorflow_pruning_utility.py diff --git 
a/test/quantization/test_quantization.py b/test/quantization/test_quantization.py index 9a06f9e70dc..bfd2a9e11c2 100644 --- a/test/quantization/test_quantization.py +++ b/test/quantization/test_quantization.py @@ -197,8 +197,8 @@ def build_fake_strategy(): "from collections import OrderedDict \n", "from .strategy import strategy_registry, TuneStrategy \n", "from ..utils import logger \n", - "from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", - "from .st_utils.tuning_structs import OpTuningConfig \n", + "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", + "from .utils.tuning_structs import OpTuningConfig \n", "import copy \n", "@strategy_registry \n", "class FakeTuneStrategy(TuneStrategy): \n", @@ -234,7 +234,7 @@ def build_fake_strategy(): " for calib_sampling_size in calib_sampling_size_lst: \n", " # step1. collect the ops that support static and dynamic \n", " quant_mode_wise_items = OrderedDict() \n", - " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", + " query_order = ['static', 'dynamic', 'bf16', 'fp32'] \n", " pre_items = set() \n", " for quant_mode in query_order: \n", " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", @@ -342,13 +342,13 @@ def test_resume(self): def test_autodump(self): # test auto_dump using old api - from neural_compressor.quantization import Quantization + from neural_compressor.experimental import Quantization, common quantizer = Quantization('fake_yaml3.yaml') dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) - dataloader = quantizer.dataloader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph - output_graph = quantizer(self.constant_graph, \ - q_dataloader=dataloader, eval_dataloader=dataloader) + output_graph = quantizer.fit() self.assertNotEqual(output_graph, None) def test_performance_only(self): 
diff --git a/test/quantization/test_tensorflow_qat.py b/test/quantization/test_tensorflow_qat.py index 48bfe4ac8d3..fcc7dd79db5 100644 --- a/test/quantization/test_tensorflow_qat.py +++ b/test/quantization/test_tensorflow_qat.py @@ -3,54 +3,6 @@ import yaml import shutil -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - - device: cpu - quantization: - approach: quant_aware_training - evaluation: - accuracy: - metric: - Accuracy: {} - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: - yaml.dump(y, f) - f.close() - - -def build_fake_yaml_by_train(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - - device: cpu - quantization: - approach: quant_aware_training - train: - optimizer: - SGD: - learning_rate: 0.1 - criterion: - CrossEntropyLoss: - reduction: none - evaluation: - accuracy: - metric: - Accuracy: {} - ''' - - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_train.yaml', "w", encoding="utf-8") as f: - yaml.dump(y, f) - f.close() - - def train_func(): import tensorflow as tf from tensorflow import keras @@ -89,46 +41,7 @@ def train_func(): print('Baseline test accuracy:', baseline_model_accuracy) model.save("baseline_model") - - -def q_func(model): - import tensorflow as tf - from tensorflow import keras - mnist = keras.datasets.mnist - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - - # Normalize the input image so that each pixel value is between 0 to 1. - train_images = train_images / 255.0 - test_images = test_images / 255.0 - - model = tf.keras.models.load_model("baseline_model") - - import tensorflow_model_optimization as tfmot - quantize_model = tfmot.quantization.keras.quantize_model - - # q_aware stands for for quantization aware. - q_aware_model = quantize_model(model) - - # `quantize_model` requires a recompile. 
- q_aware_model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True), - metrics=['accuracy']) - - train_images_subset = train_images[0:1000] # out of 60000 - train_labels_subset = train_labels[0:1000] - - q_aware_model.fit(train_images_subset, train_labels_subset, - batch_size=500, epochs=1, validation_split=0.1) - - _, q_aware_model_accuracy = q_aware_model.evaluate( - test_images, test_labels, verbose=0) - - print('Quant test accuracy:', q_aware_model_accuracy) - q_aware_model.save("trained_qat_model") - return 'trained_qat_model' - - + class Dataset(object): def __init__(self, batch_size=100): import tensorflow as tf @@ -148,31 +61,53 @@ def __len__(self): def __getitem__(self, idx): return self.test_images[idx], self.test_labels[idx] - class TestTensorflowQAT(unittest.TestCase): import tensorflow as tf @classmethod def setUpClass(self): - build_fake_yaml() train_func() - build_fake_yaml_by_train() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') shutil.rmtree('baseline_model',ignore_errors=True) shutil.rmtree('trained_qat_model',ignore_errors=True) - os.remove('fake_yaml_train.yaml') @unittest.skipIf(tf.version.VERSION < '2.3.0', " keras model need tensorflow version >= 2.3.0, so the case is skipped") - def test_qat_with_external_q_func(self): - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - quantizer.eval_dataloader = common.DataLoader(Dataset()) - quantizer.model = './baseline_model' - quantizer.q_func = q_func - quantizer.fit() + def test_qat(self): + import tensorflow as tf + from tensorflow import keras + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. 
+ train_images = train_images / 255.0 + test_images = test_images / 255.0 + + from neural_compressor import training, QuantizationAwareTrainingConfig + config = QuantizationAwareTrainingConfig() + compression_manager = training.prepare_compression('./baseline_model', config) + compression_manager.callbacks.on_train_begin() + + q_aware_model = compression_manager.model + # `quantize_model` requires a recompile. + q_aware_model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['accuracy']) + + train_images_subset = train_images[0:1000] # out of 60000 + train_labels_subset = train_labels[0:1000] + + q_aware_model.fit(train_images_subset, train_labels_subset, + batch_size=500, epochs=1, validation_split=0.1) + + _, q_aware_model_accuracy = q_aware_model.evaluate( + test_images, test_labels, verbose=0) + + print('Quant test accuracy:', q_aware_model_accuracy) + compression_manager.callbacks.on_train_end() + compression_manager.save("trained_qat_model") if __name__ == '__main__': unittest.main() diff --git a/test/requirements.txt b/test/requirements.txt index 30712c4bafb..32535567cc6 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -13,8 +13,6 @@ pillow>=8.2.0 transformers<=4.12.3; python_version < '3.10' transformers==4.16.0; python_version == '3.10' tensorflow_model_optimization -sigopt -hyperopt horovod tensorflow-addons onnxruntime-extensions; python_version < '3.10' diff --git a/test/scheduler/test_oneshot.py b/test/scheduler/test_oneshot.py index f06486b60ba..484a7449c13 100644 --- a/test/scheduler/test_oneshot.py +++ b/test/scheduler/test_oneshot.py @@ -9,10 +9,10 @@ import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor.conf.config import DistillationConf, PruningConf -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from 
neural_compressor.experimental.scheduler import Scheduler -from neural_compressor.training import fit, prepare +from neural_compressor.training import prepare_compression from neural_compressor.utils.pytorch import load from neural_compressor.utils import logger from packaging.version import Version @@ -189,7 +189,7 @@ def tearDownClass(cls): def test_prune_qat_oneshot(self): from neural_compressor.experimental import Pruning, Quantization - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) q_model = copy.deepcopy(self.q_model) @@ -204,8 +204,7 @@ def train_func_for_nc(model): iters = 3 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm') - torch.quantization.prepare_qat(model, inplace=True) + combination.on_train_begin() for nepoch in range(epochs): model.train() cnt = 0 @@ -234,9 +233,9 @@ def train_func_for_nc(model): logger.info(20*'=' + 'test_prune_qat_oneshot' + 20*'=') try: - conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() + conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight + conv_weight = opt_model.model.layer1[0].conv1.weight self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) @@ -244,14 +243,14 @@ def train_func_for_nc(model): # reloading int8 model reloaded_model = load('./saved', self.q_model) try: - reloaded_conv_weight = reloaded_model.layer1[0].conv1.weight().dequantize() + reloaded_conv_weight = reloaded_model.layer1[0].conv1.weight().dequantize() except: - reloaded_conv_weight = reloaded_model.layer1[0].conv1.weight + reloaded_conv_weight = reloaded_model.layer1[0].conv1.weight self.assertEqual(reloaded_conv_weight.sum().item(), 
conv_weight.sum().item()) def test_distillation_qat_oneshot(self): from neural_compressor.experimental import Distillation, Quantization - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) @@ -268,8 +267,6 @@ def train_func_for_nc(model): iters = 3 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm') - torch.quantization.prepare_qat(model, inplace=True) combination.on_train_begin() for nepoch in range(epochs): model.train() @@ -303,71 +300,70 @@ def train_func_for_nc(model): # reloading int8 model reloaded_model = load('./saved', self.q_model) - def test_distillation_prune_oneshot(self): - datasets = DATASETS('pytorch') + def test_distillation_prune_oneshot_with_new_API(self): + from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig + from neural_compressor.config import Pruner, PruningConfig + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) - d_conf = DistillationConf('./fake3.yaml') - p_conf = PruningConf('./fake.yaml') - callbacks, model = prepare( - [d_conf, p_conf], model=model, teacher_model=copy.deepcopy(model) - ) + distillation_criterion = KnowledgeDistillationLossConfig(loss_types=['CE', 'KL']) + d_conf = DistillationConfig(copy.deepcopy(self.model), distillation_criterion) + pruner1 = Pruner(start_epoch=1, end_epoch=3, names=['layer1.0.conv1.weight']) + pruner2 = Pruner(target_sparsity=0.6, update_frequency=2, names=['layer1.0.conv2.weight']) + p_conf = PruningConfig(pruners=[pruner1, pruner2], end_epoch=3) + + compression_manager = prepare_compression(model=model, 
confs=[d_conf, p_conf]) def train_func_for_nc(model): epochs = 3 iters = 3 criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - callbacks.on_train_begin() + optimizer = torch.optim.SGD(model.parameters(), + lr=0.001, + momentum=0.1, + nesterov=True, + weight_decay=0.001) + compression_manager.callbacks.on_train_begin() for nepoch in range(epochs): model.train() cnt = 0 - callbacks.on_epoch_begin(nepoch) + compression_manager.callbacks.on_epoch_begin(nepoch) for image, target in dummy_dataloader: - callbacks.on_step_begin(cnt) + compression_manager.callbacks.on_step_begin(cnt) print('.', end='') cnt += 1 output = model(image) loss = criterion(output, target) - loss = callbacks.on_after_compute_loss(image, output, loss) + loss = compression_manager.callbacks.on_after_compute_loss(image, output, loss) optimizer.zero_grad() loss.backward() - callbacks.on_before_optimizer_step() + compression_manager.callbacks.on_before_optimizer_step() optimizer.step() - callbacks.on_step_end() + compression_manager.callbacks.on_step_end() if cnt >= iters: break - callbacks.on_epoch_end() - callbacks.on_train_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() return model - def eval_func(model): - for image, target in dummy_dataloader: - model(image) - return 1 # metric is 1 here, just for unit test - - opt_model = fit( - model, callbacks, train_func=train_func_for_nc, - eval_func=eval_func - ) + train_func_for_nc(model) print(20*'=' + 'test_distillation_prune_oneshot' + 20*'=') - print(opt_model.model) try: - conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() + conv_weight = model.layer1[0].conv1.weight().dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight + conv_weight = model.layer1[0].conv1.weight self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) self.assertEqual( - 
callbacks.component.components[0].__repr__().lower(), - 'combination of distillation,pruning' + compression_manager.callbacks.callbacks.combination, + ['Distillation', 'Pruning'] ) def test_prune_qat_distillation_oneshot(self): from neural_compressor.experimental import Pruning, Quantization, Distillation - datasets = DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) @@ -385,8 +381,6 @@ def train_func_for_nc(model): iters = 3 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm') - torch.quantization.prepare_qat(model, inplace=True) combination.on_train_begin() for nepoch in range(epochs): model.train() @@ -427,7 +421,7 @@ def train_func_for_nc(model): def test_prune_qat_oneshot_fx(self): from neural_compressor.experimental import Pruning, Quantization - datasets = DATASETS('pytorch_fx') + datasets = Datasets('pytorch_fx') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune = Pruning('./fx_fake.yaml') @@ -485,7 +479,7 @@ def train_func_for_nc(model): "requires higher version of torch than 1.9.0") def test_distillation_qat_oneshot_fx(self): from neural_compressor.experimental import Distillation, Quantization - datasets = DATASETS('pytorch_fx') + datasets = Datasets('pytorch_fx') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = DynamicControlModel() @@ -501,8 +495,7 @@ def train_func_for_nc(model): iters = 3 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - combination.on_train_begin(dummy_dataloader) - model = combination.model.model + 
combination.on_train_begin() for nepoch in range(epochs): model.train() cnt = 0 @@ -539,7 +532,7 @@ def train_func_for_nc(model): def test_distillation_prune_oneshot_fx(self): from neural_compressor.experimental import Distillation, Pruning - datasets = DATASETS('pytorch_fx') + datasets = Datasets('pytorch_fx') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) distiller = Distillation('./fx_fake3.yaml') @@ -597,7 +590,7 @@ def train_func_for_nc(model): "requires higher version of torch than 1.9.0") def test_prune_qat_distillation_oneshot_fx(self): from neural_compressor.experimental import Pruning, Quantization, Distillation - datasets = DATASETS('pytorch_fx') + datasets = Datasets('pytorch_fx') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) @@ -614,8 +607,7 @@ def train_func_for_nc(model): iters = 3 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - combination.on_train_begin(dummy_dataloader) - model = combination.model.model + combination.on_train_begin() for nepoch in range(epochs): model.train() cnt = 0 diff --git a/test/scheduler/test_scheduler.py b/test/scheduler/test_scheduler.py index 7656cfb7c5a..ed1f0698f5a 100644 --- a/test/scheduler/test_scheduler.py +++ b/test/scheduler/test_scheduler.py @@ -7,7 +7,7 @@ import torch.nn as nn import neural_compressor.adaptor.pytorch as nc_torch -from neural_compressor.data import DATASETS +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.experimental.scheduler import Scheduler from packaging.version import Version @@ -325,7 +325,7 @@ def test_pruning(self): prune = Pruning('fake.yaml') scheduler = Scheduler() scheduler.model = self.model - datasets = 
DATASETS('pytorch') + datasets = Datasets('pytorch') dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 845e9b0ccae..15b6c4ff1cd 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -155,7 +155,7 @@ def build_fake_model(): tf.import_graph_def(graph_def, name='') return graph -class TestQuantization(unittest.TestCase): +class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -217,6 +217,20 @@ def test_run_basic_max_trials_multimetric_weight(self): quantizer.model = self.constant_graph quantizer.fit() + + def test_run_basic_one_trial_new_api(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + conf = PostTrainingQuantConfig() + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + self.assertIsNotNone(q_model) if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py new file mode 100644 index 00000000000..c3858063a1a --- /dev/null +++ b/test/strategy/test_hawq_v2_2.x.py @@ -0,0 +1,60 @@ +"""Tests for HAWQ v2 strategy""" + +import copy +import shutil +import unittest + +from neural_compressor.utils import logger + +# loss function for hawq-v2 +def hawq_v2_loss(output, target): + import torch + return torch.nn.CrossEntropyLoss()(output, target) + +class TestHAWQV2TuningStrategy(unittest.TestCase): + + @classmethod + def setUpClass(self): + import torchvision + self.model = torchvision.models.resnet18() + + @classmethod + def 
tearDownClass(self): + shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + + def test_hawq_v2_pipeline(self): + logger.info("*** Test: HAWQ v2 with pytorch model.") + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # model + model = copy.deepcopy(self.model) + + # fake evaluation function + self.test_hawq_v2_pipeline_fake_acc = 0 + def _fake_eval(model): + self.test_hawq_v2_pipeline_fake_acc -= 1 + return self.test_hawq_v2_pipeline_fake_acc + + # dataset and dataloader + dataset = Datasets("pytorch")["dummy"](((1, 3, 224, 224))) + dataloader = DATALOADERS["pytorch"](dataset) + + #tuning and accuracy criterion + strategy_kwargs = {'hawq_v2_loss': hawq_v2_loss} + tuning_criterion = TuningCriterion(strategy='hawq_v2', strategy_kwargs=strategy_kwargs, max_trials=5) + conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) + + # fit + q_model = fit(model=model, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval) + self.assertIsNone(q_model) + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_mse_v2.py b/test/strategy/test_mse_v2.py new file mode 100644 index 00000000000..e28adba79ce --- /dev/null +++ b/test/strategy/test_mse_v2.py @@ -0,0 +1,152 @@ +import copy +import os +import shutil +import unittest +import tensorflow as tf +import numpy as np +import torchvision +from neural_compressor.experimental import Quantization, common + + +def build_mse_yaml_tf(): + mse_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse_v2 + accuracy_criterion: + relative: 0.01 + exit_policy: + max_trials: 10 + timeout: 3600 + random_seed: 9527 + ''' + with 
open('mse_yaml_tf.yaml', 'w', encoding="utf-8") as f: + f.write(mse_yaml) + +def build_mse_yaml_pytorch(): + mse_yaml = ''' + model: + name: resnet18 + framework: pytorch_fx + + tuning: + strategy: + name: mse_v2 + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + ''' + with open('mse_yaml_pytorch.yaml', 'w', encoding="utf-8") as f: + f.write(mse_yaml) + +def build_fake_model(): + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) + last_identity = tf.identity(op2, name='op2_to_store') + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') + last_identity = tf.identity(op2, name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + 
graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph +class Test_MSEV2Strategy_Tensorflow(unittest.TestCase): + @classmethod + def setUpClass(self): + build_mse_yaml_tf() + self.model = build_fake_model() + + @classmethod + def tearDownClass(self): + os.remove('mse_yaml_tf.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + def test_quantization_saved(self): + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): + # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] + i[0] += 1 + return eval_list[i[0]] + + quantizer = Quantization('mse_yaml_tf.yaml') + + quantizer.model = self.model + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.eval_func = fake_eval_func + q_model = quantizer.fit() + self.assertIsNotNone(q_model) + q_model.save('./saved') + +class Test_MSEV2Strategy_PyTorch(unittest.TestCase): + @classmethod + def setUpClass(self): + build_mse_yaml_pytorch() + self.model = torchvision.models.resnet18() + + @classmethod + def tearDownClass(self): + os.remove('mse_yaml_pytorch.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + def test_quantization_saved(self): + i = [0] + def fake_eval_func(model): + acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] + + i[0] += 1 + return acc_lst[i[0]] + + model = copy.deepcopy(self.model) + quantizer = Quantization('mse_yaml_pytorch.yaml') + dataset = quantizer.dataset('dummy', (1, 3, 224, 224)) + quantizer.model = model + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_func = fake_eval_func + q_model = 
quantizer.fit() + self.assertIsNotNone(q_model) + q_model.save('./saved') + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_mse_v2_2.x.py b/test/strategy/test_mse_v2_2.x.py new file mode 100644 index 00000000000..6f33aa14e1d --- /dev/null +++ b/test/strategy/test_mse_v2_2.x.py @@ -0,0 +1,141 @@ +import copy +import os +import shutil +import unittest +import tensorflow as tf +import numpy as np +import torchvision + +def build_fake_model(): + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) + last_identity = tf.identity(op2, name='op2_to_store') + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') + last_identity = tf.identity(op2, name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = 
tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +class Test_MSEV2Strategy(unittest.TestCase): + @classmethod + def setUpClass(self): + self.tf_model = build_fake_model() + self.torch_model = torchvision.models.resnet18() + + @classmethod + def tearDownClass(self): + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + def test_quantization_saved_tf(self): + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): + # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] + i[0] += 1 + return eval_list[i[0]] + + from neural_compressor.quantization import fit + from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS['tensorflow'](dataset) + + conf = PostTrainingQuantConfig( + approach="static", + optimization_level=1, + tuning_criterion=TuningCriterion(strategy="mse_v2")) + + q_model = fit( + model=self.tf_model, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=fake_eval_func) + self.assertIsNotNone(q_model) + + def test_quantization_saved_tf_with_confidence_batches(self): + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): + # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] + i[0] += 1 + return eval_list[i[0]] + + from neural_compressor.quantization import fit + from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS['tensorflow'](dataset) + 
+ conf = PostTrainingQuantConfig( + approach="static", + optimization_level=1, + tuning_criterion=TuningCriterion( + strategy="mse_v2", + strategy_kwargs={ + "confidence_batches": 5, + })) + + q_model = fit( + model=self.tf_model, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=fake_eval_func) + self.assertIsNotNone(q_model) + + def test_quantization_saved_torch(self): + i = [0] + def fake_eval_func(model): + acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] + i[0] += 1 + return acc_lst[i[0]] + + from neural_compressor.quantization import fit + from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("pytorch")["dummy"](((1, 3, 224, 224))) + dataloader = DATALOADERS['pytorch'](dataset) + + conf = PostTrainingQuantConfig( + approach="static", + optimization_level=1, + tuning_criterion=TuningCriterion(strategy="mse_v2")) + + q_model = fit( + model=self.torch_model, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=fake_eval_func) + self.assertIsNotNone(q_model) + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_optimization_level_2.x.py b/test/strategy/test_optimization_level_2.x.py new file mode 100644 index 00000000000..d3077bd45d3 --- /dev/null +++ b/test/strategy/test_optimization_level_2.x.py @@ -0,0 +1,150 @@ +"""Tests for optimization level & conservative strategy""" + +import shutil +import unittest +import time + +import numpy as np + +from neural_compressor.utils import logger + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = 
tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) + last_identity = tf.identity(op2, name='op2_to_store') + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') + last_identity = tf.identity(op2, name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + + +class TestOptimizationLevel(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + + @classmethod + def tearDownClass(self): + shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + def test_tf_opt_level_0(self): + logger.info("*** Test: optimization level 0 with tensorflow model.") + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + + # fake evaluation function + 
def _fake_eval(model): + return 1 + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + optimization_level = 0 + conf = PostTrainingQuantConfig(optimization_level=0) + + # fit + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader= dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval) + self.assertIsNotNone(q_model) + + def test_tf_opt_level_1(self): + logger.info("*** Test: optimization level 1 with tensorflow model.") + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + + # fake evaluation function + self._fake_acc = 10 + def _fake_eval(model): + self._fake_acc -= 1 + return self._fake_acc + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + conf = PostTrainingQuantConfig() + + # fit + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader= dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval) + self.assertIsNone(q_model) + + def test_pt_opt_level_0(self): + logger.info("*** Test: optimization level 0 with pytorch model.") + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + import torchvision + + # model + resnet18 = torchvision.models.resnet18() + + # fake evaluation function + acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3] + perf_lst = [2.0, 1.5, 1.0, 0.5, 0.1] + self.test_pt_opt_level_0_index = -1 + def _fake_eval(model): + self.test_pt_opt_level_0_index += 1 + perf = perf_lst[self.test_pt_opt_level_0_index] + time.sleep(perf) + return acc_lst[self.test_pt_opt_level_0_index] + + # dataset and dataloader + dataset = 
Datasets("pytorch")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["pytorch"](dataset) + + # tuning and accuracy criterion + optimization_level = 0 + conf = PostTrainingQuantConfig(optimization_level=optimization_level) + + # fit + q_model = fit(model=resnet18, + conf=conf, + calib_dataloader= dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval) + self.assertIsNotNone(q_model) + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index ce7a7669862..1b78167edb7 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -104,7 +104,7 @@ def build_fake_model(): return graph @unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") -class TestQuantization(unittest.TestCase): +class TestSigoptTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -140,6 +140,28 @@ def test_run_basic_max_trials(self): quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() + + def test_run_sigopt_one_trial_new_api(self): + from neural_compressor.quantization import fit + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + accuracy_criterion = AccuracyCriterion(criterion='relative') + strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', + 'sigopt_project_id': 'sigopt_project_id_test', + 'sigopt_experiment_name': 'nc-tune'} + tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig(approach="static", backend="tensorflow", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + 
self.assertEqual(conf.strategy_kwargs, strategy_kwargs) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_sampler.py b/test/strategy/test_tuning_sampler.py index 786c3e0cf19..51310ebaea2 100644 --- a/test/strategy/test_tuning_sampler.py +++ b/test/strategy/test_tuning_sampler.py @@ -1,7 +1,7 @@ -from neural_compressor.strategy.st_utils.tuning_sampler import OpTypeWiseTuningSampler, ModelWiseTuningSampler -from neural_compressor.strategy.st_utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler -from neural_compressor.strategy.st_utils.tuning_structs import OpTuningConfig -from neural_compressor.strategy.st_utils.tuning_space import TuningSpace +from neural_compressor.strategy.utils.tuning_sampler import OpTypeWiseTuningSampler, ModelWiseTuningSampler +from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler +from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.strategy.utils.tuning_space import TuningSpace from collections import OrderedDict from copy import deepcopy import unittest @@ -170,7 +170,7 @@ def test_tuning_sampler(self): op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ['static', 'dynamic', 'bf16', 'fp32'] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) diff --git a/test/strategy/test_tuning_space.py b/test/strategy/test_tuning_space.py index d7be2c4ac76..6f195a3d195 100644 --- a/test/strategy/test_tuning_space.py +++ b/test/strategy/test_tuning_space.py @@ -1,5 +1,6 @@ -from neural_compressor.strategy.st_utils.tuning_space import TuningItem, TuningSpace +from 
neural_compressor.strategy.utils.tuning_space import TuningItem, TuningSpace from neural_compressor.conf.dotdict import DotDict +from neural_compressor.utils import logger from copy import deepcopy import unittest @@ -160,6 +161,28 @@ } +op_cap2 = { + # The granularity of op activation do not support per_tensor. + ('op_name4', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + },] +} + + class TestTuningSampler(unittest.TestCase): def setUp(self) -> None: self.capability = { @@ -189,11 +212,38 @@ def setUp(self) -> None: } } + self.op_wise_user_config2 = { + 'op_name4': { + 'activation': { + 'granularity': ['per_tensor'], + } + } + } + + self.capability2 = { + 'calib': {'calib_sampling_size': [1, 10]}, + 'op': deepcopy(op_cap2) + } + + def test_tuning_space_merge_op_wise_not_exist(self): + # op-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'op_wise': deepcopy(self.op_wise_user_config2), + } + } + } + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + + def test_tuning_space_creation(self): conf = None # Test the creation of tuning space tuning_space = TuningSpace(self.capability, conf) - print(tuning_space.root_item.get_details()) + logger.debug(tuning_space.root_item.get_details()) # ops supported static static_items = tuning_space.query_items_by_quant_mode('static') static_items_name = [item.name for item in static_items] @@ -227,7 +277,7 @@ def test_tuning_space_merge_model_wise(self): } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - print(tuning_space2.root_item.get_details()) + logger.debug(tuning_space2.root_item.get_details()) found_per_tensor = False for quant_mode in 
['static', 'dynamic']: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): @@ -249,7 +299,7 @@ def test_tuning_space_merge_optype_wise(self): } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - print(tuning_space2.root_item.get_details()) + logger.debug(tuning_space2.root_item.get_details()) found_act_algo_kl_optype1 = False found_act_algo_kl_others = False for quant_mode in ['static', 'dynamic']: @@ -276,7 +326,7 @@ def test_tuning_space_merge_op_wise(self): } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - print(tuning_space2.root_item.get_details()) + logger.debug(tuning_space2.root_item.get_details()) found_quant_op_name4 = False found_fp32_op_name4 = False for quant_mode in ['static', 'dynamic']: @@ -306,7 +356,7 @@ def test_tuning_space_merge_model_wise_and_opty_wise(self): # the optype_wise config will overwrite the model-wise config conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - print(tuning_space2.root_item.get_details()) + logger.debug(tuning_space2.root_item.get_details()) found_per_tensor = False for quant_mode in ['static', 'dynamic']: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): diff --git a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py index 09a595be4a9..e5402c910fa 100644 --- a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py @@ -348,7 +348,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) diff --git a/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py 
b/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py index 3c558410c86..31dd69118b1 100644 --- a/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py @@ -217,7 +217,7 @@ def test_conv2d_biasadd_elu_fusion(self): self.assertNotEqual(output_graph, None) elu_fused = False for node in output_graph.graph_def.node: - if node.name == 'conv_eightbit_requantize': + if node.name == 'conv_eightbit_requantize_dequantize': if b'Elu' in node.attr['fused_ops'].list.s: elu_fused = True self.assertEqual(elu_fused, True) diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py index d99a48c1803..267a5627fd4 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py @@ -135,7 +135,7 @@ def test_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): self.assertEqual(conv_input_type, True) self.assertEqual(found_fusion, True) self.assertEqual(qbn_num, 1) - self.assertEqual(dq_num, 1) + self.assertEqual(dq_num, 0) @disable_random() def test_training_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): @@ -174,7 +174,7 @@ def test_training_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): dq_num += 1 self.assertEqual(bn_num, 1) self.assertEqual(qbn_num, 0) - self.assertEqual(dq_num, 1) + self.assertEqual(dq_num, 0) bf16_enabled = bool(CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') if bf16_enabled: self.assertEqual(bf16_bn_num, 1) @@ -231,7 +231,7 @@ def test_bn_leakyrelu_conv_biasadd_relu(self): self.assertEqual(conv_input_type, True) self.assertEqual(found_fusion, True) self.assertEqual(qbn_num, 1) - self.assertEqual(dq_num, 1) + self.assertEqual(dq_num, 0) self.assertEqual(is_offset_const, True) self.assertEqual(is_mean_const, True) self.assertEqual(round(qbn_alpha, 7), 0.3) @@ -289,7 +289,7 @@ def test_bn_relu_conv_biasadd_relu(self): self.assertEqual(conv_input_type, True) 
self.assertEqual(found_fusion, True) self.assertEqual(qbn_num, 1) - self.assertEqual(dq_num, 1) + self.assertEqual(dq_num, 0) self.assertEqual(is_offset_const, True) self.assertEqual(is_mean_const, True) self.assertEqual(frozen_qbn_output_max, qbn_output_max_name) diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py index a5a98c63358..55a5a756018 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py @@ -204,7 +204,7 @@ def test_conv3d_relu6_fusion(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv3D': found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Requantize']): + if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Dequantize']): found_requantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_requantize_fusion, True) @@ -479,7 +479,7 @@ def test_conv3d_leakyrelu_fusion(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'LeakyRelu', b'Requantize']: + i.attr['fused_ops'].list.s == [b'BiasAdd', b'LeakyRelu', b'Dequantize']: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py index 981bdbee29a..cb25dffd52b 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py @@ -317,7 +317,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: found_conv_fusion = True break 
self.assertEqual(found_conv_fusion, True) diff --git a/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py b/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py index 9f4b1d69ba6..88f249ba522 100644 --- a/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py +++ b/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py @@ -11,7 +11,7 @@ from neural_compressor import data from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader from neural_compressor.data.dataloaders.dataloader import DataLoader -from neural_compressor.data import DATASETS, DATALOADERS, TRANSFORMS +from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS from neural_compressor.utils import logger from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 diff --git a/test/ux/utils/test_utils.py b/test/ux/utils/test_utils.py index 19b10459290..172eaa82387 100644 --- a/test/ux/utils/test_utils.py +++ b/test/ux/utils/test_utils.py @@ -308,14 +308,14 @@ def test_load_common_model_wise_params(self) -> None: "weight": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": ["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax"], "bit": 7.0, }, "activation": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": ["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax", "kl"], }, }, @@ -333,14 +333,14 @@ def test_load_pytorch_model_wise_params(self) -> None: "weight": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym", "asym_float"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": ["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax"], "bit": 7.0, }, "activation": { "granularity": ["per_channel", "per_tensor"], "scheme": ["asym", "sym"], - "dtype": ["int8", "uint8", "fp32", "bf16", "fp16"], + "dtype": 
["int8", "uint8", "fp32", "bf16"], "algorithm": ["minmax", "kl", "placeholder"], "compute_dtype": ["int8", "uint8", "fp32", "bf16", "None"], },