| From b764276b183d379d5b80e3f601ac42f574da5455 Mon Sep 17 00:00:00 2001 |
| From: Chris Blume <cblume@chromium.org> |
| Date: Tue, 16 Jan 2018 21:19:28 +0000 |
| Subject: [PATCH] Revert "Revert "libpng: Optimize png_do_expand_palette with |
| NEON."" |
| |
| This reverts commit 20a18ebc02429a40e4bc33fa28c4de29a39b609c. |
| |
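| Reason for revert: Relanding with the uint8x16x4_t initializer in png_riffle_palette_rgba() fully braced ({{ ... }}); presumably this addresses the ios-device-xcode-clang compile failure cited below (to be confirmed). |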
| |
| Original change's description: |
| > Revert "libpng: Optimize png_do_expand_palette with NEON." |
| > |
| > This reverts commit c4811af6d72836d44a3630beecebb0ff55875ab1. |
| > |
| > Reason for revert: This is failing to compile on ios-device-xcode-clang bot. |
| > |
| > https://uberchromegw.corp.google.com/i/chromium.mac/builders/ios-device-xcode-clang/builds/50225 |
| > |
| > Original change's description: |
| > > libpng: Optimize png_do_expand_palette with NEON. |
| > > |
| > > ARM-specific optimization processes 8 or 4 pixels at once. |
| > > |
| > > * Without transparency: 22% performance gain on the A53 little core. |
| > > * With transparency: 10% improvement on a big A72 core, 24% on little. |
| > > |
| > > (Numbers from image_decode_bench with PNG140 on the elm chromebook). |
| > > |
| > > Bug: 706134 |
| > > Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54 |
| > > Reviewed-on: https://chromium-review.googlesource.com/817116 |
| > > Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org> |
| > > Reviewed-by: Mike Klein <mtklein@chromium.org> |
| > > Reviewed-by: Leon Scroggins <scroggo@chromium.org> |
| > > Reviewed-by: Chris Blume <cblume@chromium.org> |
| > > Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org> |
| > > Cr-Commit-Position: refs/heads/master@{#529473} |
| > |
| > TBR=scroggo@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org |
| > |
| > Change-Id: I2cd943e15ceadf4311b1b49a56de00d10684e294 |
| > No-Presubmit: true |
| > No-Tree-Checks: true |
| > No-Try: true |
| > Bug: 706134 |
| > Reviewed-on: https://chromium-review.googlesource.com/868770 |
| > Reviewed-by: Jonathan Ross <jonross@chromium.org> |
| > Commit-Queue: Jonathan Ross <jonross@chromium.org> |
| > Cr-Commit-Position: refs/heads/master@{#529484} |
| |
| TBR=scroggo@chromium.org,jonross@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org |
| |
| Change-Id: I6baf17d35efbd5c6bc348d4f81264e8e1023be4f |
| No-Presubmit: true |
| No-Tree-Checks: true |
| No-Try: true |
| Bug: 706134 |
| --- |
| third_party/libpng/BUILD.gn | 1 + |
| third_party/libpng/arm/palette_neon_intrinsics.c | 137 ++++ |
| third_party/libpng/patches/0000-plte.patch | 776 +++++++++++++++++++++++ |
| third_party/libpng/patches/README | 4 + |
| third_party/libpng/pngpriv.h | 23 + |
| third_party/libpng/pngrtran.c | 49 +- |
| third_party/libpng/pngstruct.h | 4 + |
| third_party/libpng/pngwrite.c | 4 + |
| 8 files changed, 990 insertions(+), 8 deletions(-) |
| create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c |
| create mode 100644 third_party/libpng/patches/0000-plte.patch |
| create mode 100644 third_party/libpng/patches/README |
| |
| diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn |
| index e2658a7ba623..e48c790326c7 100644 |
| --- a/third_party/libpng/BUILD.gn |
| +++ b/third_party/libpng/BUILD.gn |
| @@ -75,6 +75,7 @@ source_set("libpng_sources") { |
| sources += [ |
| "arm/arm_init.c", |
| "arm/filter_neon_intrinsics.c", |
| + "arm/palette_neon_intrinsics.c", |
| ] |
| defines += [ |
| "PNG_ARM_NEON_OPT=2", |
| diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c |
| new file mode 100644 |
| index 000000000000..703b9ff25053 |
| --- /dev/null |
| +++ b/third_party/libpng/arm/palette_neon_intrinsics.c |
| @@ -0,0 +1,137 @@ |
| +/* palette_neon_intrinsics.c - NEON optimised palette expansion functions |
| + * |
| + * Copyright (c) 2017 The Chromium Authors. All rights reserved. |
| + * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017. |
| + * |
| + * This code is released under the libpng license. |
| + * For conditions of distribution and use, see the disclaimer |
| + * and license in png.h |
| + */ |
| + |
| +#include "../pngpriv.h" |
| + |
| +#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| + |
| +#include <arm_neon.h> |
| + |
| +/* Interleaves png_ptr->palette with png_ptr->trans_alpha into |
| + * png_ptr->riffled_palette as 4-byte entries (three palette bytes, then |
| + * one alpha byte). Calls png_error() unless row_info->bit_depth is 8. */ |
| +void |
| +png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info) |
| +{ |
| +   png_const_colorp rgb_entries = png_ptr->palette; |
| +   png_const_bytep alpha_entries = png_ptr->trans_alpha; |
| +   png_bytep rgba_out = png_ptr->riffled_palette; |
| +   int alpha_count = png_ptr->num_trans; |
| + |
| +   if (row_info->bit_depth != 8) { |
| +      png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba"); |
| +      return; |
| +   } |
| +   /* Alpha plane preset to 0xff; the colour planes are replaced per batch. */ |
| +   uint8x16x4_t rgba = {{ |
| +      vdupq_n_u8(0x00), |
| +      vdupq_n_u8(0x00), |
| +      vdupq_n_u8(0x00), |
| +      vdupq_n_u8(0xff), |
| +   }}; |
| + |
| +   /* Load 16 three-byte palette entries per pass, de-interleaved, and store |
| +    * them back re-interleaved with the preset alpha plane. */ |
| +   int n; |
| +   for (n = 0; n < (1 << row_info->bit_depth); n += 16) { |
| +      uint8x16x3_t rgb = vld3q_u8((png_const_bytep)(rgb_entries + n)); |
| +      rgba.val[0] = rgb.val[0]; |
| +      rgba.val[1] = rgb.val[1]; |
| +      rgba.val[2] = rgb.val[2]; |
| +      vst4q_u8(rgba_out + n * 4, rgba); |
| +   } |
| +   /* Overwrite the alpha byte of the first num_trans entries. */ |
| +   for (n = 0; n < alpha_count; n++) { |
| +      rgba_out[n * 4 + 3] = alpha_entries[n]; |
| +   } |
| +} |
| + |
| + |
| +/* Expands palette indices to RGBA with NEON, four pixels per step, working |
| + * backwards from the end of the row. On entry *ssp and *ddp address the |
| + * last source byte and the last output byte. On return both have been |
| + * moved back past the pixels handled here and that pixel count is returned |
| + * (0 when row_info->width < 4); the caller expands any remaining pixels. */ |
| +int |
| +png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info, |
| +    png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| +{ |
| +   png_uint_32 row_width = row_info->width; |
| +   const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette; |
| +   const png_uint_32 pixels_per_chunk = 4; |
| +   png_uint_32 i; |
| + |
| +   if (row_width < pixels_per_chunk) { |
| +      return 0; |
| +   } |
| + |
| +   /* The NEON store writes forward, so address the first byte of the last |
| +    * 4 pixel x 4 byte output chunk instead of the last byte of the row. */ |
| +   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1); |
| + |
| +   /* Whole chunks only: a partial chunk would read and write before row. */ |
| +   for (i = 0; i + pixels_per_chunk <= row_width; i += pixels_per_chunk) { |
| +      uint32x4_t cur; |
| +      png_bytep sp = *ssp - i, dp = *ddp - (i << 2); |
| +      cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); |
| +      cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); |
| +      cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); |
| +      cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3); |
| +      vst1q_u32((void *)dp, cur); |
| +   } |
| + |
| +   /* Undo the seek-back so *ddp is a last-byte pointer again, then step |
| +    * both pointers back over the i pixels that were expanded. */ |
| +   *ssp = *ssp - i; |
| +   *ddp = *ddp + ((pixels_per_chunk * sizeof(png_uint_32)) - 1) - (i << 2); |
| +   return (int)i; |
| +} |
| + |
| +/* Expands palette indices to RGB with NEON, eight pixels per step, working |
| + * backwards from the end of the row. On entry *ssp and *ddp address the |
| + * last source byte and the last output byte. On return both have been moved |
| + * back past the pixels handled here and that pixel count is returned (0 when |
| + * row_info->width <= 8); the caller expands any remaining pixels. */ |
| +int |
| +png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info, |
| +    png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| +{ |
| +   png_uint_32 row_width = row_info->width; |
| +   png_const_bytep palette = (png_const_bytep)png_ptr->palette; |
| +   const png_uint_32 pixels_per_chunk = 8; |
| +   png_uint_32 i; |
| + |
| +   if (row_width <= pixels_per_chunk) { |
| +      return 0; |
| +   } |
| + |
| +   /* vst3_u8 writes forward, so address the first byte of the last |
| +    * 8 pixel x 3 byte output chunk instead of the last byte of the row. */ |
| +   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1); |
| + |
| +   /* Whole chunks only: a partial chunk would read and write before row. */ |
| +   for (i = 0; i + pixels_per_chunk <= row_width; i += pixels_per_chunk) { |
| +      uint8x8x3_t cur; |
| +      png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); |
| +      cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6); |
| +      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7); |
| +      vst3_u8((void *)dp, cur); |
| +   } |
| +   /* Undo the seek-back so *ddp is a last-byte pointer again, then step |
| +    * both pointers back over the i pixels that were expanded. */ |
| +   *ssp = *ssp - i; |
| +   *ddp = *ddp + ((pixels_per_chunk * sizeof(png_color)) - 1) - ((i << 1) + i); |
| +   return (int)i; |
| +} |
| + |
| +#endif /* PNG_ARM_NEON_IMPLEMENTATION */ |
| diff --git a/third_party/libpng/patches/0000-plte.patch b/third_party/libpng/patches/0000-plte.patch |
| new file mode 100644 |
| index 000000000000..feea4d874335 |
| --- /dev/null |
| +++ b/third_party/libpng/patches/0000-plte.patch |
| @@ -0,0 +1,776 @@ |
| +From 5d94e310951211886ee460701176cb36c6e4bc88 Mon Sep 17 00:00:00 2001 |
| +From: Chris Blume <cblume@chromium.org> |
| +Date: Tue, 16 Jan 2018 21:19:28 +0000 |
| +Subject: [PATCH 1/2] Revert "Revert "libpng: Optimize png_do_expand_palette |
| + with NEON."" |
| + |
| +This reverts commit 20a18ebc02429a40e4bc33fa28c4de29a39b609c. |
| + |
| +Reason for revert: <INSERT REASONING HERE> |
| + |
| +Original change's description: |
| +> Revert "libpng: Optimize png_do_expand_palette with NEON." |
| +> |
| +> This reverts commit c4811af6d72836d44a3630beecebb0ff55875ab1. |
| +> |
| +> Reason for revert: This is failing to compile on ios-device-xcode-clang bot. |
| +> |
| +> https://uberchromegw.corp.google.com/i/chromium.mac/builders/ios-device-xcode-clang/builds/50225 |
| +> |
| +> Original change's description: |
| +> > libpng: Optimize png_do_expand_palette with NEON. |
| +> > |
| +> > ARM-specific optimization processes 8 or 4 pixels at once. |
| +> > |
| +> > * Without transparency: 22% performance gain on the A53 little core. |
| +> > * With transparency: 10% improvement on a big A72 core, 24% on little. |
| +> > |
| +> > (Numbers from image_decode_bench with PNG140 on the elm chromebook). |
| +> > |
| +> > Bug: 706134 |
| +> > Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54 |
| +> > Reviewed-on: https://chromium-review.googlesource.com/817116 |
| +> > Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org> |
| +> > Reviewed-by: Mike Klein <mtklein@chromium.org> |
| +> > Reviewed-by: Leon Scroggins <scroggo@chromium.org> |
| +> > Reviewed-by: Chris Blume <cblume@chromium.org> |
| +> > Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org> |
| +> > Cr-Commit-Position: refs/heads/master@{#529473} |
| +> |
| +> TBR=scroggo@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org |
| +> |
| +> Change-Id: I2cd943e15ceadf4311b1b49a56de00d10684e294 |
| +> No-Presubmit: true |
| +> No-Tree-Checks: true |
| +> No-Try: true |
| +> Bug: 706134 |
| +> Reviewed-on: https://chromium-review.googlesource.com/868770 |
| +> Reviewed-by: Jonathan Ross <jonross@chromium.org> |
| +> Commit-Queue: Jonathan Ross <jonross@chromium.org> |
| +> Cr-Commit-Position: refs/heads/master@{#529484} |
| + |
| +TBR=scroggo@chromium.org,jonross@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org |
| + |
| +Change-Id: I6baf17d35efbd5c6bc348d4f81264e8e1023be4f |
| +No-Presubmit: true |
| +No-Tree-Checks: true |
| +No-Try: true |
| +Bug: 706134 |
| +--- |
| + third_party/libpng/BUILD.gn | 1 + |
| + third_party/libpng/arm/palette_neon_intrinsics.c | 137 +++++++++ |
| + third_party/libpng/patches/0000-plte.patch | 340 +++++++++++++++++++++++ |
| + third_party/libpng/patches/README | 4 + |
| + third_party/libpng/pngpriv.h | 23 ++ |
| + third_party/libpng/pngrtran.c | 49 +++- |
| + third_party/libpng/pngstruct.h | 4 + |
| + third_party/libpng/pngwrite.c | 4 + |
| + 8 files changed, 554 insertions(+), 8 deletions(-) |
| + create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c |
| + create mode 100644 third_party/libpng/patches/0000-plte.patch |
| + create mode 100644 third_party/libpng/patches/README |
| + |
| +diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn |
| +index e2658a7ba623..e48c790326c7 100644 |
| +--- a/third_party/libpng/BUILD.gn |
| ++++ b/third_party/libpng/BUILD.gn |
| +@@ -75,6 +75,7 @@ source_set("libpng_sources") { |
| + sources += [ |
| + "arm/arm_init.c", |
| + "arm/filter_neon_intrinsics.c", |
| ++ "arm/palette_neon_intrinsics.c", |
| + ] |
| + defines += [ |
| + "PNG_ARM_NEON_OPT=2", |
| +diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c |
| +new file mode 100644 |
| +index 000000000000..0c0c0a909f8d |
| +--- /dev/null |
| ++++ b/third_party/libpng/arm/palette_neon_intrinsics.c |
| +@@ -0,0 +1,137 @@ |
| ++/* palette_neon_intrinsics.c - NEON optimised palette expansion functions |
| ++ * |
| ++ * Copyright (c) 2017 The Chromium Authors. All rights reserved. |
| ++ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017. |
| ++ * |
| ++ * This code is released under the libpng license. |
| ++ * For conditions of distribution and use, see the disclaimer |
| ++ * and license in png.h |
| ++ */ |
| ++ |
| ++#include "../pngpriv.h" |
| ++ |
| ++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| ++ |
| ++#include <arm_neon.h> |
| ++ |
| ++/* Build an RGBA palette from the RGB and separate alpha palettes. */ |
| ++void |
| ++png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info) |
| ++{ |
| ++ png_const_colorp palette = png_ptr->palette; |
| ++ png_bytep riffled_palette = png_ptr->riffled_palette; |
| ++ png_const_bytep trans_alpha = png_ptr->trans_alpha; |
| ++ int num_trans = png_ptr->num_trans; |
| ++ |
| ++ if (row_info->bit_depth != 8) { |
| ++ png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba"); |
| ++ return; |
| ++ } |
| ++ |
| ++ /* Initially black, opaque. */ |
| ++ uint8x16x4_t w = { |
| ++ vdupq_n_u8(0x00), |
| ++ vdupq_n_u8(0x00), |
| ++ vdupq_n_u8(0x00), |
| ++ vdupq_n_u8(0xff), |
| ++ }; |
| ++ |
| ++ int i; |
| ++ /* First, riffle the RGB colours into a RGBA palette, the A value is |
| ++ * set to opaque for now. */ |
| ++ for (i = 0; i < (1 << row_info->bit_depth); i += 16) { |
| ++ uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i)); |
| ++ w.val[0] = v.val[0]; |
| ++ w.val[1] = v.val[1]; |
| ++ w.val[2] = v.val[2]; |
| ++ vst4q_u8(riffled_palette + (i << 2), w); |
| ++ } |
| ++ |
| ++ /* Fix up the missing transparency values. */ |
| ++ for (i = 0; i < num_trans; i++) { |
| ++ riffled_palette[(i << 2) + 3] = trans_alpha[i]; |
| ++ } |
| ++} |
| ++ |
| ++ |
| ++/* Expands a palettized row into RGBA. */ |
| ++int |
| ++png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info, |
| ++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| ++{ |
| ++ |
| ++ png_uint_32 row_width = row_info->width; |
| ++ const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette; |
| ++ const png_int_32 pixels_per_chunk = 4; |
| ++ |
| ++ if (row_width < pixels_per_chunk) { |
| ++ return 0; |
| ++ } |
| ++ |
| ++ /* This function originally gets the last byte of the output row. |
| ++ The NEON part writes forward from a given position, so we have |
| ++ to seek this back by 4 pixels x 4 bytes. */ |
| ++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1); |
| ++ |
| ++ int i; |
| ++ for (i = 0; i < row_width; i += pixels_per_chunk) { |
| ++ uint32x4_t cur; |
| ++ png_bytep sp = *ssp - i, dp = *ddp - (i << 2); |
| ++ cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); |
| ++ cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); |
| ++ cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); |
| ++ cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3); |
| ++ vst1q_u32((void *)dp, cur); |
| ++ } |
| ++ if (i != row_width) { |
| ++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */ |
| ++ } |
| ++ |
| ++ /* Decrement output pointers. */ |
| ++ *ssp = *ssp - i; |
| ++ *ddp = *ddp - (i << 2); |
| ++ return i; |
| ++} |
| ++ |
| ++/* Expands a palettized row into RGB format. */ |
| ++int |
| ++png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info, |
| ++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| ++{ |
| ++ png_uint_32 row_width = row_info->width; |
| ++ png_const_bytep palette = (png_const_bytep)png_ptr->palette; |
| ++ const png_uint_32 pixels_per_chunk = 8; |
| ++ |
| ++ if (row_width <= pixels_per_chunk) { |
| ++ return 0; |
| ++ } |
| ++ |
| ++ /* Seeking this back by 8 pixels x 3 bytes. */ |
| ++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1); |
| ++ |
| ++ int i; |
| ++ for (i = 0; i < row_width; i += pixels_per_chunk) { |
| ++ uint8x8x3_t cur; |
| ++ png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); |
| ++ cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6); |
| ++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7); |
| ++ vst3_u8((void *)dp, cur); |
| ++ } |
| ++ |
| ++ if (i != row_width) { |
| ++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */ |
| ++ } |
| ++ |
| ++ /* Decrement output pointers. */ |
| ++ *ssp = *ssp - i; |
| ++ *ddp = *ddp - ((i << 1) + i); |
| ++ return i; |
| ++} |
| ++ |
| ++#endif /* PNG_ARM_NEON_IMPLEMENTATION */ |
| +diff --git a/third_party/libpng/patches/0000-plte.patch b/third_party/libpng/patches/0000-plte.patch |
| +new file mode 100644 |
| +index 000000000000..6fceb2f2275a |
| +--- /dev/null |
| ++++ b/third_party/libpng/patches/0000-plte.patch |
| +@@ -0,0 +1,340 @@ |
| ++From aa270a19f7bb9d9cba207b38c0a98cb3a3dc681e Mon Sep 17 00:00:00 2001 |
| ++From: Richard Townsend <Richard.Townsend@arm.com> |
| ++Date: Mon, 20 Feb 2017 14:06:14 +0000 |
| ++Subject: [PATCH] libpng: Optimize png_do_expand_palette with NEON. |
| ++ |
| ++ARM-specific optimization processes 8 or 4 pixels at once. |
| ++ |
| ++* Without transparency: 22% performance gain on the A53 little core. |
| ++* With transparency: 10% improvement on a big A72 core, 24% on little. |
| ++ |
| ++(Numbers from image_decode_bench with PNG140 on the elm chromebook). |
| ++ |
| ++Bug: 706134 |
| ++Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54 |
| ++--- |
| ++ third_party/libpng/BUILD.gn | 1 + |
| ++ third_party/libpng/arm/palette_neon_intrinsics.c | 137 +++++++++++++++++++++++ |
| ++ third_party/libpng/pngpriv.h | 23 ++++ |
| ++ third_party/libpng/pngrtran.c | 49 ++++++-- |
| ++ third_party/libpng/pngstruct.h | 4 + |
| ++ third_party/libpng/pngwrite.c | 4 + |
| ++ 6 files changed, 210 insertions(+), 8 deletions(-) |
| ++ create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c |
| ++ |
| ++diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn |
| ++index e2658a7ba623..e48c790326c7 100644 |
| ++--- a/third_party/libpng/BUILD.gn |
| +++++ b/third_party/libpng/BUILD.gn |
| ++@@ -75,6 +75,7 @@ source_set("libpng_sources") { |
| ++ sources += [ |
| ++ "arm/arm_init.c", |
| ++ "arm/filter_neon_intrinsics.c", |
| +++ "arm/palette_neon_intrinsics.c", |
| ++ ] |
| ++ defines += [ |
| ++ "PNG_ARM_NEON_OPT=2", |
| ++diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c |
| ++new file mode 100644 |
| ++index 000000000000..0c0c0a909f8d |
| ++--- /dev/null |
| +++++ b/third_party/libpng/arm/palette_neon_intrinsics.c |
| ++@@ -0,0 +1,137 @@ |
| +++/* palette_neon_intrinsics.c - NEON optimised palette expansion functions |
| +++ * |
| +++ * Copyright (c) 2017 The Chromium Authors. All rights reserved. |
| +++ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017. |
| +++ * |
| +++ * This code is released under the libpng license. |
| +++ * For conditions of distribution and use, see the disclaimer |
| +++ * and license in png.h |
| +++ */ |
| +++ |
| +++#include "../pngpriv.h" |
| +++ |
| +++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| +++ |
| +++#include <arm_neon.h> |
| +++ |
| +++/* Build an RGBA palette from the RGB and separate alpha palettes. */ |
| +++void |
| +++png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info) |
| +++{ |
| +++ png_const_colorp palette = png_ptr->palette; |
| +++ png_bytep riffled_palette = png_ptr->riffled_palette; |
| +++ png_const_bytep trans_alpha = png_ptr->trans_alpha; |
| +++ int num_trans = png_ptr->num_trans; |
| +++ |
| +++ if (row_info->bit_depth != 8) { |
| +++ png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba"); |
| +++ return; |
| +++ } |
| +++ |
| +++ /* Initially black, opaque. */ |
| +++ uint8x16x4_t w = { |
| +++ vdupq_n_u8(0x00), |
| +++ vdupq_n_u8(0x00), |
| +++ vdupq_n_u8(0x00), |
| +++ vdupq_n_u8(0xff), |
| +++ }; |
| +++ |
| +++ int i; |
| +++ /* First, riffle the RGB colours into a RGBA palette, the A value is |
| +++ * set to opaque for now. */ |
| +++ for (i = 0; i < (1 << row_info->bit_depth); i += 16) { |
| +++ uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i)); |
| +++ w.val[0] = v.val[0]; |
| +++ w.val[1] = v.val[1]; |
| +++ w.val[2] = v.val[2]; |
| +++ vst4q_u8(riffled_palette + (i << 2), w); |
| +++ } |
| +++ |
| +++ /* Fix up the missing transparency values. */ |
| +++ for (i = 0; i < num_trans; i++) { |
| +++ riffled_palette[(i << 2) + 3] = trans_alpha[i]; |
| +++ } |
| +++} |
| +++ |
| +++ |
| +++/* Expands a palettized row into RGBA. */ |
| +++int |
| +++png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info, |
| +++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| +++{ |
| +++ |
| +++ png_uint_32 row_width = row_info->width; |
| +++ const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette; |
| +++ const png_int_32 pixels_per_chunk = 4; |
| +++ |
| +++ if (row_width < pixels_per_chunk) { |
| +++ return 0; |
| +++ } |
| +++ |
| +++ /* This function originally gets the last byte of the output row. |
| +++ The NEON part writes forward from a given position, so we have |
| +++ to seek this back by 4 pixels x 4 bytes. */ |
| +++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1); |
| +++ |
| +++ int i; |
| +++ for (i = 0; i < row_width; i += pixels_per_chunk) { |
| +++ uint32x4_t cur; |
| +++ png_bytep sp = *ssp - i, dp = *ddp - (i << 2); |
| +++ cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); |
| +++ cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); |
| +++ cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); |
| +++ cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3); |
| +++ vst1q_u32((void *)dp, cur); |
| +++ } |
| +++ if (i != row_width) { |
| +++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */ |
| +++ } |
| +++ |
| +++ /* Decrement output pointers. */ |
| +++ *ssp = *ssp - i; |
| +++ *ddp = *ddp - (i << 2); |
| +++ return i; |
| +++} |
| +++ |
| +++/* Expands a palettized row into RGB format. */ |
| +++int |
| +++png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info, |
| +++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) |
| +++{ |
| +++ png_uint_32 row_width = row_info->width; |
| +++ png_const_bytep palette = (png_const_bytep)png_ptr->palette; |
| +++ const png_uint_32 pixels_per_chunk = 8; |
| +++ |
| +++ if (row_width <= pixels_per_chunk) { |
| +++ return 0; |
| +++ } |
| +++ |
| +++ /* Seeking this back by 8 pixels x 3 bytes. */ |
| +++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1); |
| +++ |
| +++ int i; |
| +++ for (i = 0; i < row_width; i += pixels_per_chunk) { |
| +++ uint8x8x3_t cur; |
| +++ png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); |
| +++ cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6); |
| +++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7); |
| +++ vst3_u8((void *)dp, cur); |
| +++ } |
| +++ |
| +++ if (i != row_width) { |
| +++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */ |
| +++ } |
| +++ |
| +++ /* Decrement output pointers. */ |
| +++ *ssp = *ssp - i; |
| +++ *ddp = *ddp - ((i << 1) + i); |
| +++ return i; |
| +++} |
| +++ |
| +++#endif /* PNG_ARM_NEON_IMPLEMENTATION */ |
| ++diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h |
| ++index 1f2e90f2b37b..5652525b2b51 100644 |
| ++--- a/third_party/libpng/pngpriv.h |
| +++++ b/third_party/libpng/pngpriv.h |
| ++@@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, |
| ++ PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, |
| ++ png_const_charp key, png_bytep new_key), PNG_EMPTY); |
| ++ |
| +++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| +++PNG_INTERNAL_FUNCTION(void, |
| +++ png_riffle_palette_rgba, |
| +++ (png_structrp, png_row_infop), |
| +++ PNG_EMPTY); |
| +++PNG_INTERNAL_FUNCTION(int, |
| +++ png_do_expand_palette_neon_rgba, |
| +++ (png_structrp, |
| +++ png_row_infop, |
| +++ png_const_bytep, |
| +++ const png_bytepp, |
| +++ const png_bytepp), |
| +++ PNG_EMPTY); |
| +++PNG_INTERNAL_FUNCTION(int, |
| +++ png_do_expand_palette_neon_rgb, |
| +++ (png_structrp, |
| +++ png_row_infop, |
| +++ png_const_bytep, |
| +++ const png_bytepp, |
| +++ const png_bytepp), |
| +++ PNG_EMPTY); |
| +++#endif |
| +++ |
| ++ /* Maintainer: Put new private prototypes here ^ */ |
| ++ |
| ++ #include "pngdebug.h" |
| ++diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c |
| ++index c1896503130e..9dd82c929bdc 100644 |
| ++--- a/third_party/libpng/pngrtran.c |
| +++++ b/third_party/libpng/pngrtran.c |
| ++@@ -18,6 +18,13 @@ |
| ++ |
| ++ #include "pngpriv.h" |
| ++ |
| +++#ifdef PNG_ARM_NEON_IMPLEMENTATION |
| +++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| +++#define PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| +++#include <arm_neon.h> |
| +++#endif |
| +++#endif |
| +++ |
| ++ #ifdef PNG_READ_SUPPORTED |
| ++ |
| ++ /* Set the action on getting a CRC error for an ancillary or critical chunk. */ |
| ++@@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr) |
| ++ * upon whether you supply trans and num_trans. |
| ++ */ |
| ++ static void |
| ++-png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| ++- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans) |
| +++png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, |
| +++ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, |
| +++ int num_trans) |
| ++ { |
| ++ int shift, value; |
| ++ png_bytep sp, dp; |
| ++@@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| ++ sp = row + (png_size_t)row_width - 1; |
| ++ dp = row + ((png_size_t)row_width << 2) - 1; |
| ++ |
| ++- for (i = 0; i < row_width; i++) |
| +++ i = 0; |
| +++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| +++ if (png_ptr->riffled_palette != NULL) { |
| +++ /* The RGBA optimization works with png_ptr->bit_depth == 8 |
| +++ but sometimes row_info->bit_depth has been changed to 8. |
| +++ In these cases, the palette hasn't been riffled. */ |
| +++ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp); |
| +++ } |
| +++#endif |
| +++ |
| +++ for (; i < row_width; i++) |
| ++ { |
| ++ if ((int)(*sp) >= num_trans) |
| ++ *dp-- = 0xff; |
| ++- |
| ++ else |
| ++ *dp-- = trans_alpha[*sp]; |
| ++- |
| ++ *dp-- = palette[*sp].blue; |
| ++ *dp-- = palette[*sp].green; |
| ++ *dp-- = palette[*sp].red; |
| ++@@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| ++ { |
| ++ sp = row + (png_size_t)row_width - 1; |
| ++ dp = row + (png_size_t)(row_width * 3) - 1; |
| +++ i = 0; |
| +++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| +++ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp); |
| +++#endif |
| ++ |
| ++- for (i = 0; i < row_width; i++) |
| +++ for (; i < row_width; i++) |
| ++ { |
| ++ *dp-- = palette[*sp].blue; |
| ++ *dp-- = palette[*sp].green; |
| ++@@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) |
| ++ { |
| ++ if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) |
| ++ { |
| ++- png_do_expand_palette(row_info, png_ptr->row_buf + 1, |
| ++- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| +++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| +++ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) { |
| +++ /* Allocate space for the decompressed full palette. */ |
| +++ if (png_ptr->riffled_palette == NULL) { |
| +++ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4); |
| +++ if (png_ptr->riffled_palette == NULL) { |
| +++ png_error(png_ptr, "NULL row buffer"); |
| +++ } |
| +++ /* Build the RGBA palette. */ |
| +++ png_riffle_palette_rgba(png_ptr, row_info); |
| +++ } |
| +++ } |
| +++#endif |
| +++ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, |
| +++ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| ++ } |
| ++ |
| ++ else |
| ++diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h |
| ++index d83f971253fe..aac88df02d32 100644 |
| ++--- a/third_party/libpng/pngstruct.h |
| +++++ b/third_party/libpng/pngstruct.h |
| ++@@ -228,6 +228,10 @@ struct png_struct_def |
| ++ * big_row_buf; while writing it is separately |
| ++ * allocated. |
| ++ */ |
| +++#ifdef PNG_READ_EXPAND_SUPPORTED |
| +++ /* Buffer to accelerate palette transformations. */ |
| +++ png_bytep riffled_palette; |
| +++#endif |
| ++ #ifdef PNG_WRITE_FILTER_SUPPORTED |
| ++ png_bytep try_row; /* buffer to save trial row when filtering */ |
| ++ png_bytep tst_row; /* buffer to save best trial row when filtering */ |
| ++diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c |
| ++index a16d77ce00c6..e25e5dcfdc18 100644 |
| ++--- a/third_party/libpng/pngwrite.c |
| +++++ b/third_party/libpng/pngwrite.c |
| ++@@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr) |
| ++ png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list); |
| ++ png_free(png_ptr, png_ptr->row_buf); |
| ++ png_ptr->row_buf = NULL; |
| +++#ifdef PNG_READ_EXPANDED_SUPPORTED |
| +++ png_free(png_ptr, png_ptr->riffled_palette); |
| +++ png_ptr->riffled_palette = NULL; |
| +++#endif |
| ++ #ifdef PNG_WRITE_FILTER_SUPPORTED |
| ++ png_free(png_ptr, png_ptr->prev_row); |
| ++ png_free(png_ptr, png_ptr->try_row); |
| ++-- |
| ++2.15.1 |
| ++ |
| +diff --git a/third_party/libpng/patches/README b/third_party/libpng/patches/README |
| +new file mode 100644 |
| +index 000000000000..786aeabede37 |
| +--- /dev/null |
| ++++ b/third_party/libpng/patches/README |
| +@@ -0,0 +1,4 @@ |
| ++This directory contains patches applied on top of libpng, which haven't been |
| ++upstreamed to the canonical libpng repository [1] yet. |
| ++ |
| ++[1] https://github.com/glennrp/libpng |
| +diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h |
| +index 1f2e90f2b37b..5652525b2b51 100644 |
| +--- a/third_party/libpng/pngpriv.h |
| ++++ b/third_party/libpng/pngpriv.h |
| +@@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, |
| + PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, |
| + png_const_charp key, png_bytep new_key), PNG_EMPTY); |
| + |
| ++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| ++PNG_INTERNAL_FUNCTION(void, |
| ++ png_riffle_palette_rgba, |
| ++ (png_structrp, png_row_infop), |
| ++ PNG_EMPTY); |
| ++PNG_INTERNAL_FUNCTION(int, |
| ++ png_do_expand_palette_neon_rgba, |
| ++ (png_structrp, |
| ++ png_row_infop, |
| ++ png_const_bytep, |
| ++ const png_bytepp, |
| ++ const png_bytepp), |
| ++ PNG_EMPTY); |
| ++PNG_INTERNAL_FUNCTION(int, |
| ++ png_do_expand_palette_neon_rgb, |
| ++ (png_structrp, |
| ++ png_row_infop, |
| ++ png_const_bytep, |
| ++ const png_bytepp, |
| ++ const png_bytepp), |
| ++ PNG_EMPTY); |
| ++#endif |
| ++ |
| + /* Maintainer: Put new private prototypes here ^ */ |
| + |
| + #include "pngdebug.h" |
| +diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c |
| +index c1896503130e..9dd82c929bdc 100644 |
| +--- a/third_party/libpng/pngrtran.c |
| ++++ b/third_party/libpng/pngrtran.c |
| +@@ -18,6 +18,13 @@ |
| + |
| + #include "pngpriv.h" |
| + |
| ++#ifdef PNG_ARM_NEON_IMPLEMENTATION |
| ++#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| ++#define PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| ++#include <arm_neon.h> |
| ++#endif |
| ++#endif |
| ++ |
| + #ifdef PNG_READ_SUPPORTED |
| + |
| + /* Set the action on getting a CRC error for an ancillary or critical chunk. */ |
| +@@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr) |
| + * upon whether you supply trans and num_trans. |
| + */ |
| + static void |
| +-png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| +- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans) |
| ++png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, |
| ++ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, |
| ++ int num_trans) |
| + { |
| + int shift, value; |
| + png_bytep sp, dp; |
| +@@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| + sp = row + (png_size_t)row_width - 1; |
| + dp = row + ((png_size_t)row_width << 2) - 1; |
| + |
| +- for (i = 0; i < row_width; i++) |
| ++ i = 0; |
| ++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| ++ if (png_ptr->riffled_palette != NULL) { |
| ++ /* The RGBA optimization works with png_ptr->bit_depth == 8 |
| ++ but sometimes row_info->bit_depth has been changed to 8. |
| ++ In these cases, the palette hasn't been riffled. */ |
| ++ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp); |
| ++ } |
| ++#endif |
| ++ |
| ++ for (; i < row_width; i++) |
| + { |
| + if ((int)(*sp) >= num_trans) |
| + *dp-- = 0xff; |
| +- |
| + else |
| + *dp-- = trans_alpha[*sp]; |
| +- |
| + *dp-- = palette[*sp].blue; |
| + *dp-- = palette[*sp].green; |
| + *dp-- = palette[*sp].red; |
| +@@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| + { |
| + sp = row + (png_size_t)row_width - 1; |
| + dp = row + (png_size_t)(row_width * 3) - 1; |
| ++ i = 0; |
| ++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| ++ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp); |
| ++#endif |
| + |
| +- for (i = 0; i < row_width; i++) |
| ++ for (; i < row_width; i++) |
| + { |
| + *dp-- = palette[*sp].blue; |
| + *dp-- = palette[*sp].green; |
| +@@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) |
| + { |
| + if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) |
| + { |
| +- png_do_expand_palette(row_info, png_ptr->row_buf + 1, |
| +- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| ++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| ++ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) { |
| ++ /* Allocate space for the decompressed full palette. */ |
| ++ if (png_ptr->riffled_palette == NULL) { |
| ++ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4); |
| ++ if (png_ptr->riffled_palette == NULL) { |
| ++ png_error(png_ptr, "NULL row buffer"); |
| ++ } |
| ++ /* Build the RGBA palette. */ |
| ++ png_riffle_palette_rgba(png_ptr, row_info); |
| ++ } |
| ++ } |
| ++#endif |
| ++ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, |
| ++ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| + } |
| + |
| + else |
| +diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h |
| +index d83f971253fe..aac88df02d32 100644 |
| +--- a/third_party/libpng/pngstruct.h |
| ++++ b/third_party/libpng/pngstruct.h |
| +@@ -228,6 +228,10 @@ struct png_struct_def |
| + * big_row_buf; while writing it is separately |
| + * allocated. |
| + */ |
| ++#ifdef PNG_READ_EXPAND_SUPPORTED |
| ++ /* Buffer to accelerate palette transformations. */ |
| ++ png_bytep riffled_palette; |
| ++#endif |
| + #ifdef PNG_WRITE_FILTER_SUPPORTED |
| + png_bytep try_row; /* buffer to save trial row when filtering */ |
| + png_bytep tst_row; /* buffer to save best trial row when filtering */ |
| +diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c |
| +index a16d77ce00c6..e25e5dcfdc18 100644 |
| +--- a/third_party/libpng/pngwrite.c |
| ++++ b/third_party/libpng/pngwrite.c |
| +@@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr) |
| + png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list); |
| + png_free(png_ptr, png_ptr->row_buf); |
| + png_ptr->row_buf = NULL; |
| ++#ifdef PNG_READ_EXPAND_SUPPORTED |
| ++ png_free(png_ptr, png_ptr->riffled_palette); |
| ++ png_ptr->riffled_palette = NULL; |
| ++#endif |
| + #ifdef PNG_WRITE_FILTER_SUPPORTED |
| + png_free(png_ptr, png_ptr->prev_row); |
| + png_free(png_ptr, png_ptr->try_row); |
| +-- |
| +2.16.0.rc1.238.g530d649a79-goog |
| + |
| + |
| +From 736276e0ec02b5078ca98cfe6ed0bb3f86524a8b Mon Sep 17 00:00:00 2001 |
| +From: Chris Blume <cblume@google.com> |
| +Date: Tue, 16 Jan 2018 13:25:38 -0800 |
| +Subject: [PATCH 2/2] Explicitly initialize the member of the variable |
| + |
| +--- |
| + third_party/libpng/arm/palette_neon_intrinsics.c | 4 ++-- |
| + 1 file changed, 2 insertions(+), 2 deletions(-) |
| + |
| +diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c |
| +index 0c0c0a909f8d..703b9ff25053 100644 |
| +--- a/third_party/libpng/arm/palette_neon_intrinsics.c |
| ++++ b/third_party/libpng/arm/palette_neon_intrinsics.c |
| +@@ -29,12 +29,12 @@ png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info) |
| + } |
| + |
| + /* Initially black, opaque. */ |
| +- uint8x16x4_t w = { |
| ++ uint8x16x4_t w = {{ |
| + vdupq_n_u8(0x00), |
| + vdupq_n_u8(0x00), |
| + vdupq_n_u8(0x00), |
| + vdupq_n_u8(0xff), |
| +- }; |
| ++ }}; |
| + |
| + int i; |
| + /* First, riffle the RGB colours into a RGBA palette, the A value is |
| +-- |
| +2.16.0.rc1.238.g530d649a79-goog |
| + |
| diff --git a/third_party/libpng/patches/README b/third_party/libpng/patches/README |
| new file mode 100644 |
| index 000000000000..786aeabede37 |
| --- /dev/null |
| +++ b/third_party/libpng/patches/README |
| @@ -0,0 +1,4 @@ |
| +This directory contains patches applied on top of libpng, which haven't been |
| +upstreamed to the canonical libpng repository [1] yet. |
| + |
| +[1] https://github.com/glennrp/libpng |
| diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h |
| index 1f2e90f2b37b..5652525b2b51 100644 |
| --- a/third_party/libpng/pngpriv.h |
| +++ b/third_party/libpng/pngpriv.h |
| @@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, |
| PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, |
| png_const_charp key, png_bytep new_key), PNG_EMPTY); |
| |
| +#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| +PNG_INTERNAL_FUNCTION(void, |
| + png_riffle_palette_rgba, |
| + (png_structrp, png_row_infop), |
| + PNG_EMPTY); |
| +PNG_INTERNAL_FUNCTION(int, |
| + png_do_expand_palette_neon_rgba, |
| + (png_structrp, |
| + png_row_infop, |
| + png_const_bytep, |
| + const png_bytepp, |
| + const png_bytepp), |
| + PNG_EMPTY); |
| +PNG_INTERNAL_FUNCTION(int, |
| + png_do_expand_palette_neon_rgb, |
| + (png_structrp, |
| + png_row_infop, |
| + png_const_bytep, |
| + const png_bytepp, |
| + const png_bytepp), |
| + PNG_EMPTY); |
| +#endif |
| + |
| /* Maintainer: Put new private prototypes here ^ */ |
| |
| #include "pngdebug.h" |
| diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c |
| index c1896503130e..9dd82c929bdc 100644 |
| --- a/third_party/libpng/pngrtran.c |
| +++ b/third_party/libpng/pngrtran.c |
| @@ -18,6 +18,13 @@ |
| |
| #include "pngpriv.h" |
| |
| +#ifdef PNG_ARM_NEON_IMPLEMENTATION |
| +#if PNG_ARM_NEON_IMPLEMENTATION == 1 |
| +#define PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| +#include <arm_neon.h> |
| +#endif |
| +#endif |
| + |
| #ifdef PNG_READ_SUPPORTED |
| |
| /* Set the action on getting a CRC error for an ancillary or critical chunk. */ |
| @@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr) |
| * upon whether you supply trans and num_trans. |
| */ |
| static void |
| -png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| - png_const_colorp palette, png_const_bytep trans_alpha, int num_trans) |
| +png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, |
| + png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, |
| + int num_trans) |
| { |
| int shift, value; |
| png_bytep sp, dp; |
| @@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| sp = row + (png_size_t)row_width - 1; |
| dp = row + ((png_size_t)row_width << 2) - 1; |
| |
| - for (i = 0; i < row_width; i++) |
| + i = 0; |
| +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| + if (png_ptr->riffled_palette != NULL) { |
| + /* The RGBA optimization works with png_ptr->bit_depth == 8 |
| + but sometimes row_info->bit_depth has been changed to 8. |
| + In these cases, the palette hasn't been riffled. */ |
| + i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp); |
| + } |
| +#endif |
| + |
| + for (; i < row_width; i++) |
| { |
| if ((int)(*sp) >= num_trans) |
| *dp-- = 0xff; |
| - |
| else |
| *dp-- = trans_alpha[*sp]; |
| - |
| *dp-- = palette[*sp].blue; |
| *dp-- = palette[*sp].green; |
| *dp-- = palette[*sp].red; |
| @@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, |
| { |
| sp = row + (png_size_t)row_width - 1; |
| dp = row + (png_size_t)(row_width * 3) - 1; |
| + i = 0; |
| +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| + i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp); |
| +#endif |
| |
| - for (i = 0; i < row_width; i++) |
| + for (; i < row_width; i++) |
| { |
| *dp-- = palette[*sp].blue; |
| *dp-- = palette[*sp].green; |
| @@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) |
| { |
| if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) |
| { |
| - png_do_expand_palette(row_info, png_ptr->row_buf + 1, |
| - png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE |
| + if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) { |
| + /* Allocate space for the decompressed full palette. */ |
| + if (png_ptr->riffled_palette == NULL) { |
| + png_ptr->riffled_palette = png_malloc(png_ptr, 256*4); |
| + if (png_ptr->riffled_palette == NULL) { |
| + png_error(png_ptr, "NULL row buffer"); |
| + } |
| + /* Build the RGBA palette. */ |
| + png_riffle_palette_rgba(png_ptr, row_info); |
| + } |
| + } |
| +#endif |
| + png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, |
| + png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); |
| } |
| |
| else |
| diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h |
| index d83f971253fe..aac88df02d32 100644 |
| --- a/third_party/libpng/pngstruct.h |
| +++ b/third_party/libpng/pngstruct.h |
| @@ -228,6 +228,10 @@ struct png_struct_def |
| * big_row_buf; while writing it is separately |
| * allocated. |
| */ |
| +#ifdef PNG_READ_EXPAND_SUPPORTED |
| + /* Buffer to accelerate palette transformations. */ |
| + png_bytep riffled_palette; |
| +#endif |
| #ifdef PNG_WRITE_FILTER_SUPPORTED |
| png_bytep try_row; /* buffer to save trial row when filtering */ |
| png_bytep tst_row; /* buffer to save best trial row when filtering */ |
| diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c |
| index a16d77ce00c6..e25e5dcfdc18 100644 |
| --- a/third_party/libpng/pngwrite.c |
| +++ b/third_party/libpng/pngwrite.c |
| @@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr) |
| png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list); |
| png_free(png_ptr, png_ptr->row_buf); |
| png_ptr->row_buf = NULL; |
| +#ifdef PNG_READ_EXPAND_SUPPORTED |
| + png_free(png_ptr, png_ptr->riffled_palette); |
| + png_ptr->riffled_palette = NULL; |
| +#endif |
| #ifdef PNG_WRITE_FILTER_SUPPORTED |
| png_free(png_ptr, png_ptr->prev_row); |
| png_free(png_ptr, png_ptr->try_row); |
| -- |
| 2.16.0.rc1.238.g530d649a79-goog |
| |