blob: 45a5dcfe510d0964f7de9f75af05b025bd777a8a [file] [log] [blame]
From b764276b183d379d5b80e3f601ac42f574da5455 Mon Sep 17 00:00:00 2001
From: Chris Blume <cblume@chromium.org>
Date: Tue, 16 Jan 2018 21:19:28 +0000
Subject: [PATCH] Revert "Revert "libpng: Optimize png_do_expand_palette with
NEON.""
This reverts commit 20a18ebc02429a40e4bc33fa28c4de29a39b609c.
Reason for revert: <INSERT REASONING HERE>
Original change's description:
> Revert "libpng: Optimize png_do_expand_palette with NEON."
>
> This reverts commit c4811af6d72836d44a3630beecebb0ff55875ab1.
>
> Reason for revert: This is failing to compile on ios-device-xcode-clang bot.
>
> https://uberchromegw.corp.google.com/i/chromium.mac/builders/ios-device-xcode-clang/builds/50225
>
> Original change's description:
> > libpng: Optimize png_do_expand_palette with NEON.
> >
> > ARM-specific optimization processes 8 or 4 pixels at once.
> >
> > * Without transparency: 22% performance gain on the A53 little core.
> > * With transparency: 10% improvement on a big A72 core, 24% on little.
> >
> > (Numbers from image_decode_bench with PNG140 on the elm chromebook).
> >
> > Bug: 706134
> > Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54
> > Reviewed-on: https://chromium-review.googlesource.com/817116
> > Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org>
> > Reviewed-by: Mike Klein <mtklein@chromium.org>
> > Reviewed-by: Leon Scroggins <scroggo@chromium.org>
> > Reviewed-by: Chris Blume <cblume@chromium.org>
> > Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
> > Cr-Commit-Position: refs/heads/master@{#529473}
>
> TBR=scroggo@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org
>
> Change-Id: I2cd943e15ceadf4311b1b49a56de00d10684e294
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: 706134
> Reviewed-on: https://chromium-review.googlesource.com/868770
> Reviewed-by: Jonathan Ross <jonross@chromium.org>
> Commit-Queue: Jonathan Ross <jonross@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#529484}
TBR=scroggo@chromium.org,jonross@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org
Change-Id: I6baf17d35efbd5c6bc348d4f81264e8e1023be4f
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 706134
---
third_party/libpng/BUILD.gn | 1 +
third_party/libpng/arm/palette_neon_intrinsics.c | 137 ++++
third_party/libpng/patches/0000-plte.patch | 776 +++++++++++++++++++++++
third_party/libpng/patches/README | 4 +
third_party/libpng/pngpriv.h | 23 +
third_party/libpng/pngrtran.c | 49 +-
third_party/libpng/pngstruct.h | 4 +
third_party/libpng/pngwrite.c | 4 +
8 files changed, 990 insertions(+), 8 deletions(-)
create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c
create mode 100644 third_party/libpng/patches/0000-plte.patch
create mode 100644 third_party/libpng/patches/README
diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn
index e2658a7ba623..e48c790326c7 100644
--- a/third_party/libpng/BUILD.gn
+++ b/third_party/libpng/BUILD.gn
@@ -75,6 +75,7 @@ source_set("libpng_sources") {
sources += [
"arm/arm_init.c",
"arm/filter_neon_intrinsics.c",
+ "arm/palette_neon_intrinsics.c",
]
defines += [
"PNG_ARM_NEON_OPT=2",
diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c
new file mode 100644
index 000000000000..703b9ff25053
--- /dev/null
+++ b/third_party/libpng/arm/palette_neon_intrinsics.c
@@ -0,0 +1,137 @@
+/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
+ *
+ * Copyright (c) 2017 The Chromium Authors. All rights reserved.
+ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+
+#include <arm_neon.h>
+
+/* Build an RGBA palette from the RGB and separate alpha palettes. */
+void
+png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
+{
+ png_const_colorp palette = png_ptr->palette;
+ png_bytep riffled_palette = png_ptr->riffled_palette;
+ png_const_bytep trans_alpha = png_ptr->trans_alpha;
+ int num_trans = png_ptr->num_trans;
+
+ if (row_info->bit_depth != 8) {
+ png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
+ return;
+ }
+
+ /* Initially black, opaque. */
+ uint8x16x4_t w = {{
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0xff),
+ }};
+
+ int i;
+ /* First, riffle the RGB colours into a RGBA palette, the A value is
+ * set to opaque for now. */
+ for (i = 0; i < (1 << row_info->bit_depth); i += 16) {
+ uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
+ w.val[0] = v.val[0];
+ w.val[1] = v.val[1];
+ w.val[2] = v.val[2];
+ vst4q_u8(riffled_palette + (i << 2), w);
+ }
+
+ /* Fix up the missing transparency values. */
+ for (i = 0; i < num_trans; i++) {
+ riffled_palette[(i << 2) + 3] = trans_alpha[i];
+ }
+}
+
+
+/* Expands a palettized row into RGBA. */
+int
+png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
+ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
+{
+
+ png_uint_32 row_width = row_info->width;
+ const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette;
+ const png_int_32 pixels_per_chunk = 4;
+
+ if (row_width < pixels_per_chunk) {
+ return 0;
+ }
+
+ /* This function originally gets the last byte of the output row.
+ The NEON part writes forward from a given position, so we have
+ to seek this back by 4 pixels x 4 bytes. */
+ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
+
+ int i;
+ for (i = 0; i < row_width; i += pixels_per_chunk) {
+ uint32x4_t cur;
+ png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
+ cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
+ cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
+ cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
+ cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
+ vst1q_u32((void *)dp, cur);
+ }
+ if (i != row_width) {
+ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
+ }
+
+ /* Decrement output pointers. */
+ *ssp = *ssp - i;
+ *ddp = *ddp - (i << 2);
+ return i;
+}
+
+/* Expands a palettized row into RGB format. */
+int
+png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info,
+ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
+{
+ png_uint_32 row_width = row_info->width;
+ png_const_bytep palette = (png_const_bytep)png_ptr->palette;
+ const png_uint_32 pixels_per_chunk = 8;
+
+ if (row_width <= pixels_per_chunk) {
+ return 0;
+ }
+
+ /* Seeking this back by 8 pixels x 3 bytes. */
+ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
+
+ int i;
+ for (i = 0; i < row_width; i += pixels_per_chunk) {
+ uint8x8x3_t cur;
+ png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
+ cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
+ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
+ vst3_u8((void *)dp, cur);
+ }
+
+ if (i != row_width) {
+ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
+ }
+
+ /* Decrement output pointers. */
+ *ssp = *ssp - i;
+ *ddp = *ddp - ((i << 1) + i);
+ return i;
+}
+
+#endif /* PNG_ARM_NEON_IMPLEMENTATION */
diff --git a/third_party/libpng/patches/0000-plte.patch b/third_party/libpng/patches/0000-plte.patch
new file mode 100644
index 000000000000..feea4d874335
--- /dev/null
+++ b/third_party/libpng/patches/0000-plte.patch
@@ -0,0 +1,776 @@
+From 5d94e310951211886ee460701176cb36c6e4bc88 Mon Sep 17 00:00:00 2001
+From: Chris Blume <cblume@chromium.org>
+Date: Tue, 16 Jan 2018 21:19:28 +0000
+Subject: [PATCH 1/2] Revert "Revert "libpng: Optimize png_do_expand_palette
+ with NEON.""
+
+This reverts commit 20a18ebc02429a40e4bc33fa28c4de29a39b609c.
+
+Reason for revert: <INSERT REASONING HERE>
+
+Original change's description:
+> Revert "libpng: Optimize png_do_expand_palette with NEON."
+>
+> This reverts commit c4811af6d72836d44a3630beecebb0ff55875ab1.
+>
+> Reason for revert: This is failing to compile on ios-device-xcode-clang bot.
+>
+> https://uberchromegw.corp.google.com/i/chromium.mac/builders/ios-device-xcode-clang/builds/50225
+>
+> Original change's description:
+> > libpng: Optimize png_do_expand_palette with NEON.
+> >
+> > ARM-specific optimization processes 8 or 4 pixels at once.
+> >
+> > * Without transparency: 22% performance gain on the A53 little core.
+> > * With transparency: 10% improvement on a big A72 core, 24% on little.
+> >
+> > (Numbers from image_decode_bench with PNG140 on the elm chromebook).
+> >
+> > Bug: 706134
+> > Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54
+> > Reviewed-on: https://chromium-review.googlesource.com/817116
+> > Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org>
+> > Reviewed-by: Mike Klein <mtklein@chromium.org>
+> > Reviewed-by: Leon Scroggins <scroggo@chromium.org>
+> > Reviewed-by: Chris Blume <cblume@chromium.org>
+> > Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
+> > Cr-Commit-Position: refs/heads/master@{#529473}
+>
+> TBR=scroggo@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org
+>
+> Change-Id: I2cd943e15ceadf4311b1b49a56de00d10684e294
+> No-Presubmit: true
+> No-Tree-Checks: true
+> No-Try: true
+> Bug: 706134
+> Reviewed-on: https://chromium-review.googlesource.com/868770
+> Reviewed-by: Jonathan Ross <jonross@chromium.org>
+> Commit-Queue: Jonathan Ross <jonross@chromium.org>
+> Cr-Commit-Position: refs/heads/master@{#529484}
+
+TBR=scroggo@chromium.org,jonross@chromium.org,cavalcantii@chromium.org,richard.townsend@arm.com,cblume@chromium.org,mtklein@chromium.org
+
+Change-Id: I6baf17d35efbd5c6bc348d4f81264e8e1023be4f
+No-Presubmit: true
+No-Tree-Checks: true
+No-Try: true
+Bug: 706134
+---
+ third_party/libpng/BUILD.gn | 1 +
+ third_party/libpng/arm/palette_neon_intrinsics.c | 137 +++++++++
+ third_party/libpng/patches/0000-plte.patch | 340 +++++++++++++++++++++++
+ third_party/libpng/patches/README | 4 +
+ third_party/libpng/pngpriv.h | 23 ++
+ third_party/libpng/pngrtran.c | 49 +++-
+ third_party/libpng/pngstruct.h | 4 +
+ third_party/libpng/pngwrite.c | 4 +
+ 8 files changed, 554 insertions(+), 8 deletions(-)
+ create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c
+ create mode 100644 third_party/libpng/patches/0000-plte.patch
+ create mode 100644 third_party/libpng/patches/README
+
+diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn
+index e2658a7ba623..e48c790326c7 100644
+--- a/third_party/libpng/BUILD.gn
++++ b/third_party/libpng/BUILD.gn
+@@ -75,6 +75,7 @@ source_set("libpng_sources") {
+ sources += [
+ "arm/arm_init.c",
+ "arm/filter_neon_intrinsics.c",
++ "arm/palette_neon_intrinsics.c",
+ ]
+ defines += [
+ "PNG_ARM_NEON_OPT=2",
+diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c
+new file mode 100644
+index 000000000000..0c0c0a909f8d
+--- /dev/null
++++ b/third_party/libpng/arm/palette_neon_intrinsics.c
+@@ -0,0 +1,137 @@
++/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
++ *
++ * Copyright (c) 2017 The Chromium Authors. All rights reserved.
++ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
++ *
++ * This code is released under the libpng license.
++ * For conditions of distribution and use, see the disclaimer
++ * and license in png.h
++ */
++
++#include "../pngpriv.h"
++
++#if PNG_ARM_NEON_IMPLEMENTATION == 1
++
++#include <arm_neon.h>
++
++/* Build an RGBA palette from the RGB and separate alpha palettes. */
++void
++png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
++{
++ png_const_colorp palette = png_ptr->palette;
++ png_bytep riffled_palette = png_ptr->riffled_palette;
++ png_const_bytep trans_alpha = png_ptr->trans_alpha;
++ int num_trans = png_ptr->num_trans;
++
++ if (row_info->bit_depth != 8) {
++ png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
++ return;
++ }
++
++ /* Initially black, opaque. */
++ uint8x16x4_t w = {
++ vdupq_n_u8(0x00),
++ vdupq_n_u8(0x00),
++ vdupq_n_u8(0x00),
++ vdupq_n_u8(0xff),
++ };
++
++ int i;
++ /* First, riffle the RGB colours into a RGBA palette, the A value is
++ * set to opaque for now. */
++ for (i = 0; i < (1 << row_info->bit_depth); i += 16) {
++ uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
++ w.val[0] = v.val[0];
++ w.val[1] = v.val[1];
++ w.val[2] = v.val[2];
++ vst4q_u8(riffled_palette + (i << 2), w);
++ }
++
++ /* Fix up the missing transparency values. */
++ for (i = 0; i < num_trans; i++) {
++ riffled_palette[(i << 2) + 3] = trans_alpha[i];
++ }
++}
++
++
++/* Expands a palettized row into RGBA. */
++int
++png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
++{
++
++ png_uint_32 row_width = row_info->width;
++ const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette;
++ const png_int_32 pixels_per_chunk = 4;
++
++ if (row_width < pixels_per_chunk) {
++ return 0;
++ }
++
++ /* This function originally gets the last byte of the output row.
++ The NEON part writes forward from a given position, so we have
++ to seek this back by 4 pixels x 4 bytes. */
++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
++
++ int i;
++ for (i = 0; i < row_width; i += pixels_per_chunk) {
++ uint32x4_t cur;
++ png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
++ cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
++ cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
++ cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
++ cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
++ vst1q_u32((void *)dp, cur);
++ }
++ if (i != row_width) {
++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
++ }
++
++ /* Decrement output pointers. */
++ *ssp = *ssp - i;
++ *ddp = *ddp - (i << 2);
++ return i;
++}
++
++/* Expands a palettized row into RGB format. */
++int
++png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info,
++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
++{
++ png_uint_32 row_width = row_info->width;
++ png_const_bytep palette = (png_const_bytep)png_ptr->palette;
++ const png_uint_32 pixels_per_chunk = 8;
++
++ if (row_width <= pixels_per_chunk) {
++ return 0;
++ }
++
++ /* Seeking this back by 8 pixels x 3 bytes. */
++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
++
++ int i;
++ for (i = 0; i < row_width; i += pixels_per_chunk) {
++ uint8x8x3_t cur;
++ png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
++ cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
++ vst3_u8((void *)dp, cur);
++ }
++
++ if (i != row_width) {
++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
++ }
++
++ /* Decrement output pointers. */
++ *ssp = *ssp - i;
++ *ddp = *ddp - ((i << 1) + i);
++ return i;
++}
++
++#endif /* PNG_ARM_NEON_IMPLEMENTATION */
+diff --git a/third_party/libpng/patches/0000-plte.patch b/third_party/libpng/patches/0000-plte.patch
+new file mode 100644
+index 000000000000..6fceb2f2275a
+--- /dev/null
++++ b/third_party/libpng/patches/0000-plte.patch
+@@ -0,0 +1,340 @@
++From aa270a19f7bb9d9cba207b38c0a98cb3a3dc681e Mon Sep 17 00:00:00 2001
++From: Richard Townsend <Richard.Townsend@arm.com>
++Date: Mon, 20 Feb 2017 14:06:14 +0000
++Subject: [PATCH] libpng: Optimize png_do_expand_palette with NEON.
++
++ARM-specific optimization processes 8 or 4 pixels at once.
++
++* Without transparency: 22% performance gain on the A53 little core.
++* With transparency: 10% improvement on a big A72 core, 24% on little.
++
++(Numbers from image_decode_bench with PNG140 on the elm chromebook).
++
++Bug: 706134
++Change-Id: I7b4a93d72a0afa2823f3bf9ff5f798b88c843e54
++---
++ third_party/libpng/BUILD.gn | 1 +
++ third_party/libpng/arm/palette_neon_intrinsics.c | 137 +++++++++++++++++++++++
++ third_party/libpng/pngpriv.h | 23 ++++
++ third_party/libpng/pngrtran.c | 49 ++++++--
++ third_party/libpng/pngstruct.h | 4 +
++ third_party/libpng/pngwrite.c | 4 +
++ 6 files changed, 210 insertions(+), 8 deletions(-)
++ create mode 100644 third_party/libpng/arm/palette_neon_intrinsics.c
++
++diff --git a/third_party/libpng/BUILD.gn b/third_party/libpng/BUILD.gn
++index e2658a7ba623..e48c790326c7 100644
++--- a/third_party/libpng/BUILD.gn
+++++ b/third_party/libpng/BUILD.gn
++@@ -75,6 +75,7 @@ source_set("libpng_sources") {
++ sources += [
++ "arm/arm_init.c",
++ "arm/filter_neon_intrinsics.c",
+++ "arm/palette_neon_intrinsics.c",
++ ]
++ defines += [
++ "PNG_ARM_NEON_OPT=2",
++diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c
++new file mode 100644
++index 000000000000..0c0c0a909f8d
++--- /dev/null
+++++ b/third_party/libpng/arm/palette_neon_intrinsics.c
++@@ -0,0 +1,137 @@
+++/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
+++ *
+++ * Copyright (c) 2017 The Chromium Authors. All rights reserved.
+++ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
+++ *
+++ * This code is released under the libpng license.
+++ * For conditions of distribution and use, see the disclaimer
+++ * and license in png.h
+++ */
+++
+++#include "../pngpriv.h"
+++
+++#if PNG_ARM_NEON_IMPLEMENTATION == 1
+++
+++#include <arm_neon.h>
+++
+++/* Build an RGBA palette from the RGB and separate alpha palettes. */
+++void
+++png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
+++{
+++ png_const_colorp palette = png_ptr->palette;
+++ png_bytep riffled_palette = png_ptr->riffled_palette;
+++ png_const_bytep trans_alpha = png_ptr->trans_alpha;
+++ int num_trans = png_ptr->num_trans;
+++
+++ if (row_info->bit_depth != 8) {
+++ png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
+++ return;
+++ }
+++
+++ /* Initially black, opaque. */
+++ uint8x16x4_t w = {
+++ vdupq_n_u8(0x00),
+++ vdupq_n_u8(0x00),
+++ vdupq_n_u8(0x00),
+++ vdupq_n_u8(0xff),
+++ };
+++
+++ int i;
+++ /* First, riffle the RGB colours into a RGBA palette, the A value is
+++ * set to opaque for now. */
+++ for (i = 0; i < (1 << row_info->bit_depth); i += 16) {
+++ uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
+++ w.val[0] = v.val[0];
+++ w.val[1] = v.val[1];
+++ w.val[2] = v.val[2];
+++ vst4q_u8(riffled_palette + (i << 2), w);
+++ }
+++
+++ /* Fix up the missing transparency values. */
+++ for (i = 0; i < num_trans; i++) {
+++ riffled_palette[(i << 2) + 3] = trans_alpha[i];
+++ }
+++}
+++
+++
+++/* Expands a palettized row into RGBA. */
+++int
+++png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
+++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
+++{
+++
+++ png_uint_32 row_width = row_info->width;
+++ const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->riffled_palette;
+++ const png_int_32 pixels_per_chunk = 4;
+++
+++ if (row_width < pixels_per_chunk) {
+++ return 0;
+++ }
+++
+++ /* This function originally gets the last byte of the output row.
+++ The NEON part writes forward from a given position, so we have
+++ to seek this back by 4 pixels x 4 bytes. */
+++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
+++
+++ int i;
+++ for (i = 0; i < row_width; i += pixels_per_chunk) {
+++ uint32x4_t cur;
+++ png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
+++ cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
+++ cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
+++ cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
+++ cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
+++ vst1q_u32((void *)dp, cur);
+++ }
+++ if (i != row_width) {
+++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
+++ }
+++
+++ /* Decrement output pointers. */
+++ *ssp = *ssp - i;
+++ *ddp = *ddp - (i << 2);
+++ return i;
+++}
+++
+++/* Expands a palettized row into RGB format. */
+++int
+++png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info,
+++ png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
+++{
+++ png_uint_32 row_width = row_info->width;
+++ png_const_bytep palette = (png_const_bytep)png_ptr->palette;
+++ const png_uint_32 pixels_per_chunk = 8;
+++
+++ if (row_width <= pixels_per_chunk) {
+++ return 0;
+++ }
+++
+++ /* Seeking this back by 8 pixels x 3 bytes. */
+++ *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
+++
+++ int i;
+++ for (i = 0; i < row_width; i += pixels_per_chunk) {
+++ uint8x8x3_t cur;
+++ png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
+++ cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
+++ cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
+++ vst3_u8((void *)dp, cur);
+++ }
+++
+++ if (i != row_width) {
+++ i -= pixels_per_chunk; /* Remove the amount that wasn't processed. */
+++ }
+++
+++ /* Decrement output pointers. */
+++ *ssp = *ssp - i;
+++ *ddp = *ddp - ((i << 1) + i);
+++ return i;
+++}
+++
+++#endif /* PNG_ARM_NEON_IMPLEMENTATION */
++diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h
++index 1f2e90f2b37b..5652525b2b51 100644
++--- a/third_party/libpng/pngpriv.h
+++++ b/third_party/libpng/pngpriv.h
++@@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
++ PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
++ png_const_charp key, png_bytep new_key), PNG_EMPTY);
++
+++#if PNG_ARM_NEON_IMPLEMENTATION == 1
+++PNG_INTERNAL_FUNCTION(void,
+++ png_riffle_palette_rgba,
+++ (png_structrp, png_row_infop),
+++ PNG_EMPTY);
+++PNG_INTERNAL_FUNCTION(int,
+++ png_do_expand_palette_neon_rgba,
+++ (png_structrp,
+++ png_row_infop,
+++ png_const_bytep,
+++ const png_bytepp,
+++ const png_bytepp),
+++ PNG_EMPTY);
+++PNG_INTERNAL_FUNCTION(int,
+++ png_do_expand_palette_neon_rgb,
+++ (png_structrp,
+++ png_row_infop,
+++ png_const_bytep,
+++ const png_bytepp,
+++ const png_bytepp),
+++ PNG_EMPTY);
+++#endif
+++
++ /* Maintainer: Put new private prototypes here ^ */
++
++ #include "pngdebug.h"
++diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c
++index c1896503130e..9dd82c929bdc 100644
++--- a/third_party/libpng/pngrtran.c
+++++ b/third_party/libpng/pngrtran.c
++@@ -18,6 +18,13 @@
++
++ #include "pngpriv.h"
++
+++#ifdef PNG_ARM_NEON_IMPLEMENTATION
+++#if PNG_ARM_NEON_IMPLEMENTATION == 1
+++#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
+++#include <arm_neon.h>
+++#endif
+++#endif
+++
++ #ifdef PNG_READ_SUPPORTED
++
++ /* Set the action on getting a CRC error for an ancillary or critical chunk. */
++@@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
++ * upon whether you supply trans and num_trans.
++ */
++ static void
++-png_do_expand_palette(png_row_infop row_info, png_bytep row,
++- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
+++png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
+++ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
+++ int num_trans)
++ {
++ int shift, value;
++ png_bytep sp, dp;
++@@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
++ sp = row + (png_size_t)row_width - 1;
++ dp = row + ((png_size_t)row_width << 2) - 1;
++
++- for (i = 0; i < row_width; i++)
+++ i = 0;
+++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+++ if (png_ptr->riffled_palette != NULL) {
+++ /* The RGBA optimization works with png_ptr->bit_depth == 8
+++ but sometimes row_info->bit_depth has been changed to 8.
+++ In these cases, the palette hasn't been riffled. */
+++ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
+++ }
+++#endif
+++
+++ for (; i < row_width; i++)
++ {
++ if ((int)(*sp) >= num_trans)
++ *dp-- = 0xff;
++-
++ else
++ *dp-- = trans_alpha[*sp];
++-
++ *dp-- = palette[*sp].blue;
++ *dp-- = palette[*sp].green;
++ *dp-- = palette[*sp].red;
++@@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
++ {
++ sp = row + (png_size_t)row_width - 1;
++ dp = row + (png_size_t)(row_width * 3) - 1;
+++ i = 0;
+++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+++ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
+++#endif
++
++- for (i = 0; i < row_width; i++)
+++ for (; i < row_width; i++)
++ {
++ *dp-- = palette[*sp].blue;
++ *dp-- = palette[*sp].green;
++@@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
++ {
++ if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
++ {
++- png_do_expand_palette(row_info, png_ptr->row_buf + 1,
++- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
+++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+++ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
+++ /* Allocate space for the decompressed full palette. */
+++ if (png_ptr->riffled_palette == NULL) {
+++ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
+++ if (png_ptr->riffled_palette == NULL) {
+++ png_error(png_ptr, "NULL row buffer");
+++ }
+++ /* Build the RGBA palette. */
+++ png_riffle_palette_rgba(png_ptr, row_info);
+++ }
+++ }
+++#endif
+++ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
+++ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
++ }
++
++ else
++diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h
++index d83f971253fe..aac88df02d32 100644
++--- a/third_party/libpng/pngstruct.h
+++++ b/third_party/libpng/pngstruct.h
++@@ -228,6 +228,10 @@ struct png_struct_def
++ * big_row_buf; while writing it is separately
++ * allocated.
++ */
+++#ifdef PNG_READ_EXPAND_SUPPORTED
+++ /* Buffer to accelerate palette transformations. */
+++ png_bytep riffled_palette;
+++#endif
++ #ifdef PNG_WRITE_FILTER_SUPPORTED
++ png_bytep try_row; /* buffer to save trial row when filtering */
++ png_bytep tst_row; /* buffer to save best trial row when filtering */
++diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c
++index a16d77ce00c6..e25e5dcfdc18 100644
++--- a/third_party/libpng/pngwrite.c
+++++ b/third_party/libpng/pngwrite.c
++@@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr)
++ png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
++ png_free(png_ptr, png_ptr->row_buf);
++ png_ptr->row_buf = NULL;
+++#ifdef PNG_READ_EXPANDED_SUPPORTED
+++ png_free(png_ptr, png_ptr->riffled_palette);
+++ png_ptr->riffled_palette = NULL;
+++#endif
++ #ifdef PNG_WRITE_FILTER_SUPPORTED
++ png_free(png_ptr, png_ptr->prev_row);
++ png_free(png_ptr, png_ptr->try_row);
++--
++2.15.1
++
+diff --git a/third_party/libpng/patches/README b/third_party/libpng/patches/README
+new file mode 100644
+index 000000000000..786aeabede37
+--- /dev/null
++++ b/third_party/libpng/patches/README
+@@ -0,0 +1,4 @@
++This directory contains patches applied on top of libpng, which haven't been
++upstreamed to the canonical libpng repository [1] yet.
++
++[1] https://github.com/glennrp/libpng
+diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h
+index 1f2e90f2b37b..5652525b2b51 100644
+--- a/third_party/libpng/pngpriv.h
++++ b/third_party/libpng/pngpriv.h
+@@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
+ PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
+ png_const_charp key, png_bytep new_key), PNG_EMPTY);
+
++#if PNG_ARM_NEON_IMPLEMENTATION == 1
++PNG_INTERNAL_FUNCTION(void,
++ png_riffle_palette_rgba,
++ (png_structrp, png_row_infop),
++ PNG_EMPTY);
++PNG_INTERNAL_FUNCTION(int,
++ png_do_expand_palette_neon_rgba,
++ (png_structrp,
++ png_row_infop,
++ png_const_bytep,
++ const png_bytepp,
++ const png_bytepp),
++ PNG_EMPTY);
++PNG_INTERNAL_FUNCTION(int,
++ png_do_expand_palette_neon_rgb,
++ (png_structrp,
++ png_row_infop,
++ png_const_bytep,
++ const png_bytepp,
++ const png_bytepp),
++ PNG_EMPTY);
++#endif
++
+ /* Maintainer: Put new private prototypes here ^ */
+
+ #include "pngdebug.h"
+diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c
+index c1896503130e..9dd82c929bdc 100644
+--- a/third_party/libpng/pngrtran.c
++++ b/third_party/libpng/pngrtran.c
+@@ -18,6 +18,13 @@
+
+ #include "pngpriv.h"
+
++#ifdef PNG_ARM_NEON_IMPLEMENTATION
++#if PNG_ARM_NEON_IMPLEMENTATION == 1
++#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
++#include <arm_neon.h>
++#endif
++#endif
++
+ #ifdef PNG_READ_SUPPORTED
+
+ /* Set the action on getting a CRC error for an ancillary or critical chunk. */
+@@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
+ * upon whether you supply trans and num_trans.
+ */
+ static void
+-png_do_expand_palette(png_row_infop row_info, png_bytep row,
+- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
++png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
++ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
++ int num_trans)
+ {
+ int shift, value;
+ png_bytep sp, dp;
+@@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
+ sp = row + (png_size_t)row_width - 1;
+ dp = row + ((png_size_t)row_width << 2) - 1;
+
+- for (i = 0; i < row_width; i++)
++ i = 0;
++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
++ if (png_ptr->riffled_palette != NULL) {
++ /* The RGBA optimization works with png_ptr->bit_depth == 8
++ but sometimes row_info->bit_depth has been changed to 8.
++ In these cases, the palette hasn't been riffled. */
++ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
++ }
++#endif
++
++ for (; i < row_width; i++)
+ {
+ if ((int)(*sp) >= num_trans)
+ *dp-- = 0xff;
+-
+ else
+ *dp-- = trans_alpha[*sp];
+-
+ *dp-- = palette[*sp].blue;
+ *dp-- = palette[*sp].green;
+ *dp-- = palette[*sp].red;
+@@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
+ {
+ sp = row + (png_size_t)row_width - 1;
+ dp = row + (png_size_t)(row_width * 3) - 1;
++ i = 0;
++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
++ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
++#endif
+
+- for (i = 0; i < row_width; i++)
++ for (; i < row_width; i++)
+ {
+ *dp-- = palette[*sp].blue;
+ *dp-- = palette[*sp].green;
+@@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
+ {
+ if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
+ {
+- png_do_expand_palette(row_info, png_ptr->row_buf + 1,
+- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
++#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
++ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
++ /* Allocate space for the decompressed full palette. */
++ if (png_ptr->riffled_palette == NULL) {
++ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
++ if (png_ptr->riffled_palette == NULL) {
++ png_error(png_ptr, "NULL row buffer");
++ }
++ /* Build the RGBA palette. */
++ png_riffle_palette_rgba(png_ptr, row_info);
++ }
++ }
++#endif
++ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
++ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
+ }
+
+ else
+diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h
+index d83f971253fe..aac88df02d32 100644
+--- a/third_party/libpng/pngstruct.h
++++ b/third_party/libpng/pngstruct.h
+@@ -228,6 +228,10 @@ struct png_struct_def
+ * big_row_buf; while writing it is separately
+ * allocated.
+ */
++#ifdef PNG_READ_EXPAND_SUPPORTED
++ /* Buffer to accelerate palette transformations. */
++ png_bytep riffled_palette;
++#endif
+ #ifdef PNG_WRITE_FILTER_SUPPORTED
+ png_bytep try_row; /* buffer to save trial row when filtering */
+ png_bytep tst_row; /* buffer to save best trial row when filtering */
+diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c
+index a16d77ce00c6..e25e5dcfdc18 100644
+--- a/third_party/libpng/pngwrite.c
++++ b/third_party/libpng/pngwrite.c
+@@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr)
+ png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
+ png_free(png_ptr, png_ptr->row_buf);
+ png_ptr->row_buf = NULL;
++#ifdef PNG_READ_EXPANDED_SUPPORTED
++ png_free(png_ptr, png_ptr->riffled_palette);
++ png_ptr->riffled_palette = NULL;
++#endif
+ #ifdef PNG_WRITE_FILTER_SUPPORTED
+ png_free(png_ptr, png_ptr->prev_row);
+ png_free(png_ptr, png_ptr->try_row);
+--
+2.16.0.rc1.238.g530d649a79-goog
+
+
+From 736276e0ec02b5078ca98cfe6ed0bb3f86524a8b Mon Sep 17 00:00:00 2001
+From: Chris Blume <cblume@google.com>
+Date: Tue, 16 Jan 2018 13:25:38 -0800
+Subject: [PATCH 2/2] Explicitly initialize the member of the variable
+
+---
+ third_party/libpng/arm/palette_neon_intrinsics.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/third_party/libpng/arm/palette_neon_intrinsics.c b/third_party/libpng/arm/palette_neon_intrinsics.c
+index 0c0c0a909f8d..703b9ff25053 100644
+--- a/third_party/libpng/arm/palette_neon_intrinsics.c
++++ b/third_party/libpng/arm/palette_neon_intrinsics.c
+@@ -29,12 +29,12 @@ png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
+ }
+
+ /* Initially black, opaque. */
+- uint8x16x4_t w = {
++ uint8x16x4_t w = {{
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0x00),
+ vdupq_n_u8(0xff),
+- };
++ }};
+
+ int i;
+ /* First, riffle the RGB colours into a RGBA palette, the A value is
+--
+2.16.0.rc1.238.g530d649a79-goog
+
diff --git a/third_party/libpng/patches/README b/third_party/libpng/patches/README
new file mode 100644
index 000000000000..786aeabede37
--- /dev/null
+++ b/third_party/libpng/patches/README
@@ -0,0 +1,4 @@
+This directory contains patches applied on top of libpng, which haven't been
+upstreamed to the canonical libpng repository [1] yet.
+
+[1] https://github.com/glennrp/libpng
diff --git a/third_party/libpng/pngpriv.h b/third_party/libpng/pngpriv.h
index 1f2e90f2b37b..5652525b2b51 100644
--- a/third_party/libpng/pngpriv.h
+++ b/third_party/libpng/pngpriv.h
@@ -2108,6 +2108,29 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
png_const_charp key, png_bytep new_key), PNG_EMPTY);
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void,
+ png_riffle_palette_rgba,
+ (png_structrp, png_row_infop),
+ PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(int,
+ png_do_expand_palette_neon_rgba,
+ (png_structrp,
+ png_row_infop,
+ png_const_bytep,
+ const png_bytepp,
+ const png_bytepp),
+ PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(int,
+ png_do_expand_palette_neon_rgb,
+ (png_structrp,
+ png_row_infop,
+ png_const_bytep,
+ const png_bytepp,
+ const png_bytepp),
+ PNG_EMPTY);
+#endif
+
/* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h"
diff --git a/third_party/libpng/pngrtran.c b/third_party/libpng/pngrtran.c
index c1896503130e..9dd82c929bdc 100644
--- a/third_party/libpng/pngrtran.c
+++ b/third_party/libpng/pngrtran.c
@@ -18,6 +18,13 @@
#include "pngpriv.h"
+#ifdef PNG_ARM_NEON_IMPLEMENTATION
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
+#include <arm_neon.h>
+#endif
+#endif
+
#ifdef PNG_READ_SUPPORTED
/* Set the action on getting a CRC error for an ancillary or critical chunk. */
@@ -4202,8 +4209,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
* upon whether you supply trans and num_trans.
*/
static void
-png_do_expand_palette(png_row_infop row_info, png_bytep row,
- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
+png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
+ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
+ int num_trans)
{
int shift, value;
png_bytep sp, dp;
@@ -4307,14 +4315,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
sp = row + (png_size_t)row_width - 1;
dp = row + ((png_size_t)row_width << 2) - 1;
- for (i = 0; i < row_width; i++)
+ i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ if (png_ptr->riffled_palette != NULL) {
+ /* The RGBA optimization works with png_ptr->bit_depth == 8
+ but sometimes row_info->bit_depth has been changed to 8.
+ In these cases, the palette hasn't been riffled. */
+ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
+ }
+#endif
+
+ for (; i < row_width; i++)
{
if ((int)(*sp) >= num_trans)
*dp-- = 0xff;
-
else
*dp-- = trans_alpha[*sp];
-
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
@@ -4331,8 +4347,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
{
sp = row + (png_size_t)row_width - 1;
dp = row + (png_size_t)(row_width * 3) - 1;
+ i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
+#endif
- for (i = 0; i < row_width; i++)
+ for (; i < row_width; i++)
{
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
@@ -4748,8 +4768,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
{
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
{
- png_do_expand_palette(row_info, png_ptr->row_buf + 1,
- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
+ /* Allocate space for the decompressed full palette. */
+ if (png_ptr->riffled_palette == NULL) {
+ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
+ if (png_ptr->riffled_palette == NULL) {
+ png_error(png_ptr, "NULL row buffer");
+ }
+ /* Build the RGBA palette. */
+ png_riffle_palette_rgba(png_ptr, row_info);
+ }
+ }
+#endif
+ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
+ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
}
else
diff --git a/third_party/libpng/pngstruct.h b/third_party/libpng/pngstruct.h
index d83f971253fe..aac88df02d32 100644
--- a/third_party/libpng/pngstruct.h
+++ b/third_party/libpng/pngstruct.h
@@ -228,6 +228,10 @@ struct png_struct_def
* big_row_buf; while writing it is separately
* allocated.
*/
+#ifdef PNG_READ_EXPAND_SUPPORTED
+ /* Buffer to accelerate palette transformations. */
+ png_bytep riffled_palette;
+#endif
#ifdef PNG_WRITE_FILTER_SUPPORTED
png_bytep try_row; /* buffer to save trial row when filtering */
png_bytep tst_row; /* buffer to save best trial row when filtering */
diff --git a/third_party/libpng/pngwrite.c b/third_party/libpng/pngwrite.c
index a16d77ce00c6..e25e5dcfdc18 100644
--- a/third_party/libpng/pngwrite.c
+++ b/third_party/libpng/pngwrite.c
@@ -948,6 +948,10 @@ png_write_destroy(png_structrp png_ptr)
png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
png_free(png_ptr, png_ptr->row_buf);
png_ptr->row_buf = NULL;
+#ifdef PNG_READ_EXPANDED_SUPPORTED
+ png_free(png_ptr, png_ptr->riffled_palette);
+ png_ptr->riffled_palette = NULL;
+#endif
#ifdef PNG_WRITE_FILTER_SUPPORTED
png_free(png_ptr, png_ptr->prev_row);
png_free(png_ptr, png_ptr->try_row);
--
2.16.0.rc1.238.g530d649a79-goog