Mirror of https://github.com/giongto35/cloud-game.git (synced 2026-01-23 02:34:42 +00:00)

Commit: f11cad157b "Use static libyuv for macs"
Parent: b1b33713d6
37 changed files with 45 additions and 10943 deletions

2  .github/workflows/build.yml  (vendored)
@@ -38,7 +38,7 @@ jobs:
      - name: Get MacOS dev libraries and tools
        if: matrix.os == 'macos-latest'
        run: |
-          brew install pkg-config libvpx x264 opus sdl2
+          brew install pkg-config libvpx x264 opus sdl2 jpeg-turbo

      - name: Get Windows dev libraries and tools
        if: matrix.os == 'windows-latest'
2  Makefile

@@ -38,7 +38,7 @@ test:
	go test -v ./pkg/...

verify-cores:
-	go test -run TestAll ./pkg/worker/room -v -renderFrames $(GL_CTX) -outputPath "../../../_rendered"
+	go test -run TestAll ./pkg/worker/room -v -renderFrames $(GL_CTX) -outputPath "./_rendered"

dev.build: compile build
@@ -64,7 +64,7 @@ a better sense of performance.
apt-get install -y make gcc pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev libyuv-dev

# MacOS
-brew install pkg-config libvpx x264 opus sdl2
+brew install pkg-config libvpx x264 opus sdl2 jpeg-turbo

# Windows (MSYS2)
pacman -Sy --noconfirm --needed git make mingw-w64-x86_64-{gcc,pkgconf,dlfcn,libvpx,opus,x264-git,SDL2,libyuv}
@@ -1,29 +0,0 @@
-Copyright 2011 The LibYuv Project Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
-
-  * Neither the name of Google nor the names of its contributors may
-    be used to endorse or promote products derived from this software
-    without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
-#define INCLUDE_LIBYUV_BASIC_TYPES_H_
-
-#include <stddef.h>  // For size_t and NULL
-
-#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
-#define INT_TYPES_DEFINED
-
-#include <stdint.h>  // for uintptr_t and C99 types
-
-#endif  // INT_TYPES_DEFINED
-
-#if !defined(LIBYUV_API)
-#define LIBYUV_API
-#endif  // LIBYUV_API
-
-#define LIBYUV_BOOL int
-
-#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_
@@ -1,336 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "convert.h"
-
-#include "basic_types.h"
-#include "cpu_id.h"
-#include "planar_functions.h"
-#include "row.h"
-
-// Subsample amount uses a shift.
-// v is value
-// a is amount to add to round up
-// s is shift to subsample down
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-// Copy I420 with optional flipping.
-// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
-// it does row coalescing.
-LIBYUV_API
-int I420Copy(const uint8_t *src_y,
-             int src_stride_y,
-             const uint8_t *src_u,
-             int src_stride_u,
-             const uint8_t *src_v,
-             int src_stride_v,
-             uint8_t *dst_y,
-             int dst_stride_y,
-             uint8_t *dst_u,
-             int dst_stride_u,
-             uint8_t *dst_v,
-             int dst_stride_v,
-             int width,
-             int height) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
-      height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  if (dst_y) {
-    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  }
-  // Copy UV planes.
-  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
-  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
-  return 0;
-}
-
-// Convert ARGB to I420.
-LIBYUV_API
-int ARGBToI420(const uint8_t *src_argb,
-               int src_stride_argb,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height) {
-  int y;
-  void (*ARGBToUVRow)(const uint8_t *src_argb0, int src_stride_argb,
-                      uint8_t *dst_u, uint8_t *dst_v, int width) =
-      ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8_t *src_argb, uint8_t *dst_y, int width) =
-      ARGBToYRow_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ARGBToYRow = ARGBToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYRow = ARGBToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToUVRow = ARGBToUVRow_AVX2;
-    }
-  }
-#endif
-
-  for (y = 0; y < height - 1; y += 2) {
-    ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
-    src_argb += src_stride_argb * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert ABGR to I420.
-LIBYUV_API
-int ABGRToI420(const uint8_t *src_abgr,
-               int src_stride_abgr,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height) {
-  int y;
-  void (*ABGRToUVRow)(const uint8_t *src_abgr0, int src_stride_abgr,
-                      uint8_t *dst_u, uint8_t *dst_v, int width) =
-      ABGRToUVRow_C;
-  void (*ABGRToYRow)(const uint8_t *src_abgr, uint8_t *dst_y, int width) =
-      ABGRToYRow_C;
-  if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_abgr = src_abgr + (height - 1) * src_stride_abgr;
-    src_stride_abgr = -src_stride_abgr;
-  }
-#if defined(HAS_ABGRTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ABGRToYRow = ABGRToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ABGRToYRow = ABGRToYRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ABGRTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ABGRToUVRow = ABGRToUVRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ABGRTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ABGRToYRow = ABGRToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ABGRToYRow = ABGRToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ABGRTOUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ABGRToUVRow = ABGRToUVRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ABGRToUVRow = ABGRToUVRow_AVX2;
-    }
-  }
-#endif
-
-  for (y = 0; y < height - 1; y += 2) {
-    ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
-    ABGRToYRow(src_abgr, dst_y, width);
-    ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
-    src_abgr += src_stride_abgr * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
-    ABGRToYRow(src_abgr, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert RGB565 to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8_t *src_rgb565,
-                 int src_stride_rgb565,
-                 uint8_t *dst_y,
-                 int dst_stride_y,
-                 uint8_t *dst_u,
-                 int dst_stride_u,
-                 uint8_t *dst_v,
-                 int dst_stride_v,
-                 int width,
-                 int height) {
-  int y;
-  void (*RGB565ToARGBRow)(const uint8_t *src_rgb, uint8_t *dst_argb,
-                          int width) = RGB565ToARGBRow_C;
-  void (*ARGBToUVRow)(const uint8_t *src_argb0, int src_stride_argb,
-                      uint8_t *dst_u, uint8_t *dst_v, int width) =
-      ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8_t *src_argb, uint8_t *dst_y, int width) =
-      ARGBToYRow_C;
-  if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
-    src_stride_rgb565 = -src_stride_rgb565;
-  }
-
-#if defined(HAS_RGB565TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_RGB565TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ARGBToYRow = ARGBToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYRow = ARGBToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToUVRow = ARGBToUVRow_AVX2;
-    }
-  }
-#endif
-  {
-#if !(defined(HAS_RGB565TOYROW_NEON))
-    // Allocate 2 rows of ARGB.
-    const int row_size = (width * 4 + 31) & ~31;
-    align_buffer_64(row, row_size * 2);
-#endif
-    for (y = 0; y < height - 1; y += 2) {
-#if (defined(HAS_RGB565TOYROW_NEON))
-#else
-      RGB565ToARGBRow(src_rgb565, row, width);
-      RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width);
-      ARGBToUVRow(row, row_size, dst_u, dst_v, width);
-      ARGBToYRow(row, dst_y, width);
-      ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
-#endif
-      src_rgb565 += src_stride_rgb565 * 2;
-      dst_y += dst_stride_y * 2;
-      dst_u += dst_stride_u;
-      dst_v += dst_stride_v;
-    }
-    if (height & 1) {
-#if (defined(HAS_RGB565TOYROW_NEON))
-#else
-      RGB565ToARGBRow(src_rgb565, row, width);
-      ARGBToUVRow(row, 0, dst_u, dst_v, width);
-      ARGBToYRow(row, dst_y, width);
-#endif
-    }
-#if !(defined(HAS_RGB565TOYROW_NEON))
-    free_aligned_buffer_64(row);
-#endif
-  }
-  return 0;
-}
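The SUBSAMPLE macro in the deleted source rounds a dimension up before shifting it down; that is how the half-resolution chroma extents are derived. A minimal standalone sketch of that arithmetic (plain C, independent of the deleted file):

#include <assert.h>

/* Same rounding-up subsample as the deleted convert source:
   add `a` before shifting right by `s`, with care for negative values. */
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)

int main(void) {
  /* A 1279-pixel-wide image still needs 640 chroma samples per row. */
  assert((SUBSAMPLE(1279, 1, 1)) == 640);
  assert((SUBSAMPLE(1280, 1, 1)) == 640);
  /* Negative values keep their sign; used with inverted strides. */
  assert((SUBSAMPLE(-1279, 1, 1)) == -640);
  return 0;
}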
@@ -1,113 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_H_
-#define INCLUDE_LIBYUV_CONVERT_H_
-
-#include "rotate.h"  // For enum RotationMode.
-
-// Copy I420 to I420.
-#define I420ToI420 I420Copy
-LIBYUV_API
-int I420Copy(const uint8_t *src_y,
-             int src_stride_y,
-             const uint8_t *src_u,
-             int src_stride_u,
-             const uint8_t *src_v,
-             int src_stride_v,
-             uint8_t *dst_y,
-             int dst_stride_y,
-             uint8_t *dst_u,
-             int dst_stride_u,
-             uint8_t *dst_v,
-             int dst_stride_v,
-             int width,
-             int height);
-
-// ARGB little endian (bgra in memory) to I420.
-LIBYUV_API
-int ARGBToI420(const uint8_t *src_argb,
-               int src_stride_argb,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height);
-
-// ABGR little endian (rgba in memory) to I420.
-LIBYUV_API
-int ABGRToI420(const uint8_t *src_abgr,
-               int src_stride_abgr,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height);
-
-// RGB16 (RGBP fourcc) little endian to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8_t *src_rgb565,
-                 int src_stride_rgb565,
-                 uint8_t *dst_y,
-                 int dst_stride_y,
-                 uint8_t *dst_u,
-                 int dst_stride_u,
-                 uint8_t *dst_v,
-                 int dst_stride_v,
-                 int width,
-                 int height);
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_y" number of bytes in a row of the dst_y plane.
-//   Normally this would be the same as dst_width, with recommended alignment
-//   to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected. The caller should
-//   allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-//   Normally this would be the same as (dst_width + 1) / 2, with
-//   recommended alignment to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-//   To center, crop_x = (src_width - dst_width) / 2
-//   crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-//   "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-//   Must be less than or equal to src_width/src_height
-//   Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "fourcc" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToI420(const uint8_t *sample,
-                  size_t sample_size,
-                  uint8_t *dst_y,
-                  int dst_stride_y,
-                  uint8_t *dst_u,
-                  int dst_stride_u,
-                  uint8_t *dst_v,
-                  int dst_stride_v,
-                  int crop_x,
-                  int crop_y,
-                  int src_width,
-                  int src_height,
-                  int crop_width,
-                  int crop_height,
-                  enum RotationMode rotation,
-                  uint32_t fourcc);
-
-#endif  // INCLUDE_LIBYUV_CONVERT_H_
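The header comment above spells out the centering rule for the crop window. A hedged sketch of a caller that centers the crop before invoking ConvertToI420; the helper name and the use of tightly packed destination strides are illustrative, not from the source:

#include <stddef.h>
#include <stdint.h>
#include "convert.h"  /* the (deleted) header declared above */

/* Hypothetical helper: center-crop a single-plane sample into a tightly
   packed I420 buffer, per the centering formula in the doc comment. */
static int center_crop_to_i420(const uint8_t *sample, size_t sample_size,
                               int src_width, int src_height,
                               int dst_width, int dst_height,
                               uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v,
                               uint32_t fourcc) {
  int crop_x = (src_width - dst_width) / 2;    /* per the doc comment   */
  int crop_y = (src_height - dst_height) / 2;
  int half = (dst_width + 1) / 2;              /* chroma stride         */
  return ConvertToI420(sample, sample_size,
                       dst_y, dst_width,       /* dst_stride_y == width */
                       dst_u, half,
                       dst_v, half,
                       crop_x, crop_y,
                       src_width, src_height,
                       dst_width, dst_height,  /* crop_width/height     */
                       kRotate0, fourcc);
}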
@@ -1,24 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_
-#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
-
-#include "basic_types.h"
-
-// Conversion matrix for YVU to BGR
-LIBYUV_API extern const struct YuvConstants kYvuI601Constants;   // BT.601
-LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants;   // BT.601 full
-LIBYUV_API extern const struct YuvConstants kYvuH709Constants;   // BT.709
-LIBYUV_API extern const struct YuvConstants kYvuF709Constants;   // BT.709 full
-LIBYUV_API extern const struct YuvConstants kYvu2020Constants;   // BT.2020
-LIBYUV_API extern const struct YuvConstants kYvuV2020Constants;  // BT.2020 full
-
-#endif  // INCLUDE_LIBYUV_CONVERT_ARGB_H_
@@ -1,116 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-
-#include "convert.h"
-#include "video_common.h"
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-//   With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToI420(const uint8_t *sample,
-                  size_t sample_size,
-                  uint8_t *dst_y,
-                  int dst_stride_y,
-                  uint8_t *dst_u,
-                  int dst_stride_u,
-                  uint8_t *dst_v,
-                  int dst_stride_v,
-                  int crop_x,
-                  int crop_y,
-                  int src_width,
-                  int src_height,
-                  int crop_width,
-                  int crop_height,
-                  enum RotationMode rotation,
-                  uint32_t fourcc) {
-  uint32_t format = CanonicalFourCC(fourcc);
-  const uint8_t *src;
-  // TODO(nisse): Why allow crop_height < 0?
-  const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-  int r = 0;
-  LIBYUV_BOOL need_buf =
-      (rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
-       format != FOURCC_NV21 && format != FOURCC_YV12) ||
-      dst_y == sample;
-  uint8_t *tmp_y = dst_y;
-  uint8_t *tmp_u = dst_u;
-  uint8_t *tmp_v = dst_v;
-  int tmp_y_stride = dst_stride_y;
-  int tmp_u_stride = dst_stride_u;
-  int tmp_v_stride = dst_stride_v;
-  uint8_t *rotate_buffer = NULL;
-  const int inv_crop_height =
-      (src_height < 0) ? -abs_crop_height : abs_crop_height;
-
-  if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
-      crop_width <= 0 || src_height == 0 || crop_height == 0) {
-    return -1;
-  }
-
-  // One pass rotation is available for some formats. For the rest, convert
-  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
-  // and then rotate the I420 to the final destination buffer.
-  // For in-place conversion, if destination dst_y is same as source sample,
-  // also enable temporary buffer.
-  if (need_buf) {
-    int y_size = crop_width * abs_crop_height;
-    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
-    rotate_buffer = (uint8_t *) malloc(y_size + uv_size * 2); /* NOLINT */
-    if (!rotate_buffer) {
-      return 1;  // Out of memory runtime error.
-    }
-    dst_y = rotate_buffer;
-    dst_u = dst_y + y_size;
-    dst_v = dst_u + uv_size;
-    dst_stride_y = crop_width;
-    dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
-  }
-
-  switch (format) {
-    // Single plane formats
-    case FOURCC_RGBP:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
-                       dst_stride_u, dst_v, dst_stride_v, crop_width,
-                       inv_crop_height);
-      break;
-    case FOURCC_ARGB:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
-                     dst_stride_u, dst_v, dst_stride_v, crop_width,
-                     inv_crop_height);
-      break;
-    case FOURCC_ABGR:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
-                     dst_stride_u, dst_v, dst_stride_v, crop_width,
-                     inv_crop_height);
-      break;
-    default:
-      r = -1;  // unknown fourcc - return failure code.
-  }
-
-  if (need_buf) {
-    if (!r) {
-      r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
-                     dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
-                     tmp_v, tmp_v_stride, crop_width, abs_crop_height,
-                     rotation);
-    }
-    free(rotate_buffer);
-  }
-
-  return r;
-}
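The temporary-buffer path above sizes a full I420 frame as y_size plus twice uv_size. The same arithmetic shown standalone (a sketch, not part of the source):

#include <stddef.h>

/* Bytes needed for a tightly packed I420 frame of w x h pixels:
   one full-resolution Y plane plus two quarter-resolution chroma planes. */
static size_t i420_buffer_size(int w, int h) {
  size_t y_size = (size_t)w * h;
  size_t uv_size = (size_t)((w + 1) / 2) * ((h + 1) / 2);
  return y_size + 2 * uv_size;  /* e.g. 640x480 -> 460800 bytes */
}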
@@ -1,204 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "cpu_id.h"
-
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
-    defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
-#include <immintrin.h>  // For _xgetbv()
-#endif
-
-// For ArmCpuCaps() but unittested on all platforms
-#include <stdio.h>  // For fopen()
-#include <string.h>
-
-// For functions that use the stack and have runtime checks for overflow,
-// use SAFEBUFFERS to avoid additional check.
-#define SAFEBUFFERS
-
-// cpu_info_ variable for SIMD instruction sets detected.
-LIBYUV_API int cpu_info_ = 0;
-
-// Low level cpuid for X86.
-#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
-     defined(__x86_64__)) && \
-    !defined(__pnacl__) && !defined(__CLR_VER)
-LIBYUV_API
-void CpuId(int info_eax, int info_ecx, int *cpu_info) {
-#if defined(_MSC_VER)
-// GCC version uses inline x86 assembly.
-#else  // defined(_MSC_VER)
-  int info_ebx, info_edx;
-  asm volatile(
-#if defined(__i386__) && defined(__PIC__)
-      // Preserve ebx for fpic 32 bit.
-      "mov %%ebx, %%edi \n"
-      "cpuid \n"
-      "xchg %%edi, %%ebx \n"
-      : "=D"(info_ebx),
-#else
-      "cpuid \n"
-      : "=b"(info_ebx),
-#endif  // defined( __i386__) && defined(__PIC__)
-        "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
-  cpu_info[0] = info_eax;
-  cpu_info[1] = info_ebx;
-  cpu_info[2] = info_ecx;
-  cpu_info[3] = info_edx;
-#endif  // defined(_MSC_VER)
-}
-
-#else  // (defined(_M_IX86) || defined(_M_X64) ...
-LIBYUV_API
-void CpuId(int eax, int ecx, int* cpu_info) {
-  (void)eax;
-  (void)ecx;
-  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
-}
-#endif
-
-// For VS2010 and earlier emit can be used:
-//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
-//  __asm {
-//    xor        ecx, ecx    // xcr 0
-//    xgetbv
-//    mov        xcr0, eax
-//  }
-// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
-// https://code.google.com/p/libyuv/issues/detail?id=529
-#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
-#pragma optimize("g", off)
-#endif
-#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
-     defined(__x86_64__)) && \
-    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
-
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-static int GetXCR0() {
-  int xcr0 = 0;
-#if defined(__i386__) || defined(__x86_64__)
-  asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
-#endif  // defined(__i386__) || defined(__x86_64__)
-  return xcr0;
-}
-
-#else
-// xgetbv unavailable to query for OSSave support. Return 0.
-#define GetXCR0() 0
-#endif  // defined(_M_IX86) || defined(_M_X64) ..
-// Return optimization to previous setting.
-#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
-#pragma optimize("g", on)
-#endif
-
-// Based on libvpx arm_cpudetect.c
-// For Arm, but public to allow testing on any CPU
-LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char *cpuinfo_name) {
-  char cpuinfo_line[512];
-  FILE *f = fopen(cpuinfo_name, "re");
-  if (!f) {
-    // Assume Neon if /proc/cpuinfo is unavailable.
-    // This will occur for Chrome sandbox for Pepper or Render process.
-    return kCpuHasNEON;
-  }
-  memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
-  while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
-    if (memcmp(cpuinfo_line, "Features", 8) == 0) {
-      char *p = strstr(cpuinfo_line, " neon");
-      if (p && (p[5] == ' ' || p[5] == '\n')) {
-        fclose(f);
-        return kCpuHasNEON;
-      }
-      // aarch64 uses asimd for Neon.
-      p = strstr(cpuinfo_line, " asimd");
-      if (p) {
-        fclose(f);
-        return kCpuHasNEON;
-      }
-    }
-  }
-  fclose(f);
-  return 0;
-}
-
-static SAFEBUFFERS int GetCpuFlags(void) {
-  int cpu_info = 0;
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
-     defined(_M_IX86))
-  int cpu_info0[4] = {0, 0, 0, 0};
-  int cpu_info1[4] = {0, 0, 0, 0};
-  int cpu_info7[4] = {0, 0, 0, 0};
-  CpuId(0, 0, cpu_info0);
-  CpuId(1, 0, cpu_info1);
-  if (cpu_info0[0] >= 7) {
-    CpuId(7, 0, cpu_info7);
-  }
-  cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
-             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
-             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
-             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
-             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
-
-  // AVX requires OS saves YMM registers.
-  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
-      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
-    cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
-                ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
-                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
-
-    // Detect AVX512bw
-    if ((GetXCR0() & 0xe0) == 0xe0) {
-      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
-      cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
-      cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
-      cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
-    }
-  }
-#endif
-#if defined(__arm__) || defined(__aarch64__)
-// gcc -mfpu=neon defines __ARM_NEON__
-// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
-// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
-#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
-  cpu_info = kCpuHasNEON;
-// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
-// flag in it.
-// So for aarch64, neon enabling is hard coded here.
-#endif
-#if defined(__aarch64__)
-  cpu_info = kCpuHasNEON;
-#else
-  // Linux arm parse text file for neon detect.
-  cpu_info = ArmCpuCaps("/proc/cpuinfo");
-#endif
-  cpu_info |= kCpuHasARM;
-#endif  // __arm__
-  cpu_info |= kCpuInitialized;
-  return cpu_info;
-}
-
-// Note that use of this function is not thread safe.
-LIBYUV_API
-int MaskCpuFlags(int enable_flags) {
-  int cpu_info = GetCpuFlags() & enable_flags;
-  SetCpuFlags(cpu_info);
-  return cpu_info;
-}
-
-LIBYUV_API
-int InitCpuFlags(void) {
-  return MaskCpuFlags(-1);
-}
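CpuId() above returns EAX/EBX/ECX/EDX for a given leaf. As a quick illustration of the calling convention (a sketch assuming the x86 path of the deleted CpuId is linked in; on other CPUs it returns zeros and the vendor string comes back empty), leaf 0 yields the vendor string in EBX, EDX, ECX order:

#include <stdio.h>
#include <string.h>

void CpuId(int info_eax, int info_ecx, int *cpu_info);  /* from above */

int main(void) {
  int info[4];
  char vendor[13] = {0};
  CpuId(0, 0, info);
  /* Vendor bytes come back in EBX, EDX, ECX order, e.g. "GenuineIntel". */
  memcpy(vendor + 0, &info[1], 4);
  memcpy(vendor + 4, &info[3], 4);
  memcpy(vendor + 8, &info[2], 4);
  printf("cpu vendor: %s\n", vendor);
  return 0;
}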
@@ -1,106 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CPU_ID_H_
-#define INCLUDE_LIBYUV_CPU_ID_H_
-
-#include "basic_types.h"
-
-// Internal flag to indicate cpuid requires initialization.
-static const int kCpuInitialized = 0x1;
-
-// These flags are only valid on ARM processors.
-static const int kCpuHasARM = 0x2;
-static const int kCpuHasNEON = 0x4;
-// 0x8 reserved for future ARM flag.
-
-// These flags are only valid on x86 processors.
-static const int kCpuHasX86 = 0x10;
-static const int kCpuHasSSE2 = 0x20;
-static const int kCpuHasSSSE3 = 0x40;
-static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;  // unused at this time.
-static const int kCpuHasAVX = 0x200;
-static const int kCpuHasAVX2 = 0x400;
-static const int kCpuHasERMS = 0x800;
-static const int kCpuHasFMA3 = 0x1000;
-static const int kCpuHasF16C = 0x2000;
-static const int kCpuHasGFNI = 0x4000;
-static const int kCpuHasAVX512BW = 0x8000;
-static const int kCpuHasAVX512VL = 0x10000;
-static const int kCpuHasAVX512VNNI = 0x20000;
-static const int kCpuHasAVX512VBMI = 0x40000;
-static const int kCpuHasAVX512VBMI2 = 0x80000;
-static const int kCpuHasAVX512VBITALG = 0x100000;
-static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
-
-// Optional init function. TestCpuFlag does an auto-init.
-// Returns cpu_info flags.
-LIBYUV_API
-int InitCpuFlags(void);
-
-// Detect CPU has SSE2 etc.
-// Test_flag parameter should be one of kCpuHas constants above.
-// Returns non-zero if instruction set is detected
-static __inline int TestCpuFlag(int test_flag) {
-  LIBYUV_API extern int cpu_info_;
-#ifdef __ATOMIC_RELAXED
-  int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
-#else
-  int cpu_info = cpu_info_;
-#endif
-  return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
-}
-
-// Internal function for parsing /proc/cpuinfo.
-LIBYUV_API
-int ArmCpuCaps(const char *cpuinfo_name);
-
-// For testing, allow CPU flags to be disabled.
-// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
-// MaskCpuFlags(-1) to enable all cpu specific optimizations.
-// MaskCpuFlags(1) to disable all cpu specific optimizations.
-// MaskCpuFlags(0) to reset state so next call will auto init.
-// Returns cpu_info flags.
-LIBYUV_API
-int MaskCpuFlags(int enable_flags);
-
-// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
-// should be a valid combination of the kCpuHas constants above and include
-// kCpuInitialized. Use this method when running in a sandboxed process where
-// the detection code might fail (as it might access /proc/cpuinfo). In such
-// cases the cpu_info can be obtained from a non sandboxed process by calling
-// InitCpuFlags() and passed to the sandboxed process (via command line
-// parameters, IPC...) which can then call this method to initialize the CPU
-// flags.
-// Notes:
-// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
-//   again.
-// - enabling CPU features that are not supported by the CPU will result in
-//   undefined behavior.
-// TODO(fbarchard): consider writing a helper function that translates from
-// other library CPU info to libyuv CPU info and add a .md doc that explains
-// CPU detection.
-static __inline void SetCpuFlags(int cpu_flags) {
-  LIBYUV_API extern int cpu_info_;
-#ifdef __ATOMIC_RELAXED
-  __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
-#else
-  cpu_info_ = cpu_flags;
-#endif
-}
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-// eax is the info type that you want.
-// ecx is typically the cpu number, and should normally be zero.
-LIBYUV_API
-void CpuId(int info_eax, int info_ecx, int *cpu_info);
-
-#endif  // INCLUDE_LIBYUV_CPU_ID_H_
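The header documents MaskCpuFlags as the test hook for disabling SIMD paths. A small usage sketch (assumes the deleted cpu_id.h shown above is available for inclusion):

#include <stdio.h>
#include "cpu_id.h"  /* the deleted header shown above */

int main(void) {
  if (TestCpuFlag(kCpuHasSSSE3)) {
    printf("SSSE3 paths available\n");
  }
  /* Force the C-only fallbacks, e.g. to A/B a SIMD row function.
     Passing 1 keeps only kCpuInitialized set, per the header comment. */
  MaskCpuFlags(1);
  /* Re-enable everything the CPU actually supports. */
  MaskCpuFlags(-1);
  return 0;
}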
@@ -1,18 +1,43 @@
 //go:build !darwin && !no_libyuv

 // Package libyuv contains the wrapper for: https://chromium.googlesource.com/libyuv/libyuv.
 // Libs are downloaded from: https://packages.macports.org/libyuv/.
 package libyuv

 // see: https://chromium.googlesource.com/libyuv/libyuv

 /*
 #cgo CFLAGS: -Wall
-#cgo LDFLAGS: -lyuv
+#cgo !darwin LDFLAGS: -lyuv

 #include <stdlib.h>
+#cgo darwin CFLAGS: -DINCLUDE_LIBYUV_VERSION_H_
+#cgo darwin LDFLAGS: -L${SRCDIR} -lstdc++
+#cgo darwin,amd64 LDFLAGS: -lyuv_darwin_x86_64 -ljpeg -lstdc++
+#cgo darwin,arm64 LDFLAGS: -lyuv_darwin_arm64 -ljpeg -lstdc++

 #include <stdint.h> // for uintptr_t and C99 types

 #if !defined(LIBYUV_API)
 #define LIBYUV_API
 #endif // LIBYUV_API

 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #include "libyuv/version.h"
 #include "libyuv/video_common.h"
 #else
 #define LIBYUV_VERSION 1874 // darwin static libs version
 #endif // INCLUDE_LIBYUV_VERSION_H_

 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
 #endif

 #define FOURCC(a, b, c, d) \
   (((uint32_t)(a)) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

 enum FourCC {
   FOURCC_I420 = FOURCC('I', '4', '2', '0'),
   FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
   FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
   FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
   FOURCC_ANY = -1,
 };

 //
 typedef enum RotationMode {
   kRotate0 = 0,    // No rotation.
   kRotate90 = 90,  // Rotate 90 degrees clockwise.
@@ -20,7 +45,6 @@ typedef enum RotationMode {
   kRotate270 = 270,  // Rotate 270 degrees clockwise.
 } RotationModeEnum;

 //
 LIBYUV_API
 int ConvertToI420(const uint8_t* sample,
                   size_t sample_size,
@@ -65,6 +89,11 @@ int I420Scale(const uint8_t *src_y,
                   int dst_width,
                   int dst_height,
                   enum FilterMode filtering);

 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
 #endif
 */
 import "C"
 import "fmt"
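The FOURCC macro in the cgo preamble packs four ASCII bytes with the first character in the lowest byte. A standalone check of the packing (sketch):

#include <assert.h>
#include <stdint.h>

#define FOURCC(a, b, c, d) \
  (((uint32_t)(a)) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

int main(void) {
  /* 'I'=0x49 '4'=0x34 '2'=0x32 '0'=0x30 -> 0x30323449 little-endian-first */
  assert(FOURCC('I', '4', '2', '0') == 0x30323449u);
  return 0;
}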
@@ -1,89 +0,0 @@
-//go:build darwin || no_libyuv
-
-package libyuv
-
-/*
-#cgo CFLAGS: -Wall
-
-#include "basic_types.h"
-#include "version.h"
-#include "video_common.h"
-#include "rotate.h"
-#include "scale.h"
-#include "convert.h"
-
-*/
-import "C"
-import "fmt"
-
-const FourccRgbp uint32 = C.FOURCC_RGBP
-const FourccArgb uint32 = C.FOURCC_ARGB
-const FourccAbgr uint32 = C.FOURCC_ABGR
-
-func Y420(src []byte, dst []byte, _, h, stride int, dw, dh int, rot uint, pix uint32, cx, cy int) {
-	cw := (dw + 1) / 2
-	ch := (dh + 1) / 2
-	i0 := dw * dh
-	i1 := i0 + cw*ch
-	yStride := dw
-	cStride := cw
-
-	C.ConvertToI420(
-		(*C.uchar)(&src[0]),
-		C.size_t(0),
-		(*C.uchar)(&dst[0]),
-		C.int(yStride),
-		(*C.uchar)(&dst[i0]),
-		C.int(cStride),
-		(*C.uchar)(&dst[i1]),
-		C.int(cStride),
-		C.int(0),
-		C.int(0),
-		C.int(stride),
-		C.int(h),
-		C.int(cx),
-		C.int(cy),
-		C.enum_RotationMode(rot),
-		C.uint32_t(pix))
-}
-
-func Y420Scale(src []byte, dst []byte, w, h int, dw, dh int) {
-	srcWidthUV, dstWidthUV := (w+1)>>1, (dw+1)>>1
-	srcHeightUV, dstHeightUV := (h+1)>>1, (dh+1)>>1
-
-	srcYPlaneSize, dstYPlaneSize := w*h, dw*dh
-	srcUVPlaneSize, dstUVPlaneSize := srcWidthUV*srcHeightUV, dstWidthUV*dstHeightUV
-
-	srcStrideY, dstStrideY := w, dw
-	srcStrideU, dstStrideU := srcWidthUV, dstWidthUV
-	srcStrideV, dstStrideV := srcWidthUV, dstWidthUV
-
-	srcY := (*C.uchar)(&src[0])
-	srcU := (*C.uchar)(&src[srcYPlaneSize])
-	srcV := (*C.uchar)(&src[srcYPlaneSize+srcUVPlaneSize])
-
-	dstY := (*C.uchar)(&dst[0])
-	dstU := (*C.uchar)(&dst[dstYPlaneSize])
-	dstV := (*C.uchar)(&dst[dstYPlaneSize+dstUVPlaneSize])
-
-	C.I420Scale(
-		srcY,
-		C.int(srcStrideY),
-		srcU,
-		C.int(srcStrideU),
-		srcV,
-		C.int(srcStrideV),
-		C.int(w),
-		C.int(h),
-		dstY,
-		C.int(dstStrideY),
-		dstU,
-		C.int(dstStrideU),
-		dstV,
-		C.int(dstStrideV),
-		C.int(dw),
-		C.int(dh),
-		C.enum_FilterMode(C.kFilterNone))
-}
-
-func Version() string { return fmt.Sprintf("%v mod", int(C.LIBYUV_VERSION)) }
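The deleted Y420 wrapper derives the U and V plane offsets from the destination size. The same layout math expressed in C (a sketch mirroring the Go code above; names are illustrative):

#include <stdint.h>

/* I420 plane layout for a dw x dh destination, as computed in Y420 above:
   Y at 0, U at dw*dh (i0 in the Go code), V at i0 + cw*ch (i1). */
typedef struct {
  int y_off, u_off, v_off;
  int y_stride, c_stride;
} I420Layout;

static I420Layout i420_layout(int dw, int dh) {
  int cw = (dw + 1) / 2;  /* chroma width, rounded up  */
  int ch = (dh + 1) / 2;  /* chroma height, rounded up */
  I420Layout l;
  l.y_off = 0;
  l.u_off = dw * dh;
  l.v_off = l.u_off + cw * ch;
  l.y_stride = dw;
  l.c_stride = cw;
  return l;
}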
BIN  pkg/encoder/yuv/libyuv/libyuv_darwin_arm64.a   (new file; binary file not shown)
BIN  pkg/encoder/yuv/libyuv/libyuv_darwin_x86_64.a  (new file; binary file not shown)
@@ -1,68 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "planar_functions.h"
-
-#include "cpu_id.h"
-#include "row.h"
-
-// Copy a plane of data
-LIBYUV_API
-void CopyPlane(const uint8_t *src_y,
-               int src_stride_y,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               int width,
-               int height) {
-  int y;
-  void (*CopyRow)(const uint8_t *src, uint8_t *dst, int width) = CopyRow_C;
-  if (width <= 0 || height == 0) {
-    return;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_stride_y = -dst_stride_y;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width && dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_y = 0;
-  }
-  // Nothing to do.
-  if (src_y == dst_y && src_stride_y == dst_stride_y) {
-    return;
-  }
-
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_AVX)
-  if (TestCpuFlag(kCpuHasAVX)) {
-    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-
-  // Copy plane
-  for (y = 0; y < height; ++y) {
-    CopyRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-}
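CopyPlane's coalescing trick above only fires when both strides equal the width, i.e. rows are contiguous on both sides. The same idea on its own (a sketch, using memcpy rather than the SIMD row functions):

#include <stdint.h>
#include <string.h>

/* Copy a w x h plane; when rows are contiguous on both sides the whole
   plane collapses into one memcpy, as in the deleted CopyPlane. */
static void copy_plane(const uint8_t *src, int src_stride,
                       uint8_t *dst, int dst_stride, int w, int h) {
  if (src_stride == w && dst_stride == w) {
    memcpy(dst, src, (size_t)w * h);  /* coalesced: one "row" of w*h bytes */
    return;
  }
  for (int y = 0; y < h; ++y) {
    memcpy(dst + (size_t)y * dst_stride, src + (size_t)y * src_stride, w);
  }
}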
@@ -1,46 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
-#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
-
-#include "basic_types.h"
-
-// TODO(fbarchard): Move cpu macros to row.h
-#if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__native_client__) && defined(__x86_64__)) || \
-    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
-#define LIBYUV_DISABLE_X86
-#endif
-// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
-#define LIBYUV_DISABLE_X86
-#endif
-#endif
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_ARGBAFFINEROW_SSE2
-#endif
-
-// Copy a plane of data.
-LIBYUV_API
-void CopyPlane(const uint8_t *src_y,
-               int src_stride_y,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               int width,
-               int height);
-
-#endif  // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
@@ -1,217 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "rotate.h"
-
-#include "convert.h"
-#include "cpu_id.h"
-#include "rotate_row.h"
-#include "row.h"
-
-LIBYUV_API
-void TransposePlane(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height) {
-  int i = height;
-
-  void (*TransposeWx8)(const uint8_t *src, int src_stride, uint8_t *dst,
-                       int dst_stride, int width) = TransposeWx8_C;
-
-#if defined(HAS_TRANSPOSEWX8_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    TransposeWx8 = TransposeWx8_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      TransposeWx8 = TransposeWx8_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      TransposeWx8 = TransposeWx8_Fast_SSSE3;
-    }
-  }
-#endif
-
-  // Work across the source in 8x8 tiles
-  while (i >= 8) {
-    TransposeWx8(src, src_stride, dst, dst_stride, width);
-    src += 8 * src_stride;  // Go down 8 rows.
-    dst += 8;               // Move over 8 columns.
-    i -= 8;
-  }
-
-  if (i > 0) {
-    TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
-  }
-}
-
-LIBYUV_API
-void RotatePlane90(const uint8_t *src,
-                   int src_stride,
-                   uint8_t *dst,
-                   int dst_stride,
-                   int width,
-                   int height) {
-  // Rotate by 90 is a transpose with the source read
-  // from bottom to top. So set the source pointer to the end
-  // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
-  src_stride = -src_stride;
-  TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane270(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height) {
-  // Rotate by 270 is a transpose with the destination written
-  // from bottom to top. So set the destination pointer to the end
-  // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
-  dst_stride = -dst_stride;
-  TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane180(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height) {
-  // Swap top and bottom row and mirror the content. Uses a temporary row.
-  align_buffer_64(row, width);
-  const uint8_t *src_bot = src + src_stride * (height - 1);
-  uint8_t *dst_bot = dst + dst_stride * (height - 1);
-  int half_height = (height + 1) >> 1;
-  int y;
-  void (*MirrorRow)(const uint8_t *src, uint8_t *dst, int width) = MirrorRow_C;
-  void (*CopyRow)(const uint8_t *src, uint8_t *dst, int width) = CopyRow_C;
-#if defined(HAS_MIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    MirrorRow = MirrorRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      MirrorRow = MirrorRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    MirrorRow = MirrorRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      MirrorRow = MirrorRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_AVX)
-  if (TestCpuFlag(kCpuHasAVX)) {
-    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_NEON)
-#endif
-  // Odd height will harmlessly mirror the middle row twice.
-  for (y = 0; y < half_height; ++y) {
-    CopyRow(src, row, width);        // Copy top row into buffer
-    MirrorRow(src_bot, dst, width);  // Mirror bottom row into top row
-    MirrorRow(row, dst_bot, width);  // Mirror buffer into bottom row
-    src += src_stride;
-    dst += dst_stride;
-    src_bot -= src_stride;
-    dst_bot -= dst_stride;
-  }
-  free_aligned_buffer_64(row);
-}
-
-LIBYUV_API
-int I420Rotate(const uint8_t *src_y,
-               int src_stride_y,
-               const uint8_t *src_u,
-               int src_stride_u,
-               const uint8_t *src_v,
-               int src_stride_v,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height,
-               enum RotationMode mode) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if ((!src_y && dst_y) || !src_u || !src_v || width <= 0 || height == 0 ||
-      !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  switch (mode) {
-    case kRotate0:
-      // copy frame
-      return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
-                      src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
-                      dst_v, dst_stride_v, width, height);
-    case kRotate90:
-      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-      RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
-                    halfheight);
-      RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
-                    halfheight);
-      return 0;
-    case kRotate270:
-      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-      RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
-                     halfheight);
-      RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
-                     halfheight);
-      return 0;
-    case kRotate180:
-      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-      RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
-                     halfheight);
-      RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
-                     halfheight);
-      return 0;
-    default:
-      break;
-  }
-  return -1;
-}
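RotatePlane90 above is just TransposePlane with the source walked bottom-up. The pointer and stride flip on its own, as a naive C sketch (no tiling or SIMD):

#include <stddef.h>
#include <stdint.h>

/* Naive 90-degree clockwise rotation: point at the last source row, negate
   the stride so reads walk upward, then transpose. Output is h x w. */
static void rotate90_c(const uint8_t *src, int src_stride,
                       uint8_t *dst, int dst_stride, int w, int h) {
  src += (ptrdiff_t)src_stride * (h - 1);  /* last row  */
  src_stride = -src_stride;                /* walk up   */
  for (int i = 0; i < w; ++i) {
    for (int j = 0; j < h; ++j) {
      dst[(ptrdiff_t)i * dst_stride + j] = src[(ptrdiff_t)j * src_stride + i];
    }
  }
}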
@@ -1,79 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_H_
-#define INCLUDE_LIBYUV_ROTATE_H_
-
-#include "basic_types.h"
-
-// Supported rotation.
-typedef enum RotationMode {
-  kRotate0 = 0,      // No rotation.
-  kRotate90 = 90,    // Rotate 90 degrees clockwise.
-  kRotate180 = 180,  // Rotate 180 degrees.
-  kRotate270 = 270,  // Rotate 270 degrees clockwise.
-} RotationModeEnum;
-
-// Rotate I420 frame.
-LIBYUV_API
-int I420Rotate(const uint8_t *src_y,
-               int src_stride_y,
-               const uint8_t *src_u,
-               int src_stride_u,
-               const uint8_t *src_v,
-               int src_stride_v,
-               uint8_t *dst_y,
-               int dst_stride_y,
-               uint8_t *dst_u,
-               int dst_stride_u,
-               uint8_t *dst_v,
-               int dst_stride_v,
-               int width,
-               int height,
-               enum RotationMode mode);
-
-// Rotate planes by 90, 180, 270. Deprecated.
-LIBYUV_API
-void RotatePlane90(const uint8_t *src,
-                   int src_stride,
-                   uint8_t *dst,
-                   int dst_stride,
-                   int width,
-                   int height);
-
-LIBYUV_API
-void RotatePlane180(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height);
-
-LIBYUV_API
-void RotatePlane270(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height);
-
-// The 90 and 270 functions are based on transposes.
-// Doing a transpose with reversing the read/write
-// order will result in a rotation by +- 90 degrees.
-// Deprecated.
-LIBYUV_API
-void TransposePlane(const uint8_t *src,
-                    int src_stride,
-                    uint8_t *dst,
-                    int dst_stride,
-                    int width,
-                    int height);
-
-#endif  // INCLUDE_LIBYUV_ROTATE_H_
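Because a 90- or 270-degree rotation swaps width and height, the I420 destination must be allocated from the rotated dimensions (the convert.h comments above say the same about strides). A hedged allocation sketch; the helper is illustrative:

#include <stdint.h>
#include <stdlib.h>

/* Allocate a tightly packed I420 destination for rotating a w x h source.
   For 90/270 the output frame is h x w. Caller frees the buffer. */
static uint8_t *alloc_rotated_i420(int w, int h, int rotation,
                                   int *dst_w, int *dst_h) {
  int swap = (rotation == 90 || rotation == 270);
  *dst_w = swap ? h : w;
  *dst_h = swap ? w : h;
  size_t y = (size_t)(*dst_w) * (*dst_h);
  size_t uv = (size_t)((*dst_w + 1) / 2) * ((*dst_h + 1) / 2);
  return (uint8_t *)malloc(y + 2 * uv);
}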
@@ -1,54 +0,0 @@
/*
 * Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "rotate_row.h"

#define TANY(NAMEANY, TPOS_SIMD, MASK) \
  void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
               int dst_stride, int width) { \
    int r = width & MASK; \
    int n = width - r; \
    if (n > 0) { \
      TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
    } \
    TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
  }

#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#undef TANY

#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
  void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
               int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
               int width) { \
    int r = width & MASK; \
    int n = width - r; \
    if (n > 0) { \
      TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
    } \
    TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \
                     dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
  }

#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#undef TUVANY

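// Editorial note: expanding TANY by hand for the SSSE3 case shows the
// any-width dispatch pattern — the SIMD kernel handles the largest multiple
// of (MASK + 1) columns and the C kernel finishes the remainder. This is
// just the macro above written out, not new code:
void TransposeWx8_Any_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst,
                            int dst_stride, int width) {
  int r = width & 7;  // columns left over after the SIMD body
  int n = width - r;  // largest multiple of 8
  if (n > 0) {
    TransposeWx8_SSSE3(src, src_stride, dst, dst_stride, n);
  }
  // The tail starts n columns into src and n transposed rows into dst.
  TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);
}
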
@@ -1,77 +0,0 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "rotate_row.h"

void TransposeWx8_C(const uint8_t *src,
                    int src_stride,
                    uint8_t *dst,
                    int dst_stride,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}

void TransposeUVWx8_C(const uint8_t *src,
                      int src_stride,
                      uint8_t *dst_a,
                      int dst_stride_a,
                      uint8_t *dst_b,
                      int dst_stride_b,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}

void TransposeWxH_C(const uint8_t *src,
                    int src_stride,
                    uint8_t *dst,
                    int dst_stride,
                    int width,
                    int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}

@@ -1,370 +0,0 @@
/*
 * Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "rotate_row.h"
#include "row.h"

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))

// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
void TransposeWx8_SSSE3(const uint8_t *src,
                        int src_stride,
                        uint8_t *dst,
                        int dst_stride,
                        int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movq (%0),%%xmm0 \n"
      "movq (%0,%3),%%xmm1 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "movq (%0),%%xmm2 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "palignr $0x8,%%xmm1,%%xmm1 \n"
      "movq (%0,%3),%%xmm3 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm3 \n"
      "movq (%0),%%xmm4 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "movq (%0,%3),%%xmm5 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "movdqa %%xmm4,%%xmm5 \n"
      "movq (%0),%%xmm6 \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq (%0,%3),%%xmm7 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "neg %3 \n"
      "movdqa %%xmm6,%%xmm7 \n"
      "lea 0x8(%0,%3,8),%0 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "neg %3 \n"
      // Second round of bit swap.
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm0,%%xmm2 \n"
      "movdqa %%xmm1,%%xmm3 \n"
      "palignr $0x8,%%xmm2,%%xmm2 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm4,%%xmm6 \n"
      "movdqa %%xmm5,%%xmm7 \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "punpckldq %%xmm4,%%xmm0 \n"
      "movq %%xmm0,(%1) \n"
      "movdqa %%xmm0,%%xmm4 \n"
      "palignr $0x8,%%xmm4,%%xmm4 \n"
      "movq %%xmm4,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm6 \n"
      "movq %%xmm2,(%1) \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movq %%xmm6,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm1,%%xmm5 \n"
      "movq %%xmm1,(%1) \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq %%xmm5,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movq %%xmm3,(%1) \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "sub $0x8,%2 \n"
      "movq %%xmm7,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "jg 1b \n"
      : "+r"(src),                      // %0
        "+r"(dst),                      // %1
        "+r"(width)                     // %2
      : "r"((intptr_t) (src_stride)),   // %3
        "r"((intptr_t) (dst_stride))    // %4
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
}
#endif  // defined(HAS_TRANSPOSEWX8_SSSE3)

// Transpose 16x8. 64 bit
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
void TransposeWx8_Fast_SSSE3(const uint8_t *src,
                             int src_stride,
                             uint8_t *dst,
                             int dst_stride,
                             int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqu (%0,%3),%%xmm1 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm0,%%xmm8 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "punpckhbw %%xmm1,%%xmm8 \n"
      "movdqu (%0),%%xmm2 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "movdqa %%xmm8,%%xmm9 \n"
      "palignr $0x8,%%xmm1,%%xmm1 \n"
      "palignr $0x8,%%xmm9,%%xmm9 \n"
      "movdqu (%0,%3),%%xmm3 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm2,%%xmm10 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "punpckhbw %%xmm3,%%xmm10 \n"
      "movdqa %%xmm2,%%xmm3 \n"
      "movdqa %%xmm10,%%xmm11 \n"
      "movdqu (%0),%%xmm4 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "palignr $0x8,%%xmm11,%%xmm11 \n"
      "movdqu (%0,%3),%%xmm5 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm4,%%xmm12 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "punpckhbw %%xmm5,%%xmm12 \n"
      "movdqa %%xmm4,%%xmm5 \n"
      "movdqa %%xmm12,%%xmm13 \n"
      "movdqu (%0),%%xmm6 \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "palignr $0x8,%%xmm13,%%xmm13 \n"
      "movdqu (%0,%3),%%xmm7 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm6,%%xmm14 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "punpckhbw %%xmm7,%%xmm14 \n"
      "neg %3 \n"
      "movdqa %%xmm6,%%xmm7 \n"
      "movdqa %%xmm14,%%xmm15 \n"
      "lea 0x10(%0,%3,8),%0 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      "neg %3 \n"
      // Second round of bit swap.
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm0,%%xmm2 \n"
      "movdqa %%xmm1,%%xmm3 \n"
      "palignr $0x8,%%xmm2,%%xmm2 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm4,%%xmm6 \n"
      "movdqa %%xmm5,%%xmm7 \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "punpcklwd %%xmm10,%%xmm8 \n"
      "punpcklwd %%xmm11,%%xmm9 \n"
      "movdqa %%xmm8,%%xmm10 \n"
      "movdqa %%xmm9,%%xmm11 \n"
      "palignr $0x8,%%xmm10,%%xmm10 \n"
      "palignr $0x8,%%xmm11,%%xmm11 \n"
      "punpcklwd %%xmm14,%%xmm12 \n"
      "punpcklwd %%xmm15,%%xmm13 \n"
      "movdqa %%xmm12,%%xmm14 \n"
      "movdqa %%xmm13,%%xmm15 \n"
      "palignr $0x8,%%xmm14,%%xmm14 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "punpckldq %%xmm4,%%xmm0 \n"
      "movq %%xmm0,(%1) \n"
      "movdqa %%xmm0,%%xmm4 \n"
      "palignr $0x8,%%xmm4,%%xmm4 \n"
      "movq %%xmm4,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm6 \n"
      "movq %%xmm2,(%1) \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movq %%xmm6,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm1,%%xmm5 \n"
      "movq %%xmm1,(%1) \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq %%xmm5,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movq %%xmm3,(%1) \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "movq %%xmm7,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm12,%%xmm8 \n"
      "movq %%xmm8,(%1) \n"
      "movdqa %%xmm8,%%xmm12 \n"
      "palignr $0x8,%%xmm12,%%xmm12 \n"
      "movq %%xmm12,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm14,%%xmm10 \n"
      "movdqa %%xmm10,%%xmm14 \n"
      "movq %%xmm10,(%1) \n"
      "palignr $0x8,%%xmm14,%%xmm14 \n"
      "punpckldq %%xmm13,%%xmm9 \n"
      "movq %%xmm14,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm9,%%xmm13 \n"
      "movq %%xmm9,(%1) \n"
      "palignr $0x8,%%xmm13,%%xmm13 \n"
      "movq %%xmm13,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm15,%%xmm11 \n"
      "movq %%xmm11,(%1) \n"
      "movdqa %%xmm11,%%xmm15 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      "sub $0x10,%2 \n"
      "movq %%xmm15,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "jg 1b \n"
      : "+r"(src),                      // %0
        "+r"(dst),                      // %1
        "+r"(width)                     // %2
      : "r"((intptr_t) (src_stride)),   // %3
        "r"((intptr_t) (dst_stride))    // %4
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
        "xmm15");
}
#endif  // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)

// Transpose UV 8x8. 64 bit.
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
void TransposeUVWx8_SSE2(const uint8_t *src,
                         int src_stride,
                         uint8_t *dst_a,
                         int dst_stride_a,
                         uint8_t *dst_b,
                         int dst_stride_b,
                         int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqu (%0,%4),%%xmm1 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm0,%%xmm8 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "punpckhbw %%xmm1,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm1 \n"
      "movdqu (%0),%%xmm2 \n"
      "movdqu (%0,%4),%%xmm3 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm2,%%xmm8 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "punpckhbw %%xmm3,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm3 \n"
      "movdqu (%0),%%xmm4 \n"
      "movdqu (%0,%4),%%xmm5 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm4,%%xmm8 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "punpckhbw %%xmm5,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm5 \n"
      "movdqu (%0),%%xmm6 \n"
      "movdqu (%0,%4),%%xmm7 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm6,%%xmm8 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "neg %4 \n"
      "lea 0x10(%0,%4,8),%0 \n"
      "punpckhbw %%xmm7,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm7 \n"
      "neg %4 \n"
      // Second round of bit swap.
      "movdqa %%xmm0,%%xmm8 \n"
      "movdqa %%xmm1,%%xmm9 \n"
      "punpckhwd %%xmm2,%%xmm8 \n"
      "punpckhwd %%xmm3,%%xmm9 \n"
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm8,%%xmm2 \n"
      "movdqa %%xmm9,%%xmm3 \n"
      "movdqa %%xmm4,%%xmm8 \n"
      "movdqa %%xmm5,%%xmm9 \n"
      "punpckhwd %%xmm6,%%xmm8 \n"
      "punpckhwd %%xmm7,%%xmm9 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm8,%%xmm6 \n"
      "movdqa %%xmm9,%%xmm7 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "movdqa %%xmm0,%%xmm8 \n"
      "punpckldq %%xmm4,%%xmm0 \n"
      "movlpd %%xmm0,(%1) \n"  // Write back U channel
      "movhpd %%xmm0,(%2) \n"  // Write back V channel
      "punpckhdq %%xmm4,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm2,%%xmm8 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movlpd %%xmm2,(%1) \n"
      "movhpd %%xmm2,(%2) \n"
      "punpckhdq %%xmm6,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm1,%%xmm8 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movlpd %%xmm1,(%1) \n"
      "movhpd %%xmm1,(%2) \n"
      "punpckhdq %%xmm5,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm3,%%xmm8 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movlpd %%xmm3,(%1) \n"
      "movhpd %%xmm3,(%2) \n"
      "punpckhdq %%xmm7,%%xmm8 \n"
      "sub $0x8,%3 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "jg 1b \n"
      : "+r"(src),                        // %0
        "+r"(dst_a),                      // %1
        "+r"(dst_b),                      // %2
        "+r"(width)                       // %3
      : "r"((intptr_t) (src_stride)),     // %4
        "r"((intptr_t) (dst_stride_a)),   // %5
        "r"((intptr_t) (dst_stride_b))    // %6
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7", "xmm8", "xmm9");
}
#endif  // defined(HAS_TRANSPOSEUVWX8_SSE2)

#endif  // defined(__x86_64__) || defined(__i386__)

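// Editorial note: the "three rounds of bit swap" above transpose an 8x8 byte
// tile in log2(8) = 3 passes (punpcklbw on bytes, then punpcklwd on 16-bit
// pairs, then punpckldq on 32-bit quads). One way to model the same overall
// effect in plain C is a recursive block swap — a sketch under that
// interpretation, not the exact register dance of the assembly:
static void Transpose8x8_Model(uint8_t m[8][8]) {
  int s, i, j;
  for (s = 4; s >= 1; s >>= 1) {  // block sizes 4, 2, 1: one pass per round
    for (i = 0; i < 8; ++i) {
      for (j = 0; j < 8; ++j) {
        if ((i & s) && !(j & s)) {  // swap the off-diagonal sub-blocks
          uint8_t t = m[i][j];
          m[i][j] = m[i - s][j + s];
          m[i - s][j + s] = t;
        }
      }
    }
  }
}
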
@@ -1,106 +0,0 @@
/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_
#define INCLUDE_LIBYUV_ROTATE_ROW_H_

#include "basic_types.h"

#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__native_client__) && defined(__x86_64__)) || \
    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif

// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
#endif

// The following are available for 64 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

void TransposeWxH_C(const uint8_t *src,
                    int src_stride,
                    uint8_t *dst,
                    int dst_stride,
                    int width,
                    int height);

void TransposeWx8_C(const uint8_t *src,
                    int src_stride,
                    uint8_t *dst,
                    int dst_stride,
                    int width);

void TransposeWx8_SSSE3(const uint8_t *src,
                        int src_stride,
                        uint8_t *dst,
                        int dst_stride,
                        int width);

void TransposeWx8_Fast_SSSE3(const uint8_t *src,
                             int src_stride,
                             uint8_t *dst,
                             int dst_stride,
                             int width);

void TransposeWx8_Any_SSSE3(const uint8_t *src,
                            int src_stride,
                            uint8_t *dst,
                            int dst_stride,
                            int width);

void TransposeWx8_Fast_Any_SSSE3(const uint8_t *src,
                                 int src_stride,
                                 uint8_t *dst,
                                 int dst_stride,
                                 int width);

void TransposeUVWx8_C(const uint8_t *src,
                      int src_stride,
                      uint8_t *dst_a,
                      int dst_stride_a,
                      uint8_t *dst_b,
                      int dst_stride_b,
                      int width);

void TransposeUVWx8_SSE2(const uint8_t *src,
                         int src_stride,
                         uint8_t *dst_a,
                         int dst_stride_a,
                         uint8_t *dst_b,
                         int dst_stride_b,
                         int width);

void TransposeUVWx8_Any_SSE2(const uint8_t *src,
                             int src_stride,
                             uint8_t *dst_a,
                             int dst_stride_a,
                             uint8_t *dst_b,
                             int dst_stride_b,
                             int width);

#endif  // INCLUDE_LIBYUV_ROTATE_ROW_H_

@@ -1,426 +0,0 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_

#include <stddef.h>  // For NULL
#include <stdlib.h>  // For malloc

#include "basic_types.h"

#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__native_client__) && defined(__x86_64__)) || \
    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif

// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif  // GNUC >= 4.7
#endif  // __GNUC__

// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORSPLITUVROW_SSSE3
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#endif

// Effects:
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2

#endif

// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
     defined(GCC_HAS_AVX2))
#define HAS_ARGBEXTRACTALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX
#define HAS_INTERPOLATEROW_AVX2
#define HAS_MIRRORROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBTOUVROW_AVX2
#endif

#endif

// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_MIRRORUVROW_SSSE3

#endif

// The following are available for AVX2 gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || defined(__i386__)) && \
    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOYROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_AVX2
#endif

#endif

#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
#define LIBYUV_NOINLINE __declspec(noinline)
typedef __declspec(align(16)) int16_t vec16[8];
typedef __declspec(align(16)) int32_t vec32[4];
typedef __declspec(align(16)) float vecf32[4];
typedef __declspec(align(16)) int8_t vec8[16];
typedef __declspec(align(16)) uint16_t uvec16[8];
typedef __declspec(align(16)) uint32_t uvec32[4];
typedef __declspec(align(16)) uint8_t uvec8[16];
typedef __declspec(align(32)) int16_t lvec16[16];
typedef __declspec(align(32)) int32_t lvec32[8];
typedef __declspec(align(32)) int8_t lvec8[32];
typedef __declspec(align(32)) uint16_t ulvec16[16];
typedef __declspec(align(32)) uint32_t ulvec32[8];
typedef __declspec(align(32)) uint8_t ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
#define LIBYUV_NOINLINE __attribute__((noinline))
typedef int16_t __attribute__((vector_size(16))) vec16;
typedef int32_t __attribute__((vector_size(16))) vec32;
typedef float __attribute__((vector_size(16))) vecf32;
typedef int8_t __attribute__((vector_size(16))) vec8;
typedef uint16_t __attribute__((vector_size(16))) uvec16;
typedef uint32_t __attribute__((vector_size(16))) uvec32;
typedef uint8_t __attribute__((vector_size(16))) uvec8;
typedef int16_t __attribute__((vector_size(32))) lvec16;
typedef int32_t __attribute__((vector_size(32))) lvec32;
typedef int8_t __attribute__((vector_size(32))) lvec8;
typedef uint16_t __attribute__((vector_size(32))) ulvec16;
typedef uint32_t __attribute__((vector_size(32))) ulvec32;
typedef uint8_t __attribute__((vector_size(32))) ulvec8;
#else
#define SIMD_ALIGNED(var) var
#define LIBYUV_NOINLINE
typedef int16_t vec16[8];
typedef int32_t vec32[4];
typedef float vecf32[4];
typedef int8_t vec8[16];
typedef uint16_t uvec16[8];
typedef uint32_t uvec32[4];
typedef uint8_t uvec8[16];
typedef int16_t lvec16[16];
typedef int32_t lvec32[8];
typedef int8_t lvec8[32];
typedef uint16_t ulvec16[16];
typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif

#if !defined(__aarch64__) || !defined(__arm__)
// This struct is for Intel color conversion.
struct YuvConstants {
  uint8_t kUVToB[32];
  uint8_t kUVToG[32];
  uint8_t kUVToR[32];
  int16_t kYToRgb[16];
  int16_t kYBiasToRgb[16];
};
#endif

#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))

#define align_buffer_64(var, size) \
  void* var##_mem = malloc((size) + 63); /* NOLINT */ \
  uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */

#define free_aligned_buffer_64(var) \
  free(var##_mem); \
  var = NULL
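// Editorial note: a usage sketch of the align_buffer_64 idiom above. The
// macro over-allocates by 63 bytes and rounds the pointer up with
// ((p + 63) & ~63), so a plain malloc result becomes 64-byte aligned
// without posix_memalign:
//
//   align_buffer_64(row, kWidth);   // row is now 64-byte aligned
//   /* ... use row[0] .. row[kWidth - 1] ... */
//   free_aligned_buffer_64(row);    // frees the underlying row_mem
//
// (kWidth is an illustrative size, not a libyuv symbol.)
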
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif

// NaCL macros for GCC x86 and x64.
#if defined(__native_client__)
#define LABELALIGN ".p2align 5\n"
#else
#define LABELALIGN
#endif

void ARGBToYRow_AVX2(const uint8_t *src_argb, uint8_t *dst_y, int width);

void ARGBToYRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void ABGRToYRow_AVX2(const uint8_t *src_abgr, uint8_t *dst_y, int width);

void ABGRToYRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void ARGBToYRow_SSSE3(const uint8_t *src_argb, uint8_t *dst_y, int width);

void ABGRToYRow_SSSE3(const uint8_t *src_abgr, uint8_t *dst_y, int width);

void BGRAToYRow_SSSE3(const uint8_t *src_bgra, uint8_t *dst_y, int width);

void ARGBToYRow_C(const uint8_t *src_rgb, uint8_t *dst_y, int width);

void ABGRToYRow_C(const uint8_t *src_rgb, uint8_t *dst_y, int width);

void RGB565ToYRow_C(const uint8_t *src_rgb565, uint8_t *dst_y, int width);

void ARGBToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void BGRAToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void ABGRToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void ARGBToUVRow_AVX2(const uint8_t *src_argb,
                      int src_stride_argb,
                      uint8_t *dst_u,
                      uint8_t *dst_v,
                      int width);

void ABGRToUVRow_AVX2(const uint8_t *src_abgr,
                      int src_stride_abgr,
                      uint8_t *dst_u,
                      uint8_t *dst_v,
                      int width);

void ARGBToUVRow_SSSE3(const uint8_t *src_argb,
                       int src_stride_argb,
                       uint8_t *dst_u,
                       uint8_t *dst_v,
                       int width);

void BGRAToUVRow_SSSE3(const uint8_t *src_bgra,
                       int src_stride_bgra,
                       uint8_t *dst_u,
                       uint8_t *dst_v,
                       int width);

void ABGRToUVRow_SSSE3(const uint8_t *src_abgr,
                       int src_stride_abgr,
                       uint8_t *dst_u,
                       uint8_t *dst_v,
                       int width);

void RGBAToUVRow_SSSE3(const uint8_t *src_rgba,
                       int src_stride_rgba,
                       uint8_t *dst_u,
                       uint8_t *dst_v,
                       int width);

void ARGBToUVRow_Any_AVX2(const uint8_t *src_ptr,
                          int src_stride,
                          uint8_t *dst_u,
                          uint8_t *dst_v,
                          int width);

void ABGRToUVRow_Any_AVX2(const uint8_t *src_ptr,
                          int src_stride,
                          uint8_t *dst_u,
                          uint8_t *dst_v,
                          int width);

void ARGBToUVRow_Any_SSSE3(const uint8_t *src_ptr,
                           int src_stride,
                           uint8_t *dst_u,
                           uint8_t *dst_v,
                           int width);

void BGRAToUVRow_Any_SSSE3(const uint8_t *src_ptr,
                           int src_stride,
                           uint8_t *dst_u,
                           uint8_t *dst_v,
                           int width);

void ABGRToUVRow_Any_SSSE3(const uint8_t *src_ptr,
                           int src_stride,
                           uint8_t *dst_u,
                           uint8_t *dst_v,
                           int width);

void RGBAToUVRow_Any_SSSE3(const uint8_t *src_ptr,
                           int src_stride,
                           uint8_t *dst_u,
                           uint8_t *dst_v,
                           int width);

void ARGBToUVRow_C(const uint8_t *src_rgb,
                   int src_stride_rgb,
                   uint8_t *dst_u,
                   uint8_t *dst_v,
                   int width);

void BGRAToUVRow_C(const uint8_t *src_rgb,
                   int src_stride_rgb,
                   uint8_t *dst_u,
                   uint8_t *dst_v,
                   int width);

void ABGRToUVRow_C(const uint8_t *src_rgb,
                   int src_stride_rgb,
                   uint8_t *dst_u,
                   uint8_t *dst_v,
                   int width);

void RGBAToUVRow_C(const uint8_t *src_rgb,
                   int src_stride_rgb,
                   uint8_t *dst_u,
                   uint8_t *dst_v,
                   int width);

void RGB565ToUVRow_C(const uint8_t *src_rgb565,
                     int src_stride_rgb565,
                     uint8_t *dst_u,
                     uint8_t *dst_v,
                     int width);

void MirrorRow_AVX2(const uint8_t *src, uint8_t *dst, int width);

void MirrorRow_SSSE3(const uint8_t *src, uint8_t *dst, int width);

void MirrorRow_C(const uint8_t *src, uint8_t *dst, int width);

void MirrorRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void MirrorRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void MirrorRow_Any_SSE2(const uint8_t *src, uint8_t *dst, int width);

void MirrorUVRow_AVX2(const uint8_t *src_uv, uint8_t *dst_uv, int width);

void MirrorUVRow_SSSE3(const uint8_t *src_uv, uint8_t *dst_uv, int width);

void MirrorUVRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void MirrorUVRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void ARGBMirrorRow_AVX2(const uint8_t *src, uint8_t *dst, int width);

void ARGBMirrorRow_SSE2(const uint8_t *src, uint8_t *dst, int width);

void ARGBMirrorRow_C(const uint8_t *src, uint8_t *dst, int width);

void ARGBMirrorRow_Any_AVX2(const uint8_t *src_ptr,
                            uint8_t *dst_ptr,
                            int width);

void ARGBMirrorRow_Any_SSE2(const uint8_t *src_ptr,
                            uint8_t *dst_ptr,
                            int width);

void CopyRow_SSE2(const uint8_t *src, uint8_t *dst, int width);

void CopyRow_AVX(const uint8_t *src, uint8_t *dst, int width);

void CopyRow_ERMS(const uint8_t *src, uint8_t *dst, int width);

void CopyRow_C(const uint8_t *src, uint8_t *dst, int count);

void CopyRow_Any_SSE2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void CopyRow_Any_AVX(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);

void RGB565ToARGBRow_SSE2(const uint8_t *src, uint8_t *dst, int width);

void RGB565ToARGBRow_AVX2(const uint8_t *src_rgb565,
                          uint8_t *dst_argb,
                          int width);

void RGB565ToARGBRow_C(const uint8_t *src_rgb565, uint8_t *dst_argb, int width);

void RGB565ToARGBRow_Any_SSE2(const uint8_t *src_ptr,
                              uint8_t *dst_ptr,
                              int width);

void RGB565ToARGBRow_Any_AVX2(const uint8_t *src_ptr,
                              uint8_t *dst_ptr,
                              int width);

// Used for I420Scale, ARGBScale, and ARGBInterpolate.
void InterpolateRow_C(uint8_t *dst_ptr,
                      const uint8_t *src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction);

void InterpolateRow_SSSE3(uint8_t *dst_ptr,
                          const uint8_t *src_ptr,
                          ptrdiff_t src_stride,
                          int dst_width,
                          int source_y_fraction);

void InterpolateRow_AVX2(uint8_t *dst_ptr,
                         const uint8_t *src_ptr,
                         ptrdiff_t src_stride,
                         int dst_width,
                         int source_y_fraction);

void InterpolateRow_Any_SSSE3(uint8_t *dst_ptr,
                              const uint8_t *src_ptr,
                              ptrdiff_t src_stride_ptr,
                              int width,
                              int source_y_fraction);

void InterpolateRow_Any_AVX2(uint8_t *dst_ptr,
                             const uint8_t *src_ptr,
                             ptrdiff_t src_stride_ptr,
                             int width,
                             int source_y_fraction);

#endif  // INCLUDE_LIBYUV_ROW_H_

@@ -1,206 +0,0 @@
/*
 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "row.h"

#include <string.h>  // For memset.

// Subsampled source needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[128]); \
    SIMD_ALIGNED(uint8_t vout[128]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, n); \
    } \
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(vin, vout, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif

#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
#endif

#undef ANY11
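// Editorial note: ANY11 written out for CopyRow_Any_AVX (UVSHIFT=0, SBPP=1,
// BPP=1, MASK=63) shows the tail-handling trick — the SIMD kernel runs on
// the 64-byte multiple, and the leftover bytes are staged through a padded,
// zeroed stack buffer so the kernel always consumes a full block:
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {
  SIMD_ALIGNED(uint8_t vin[128]);
  SIMD_ALIGNED(uint8_t vout[128]);
  memset(vin, 0, sizeof(vin));
  int r = width & 63;   // leftover bytes
  int n = width & ~63;  // largest multiple of 64
  if (n > 0) {
    CopyRow_AVX(src_ptr, dst_ptr, n);
  }
  memcpy(vin, src_ptr + n, r);   // stage the tail
  CopyRow_AVX(vin, vout, 64);    // run the kernel on one full block
  memcpy(dst_ptr + n, vout, r);  // commit only the r valid bytes
}
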
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
               int width, int source_y_fraction) { \
    SIMD_ALIGNED(TS vin[64 * 2]); \
    SIMD_ALIGNED(TD vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
    } \
    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
    if (source_y_fraction) { \
      memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
             r * SBPP * sizeof(TS)); \
    } \
    ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
  }

#ifdef HAS_INTERPOLATEROW_AVX2
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11I(InterpolateRow_Any_SSSE3,
       InterpolateRow_SSSE3,
       uint8_t,
       uint8_t,
       1,
       1,
       15)
#endif

#undef ANY11I

// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[64]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
    } \
    memcpy(vin, src_ptr, r* BPP); \
    ANY_SIMD(vin, vout, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
  }

#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#undef ANY11M

// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
               uint8_t* dst_v, int width) { \
    SIMD_ALIGNED(uint8_t vin[128 * 2]); \
    SIMD_ALIGNED(uint8_t vout[128 * 2]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
    } \
    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
           SS(r, UVSHIFT) * BPP); \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
             BPP); \
      memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
             vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
    } \
    ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \
    memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \
    memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \
  }

#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#undef ANY12S

@@ -1,887 +0,0 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "row.h"

#include <assert.h>
#include <string.h>  // For memcpy and memset.

#define STATIC_CAST(type, expr) (type)(expr)

// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA

// Macros to enable unlimited data for each colorspace
// LIBYUV_UNLIMITED_BT601
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020

#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
                                   defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif

// llvm x86 is poor at the ternary operator, so use branchless min/max.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS

static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}

// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}

// clamp to max
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (-(v >= max) | v) & max;
}

static __inline uint32_t Abs(int32_t v) {
  int m = -(v < 0);
  return (v + m) ^ m;
}

#else  // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (v > max) ? max : v;
}

static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? -v : v;
}
#endif  // USE_BRANCHLESS

static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t) (clamp255(v));
}

static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t) (clamp1023(v));
}
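// Editorial note: a worked check of the branchless forms above.
//   clamp255(200): -(200 >= 255) == 0,  so (0 | 200) & 255  == 200
//   clamp255(300): -(300 >= 255) == -1, so (-1 | 300) & 255 == 255
//   clamp0(7):     -(7 >= 0)  == -1, so -1 & 7  == 7
//   clamp0(-7):    -(-7 >= 0) == 0,  so  0 & -7 == 0
// Negative inputs to clamp255 wrap (hence the TODO above), which is why
// Clamp() and Clamp10() apply clamp0 before clamp255/clamp1023.
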
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif

void RGB565ToARGBRow_C(const uint8_t *src_rgb565,
                       uint8_t *dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb565 += 2;
  }
}
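// Editorial note: the (b << 3) | (b >> 2) expansion above widens a 5-bit
// channel to 8 bits by replicating its top bits, so the extremes map
// exactly: 0x00 -> 0x00 and 0x1f -> 0xf8 | 0x07 == 0xff. The 6-bit green
// channel uses (g << 2) | (g >> 4) for the same reason (0x3f -> 0xff).
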
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//         0x7e80) >> 8;

static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
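// Editorial note: worked values for RGBToY above (BT.601 studio swing).
//   white: (66 + 129 + 25) * 255 + 0x1080 = 56100 + 4224 = 60324 >> 8 = 235
//   black: 0x1080 >> 8 = 16
// i.e. the formula lands exactly on the expected 16..235 video range for Y.
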

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}

static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
}

#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
#endif

// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return STATIC_CAST(
      uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return STATIC_CAST(
      uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
}
#endif

// ARGBToY_C and ARGBToUV_C
// Intel version mimics SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
    } \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 1) >> \
                    1; \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 1) >> \
                    1; \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 1) >> \
                    1; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint16_t ab = src_rgb[B] + src_rgb1[B]; \
      uint16_t ag = src_rgb[G] + src_rgb1[G]; \
      uint16_t ar = src_rgb[R] + src_rgb1[R]; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
    } \
  }
#endif

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
#undef MAKEROWY
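// Editorial note: AVGB(a, b) = (a + b + 1) >> 1 is the rounded average that
// x86 pavgb computes, so AVGB(AVGB(b0, b2), AVGB(b1, b3)) reproduces the
// SIMD 2x2 subsample bit-for-bit; because each stage rounds up, it can
// differ by 1 from a true (b0 + b1 + b2 + b3 + 2) >> 2 average, which is
// what the LIBYUV_BIT_EXACT / non-PAVGB path avoids.
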
// JPeg uses a variation on BT.601-1 full range
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127

// 8 bit
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
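// Editorial note: worked values for the full-range RGBToYJ above.
//   white: (77 + 150 + 29) * 255 + 128 = 65280 + 128 = 65408 >> 8 = 255
//   black: 128 >> 8 = 0
// so the JPeg variant spans the full 0..255 range instead of 16..235.
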
|
||||
#if defined(LIBYUV_ARGBTOUV_PAVGB)
|
||||
|
||||
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
|
||||
}
|
||||
|
||||
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
|
||||
}
|
||||
|
||||
#else
|
||||
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
|
||||
}
|
||||
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ARGBToYJ_C and ARGBToUVJ_C
|
||||
// Intel version mimic SSE/AVX which does 2 pavgb
|
||||
#if LIBYUV_ARGBTOUV_PAVGB
|
||||
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
for (x = 0; x < width; ++x) { \
|
||||
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||
src_rgb += BPP; \
|
||||
dst_y += 1; \
|
||||
} \
|
||||
} \
|
||||
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
|
||||
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
|
||||
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
|
||||
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
|
||||
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
|
||||
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
|
||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||
src_rgb += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
|
||||
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
|
||||
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
|
||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
for (x = 0; x < width; ++x) { \
|
||||
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||
src_rgb += BPP; \
|
||||
dst_y += 1; \
|
||||
} \
|
||||
} \
|
||||
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
|
||||
src_rgb1[B + BPP] + 1) >> \
|
||||
1; \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
|
||||
src_rgb1[G + BPP] + 1) >> \
|
||||
1; \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
|
||||
src_rgb1[R + BPP] + 1) >> \
|
||||
1; \
|
||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||
src_rgb += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
|
||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
MAKEROWYJ(ARGB, 2, 1, 0, 4)
|
||||
|
||||
MAKEROWYJ(ABGR, 0, 1, 2, 4)
|
||||
|
||||
MAKEROWYJ(RGBA, 3, 2, 1, 4)
|
||||
|
||||
MAKEROWYJ(RGB24, 2, 1, 0, 3)
|
||||
|
||||
MAKEROWYJ(RAW, 0, 1, 2, 3)
|
||||
|
||||
#undef MAKEROWYJ
|
||||
|
||||
void RGB565ToYRow_C(const uint8_t *src_rgb565, uint8_t *dst_y, int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
uint8_t b = src_rgb565[0] & 0x1f;
|
||||
uint8_t g = STATIC_CAST(
|
||||
uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
|
||||
uint8_t r = src_rgb565[1] >> 3;
|
||||
b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
|
||||
g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
|
||||
r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
|
||||
dst_y[0] = RGBToY(r, g, b);
|
||||
src_rgb565 += 2;
|
||||
dst_y += 1;
|
||||
}
|
||||
}
|
||||
|
||||

void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
    uint8_t g1 = STATIC_CAST(
        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
    uint8_t g3 = STATIC_CAST(
        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);

    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 24

#undef REPEAT8
#undef SHADE

#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16

#undef REPEAT8
#undef SHADE

#define SHADE(f, v) clamp255(v + f)

#undef SHADE

#define SHADE(f, v) clamp0(f - v)

#undef SHADE

// Macros to create SIMD specific yuv to rgb conversion constants.

// clang-format off

#if defined(__aarch64__) || defined(__arm__)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
    0, 0}}
#else
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif

// clang-format on

#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);

// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 limited range YUV to RGB reference
//  R = (Y - 16) * 1.164 + V * 1.596
//  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
//  B = (Y - 16) * 1.164 + U * 2.018
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
#endif
#define UG 25  /* round(0.391 * 64) */
#define VG 52  /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
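
// Numeric check of the BT.601 limited-range constants above (a sketch with
// assumed rounding conventions, excluded from the build): Y is widened to
// 16 bits via y * 0x0101, scaled by YG, biased by YB, and the per-channel
// result is 6-bit fixed point, so >> 6 yields an 8-bit value.
#if 0
#include <stdint.h>
#include <stdio.h>
static uint8_t clamp8(int v) { return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v); }
int main(void) {
  const int kYG = 18997, kYB = -1160, kVR = 102;  // values defined above
  uint8_t y = 81, v = 240;                        // a saturated BT.601 red
  int32_t y1 = (int32_t)(((uint32_t)(y * 0x0101) * kYG) >> 16) + kYB;
  int r16 = y1 + (v - 0x80) * kVR;                // V contributes to R
  printf("R = %u\n", clamp8(r16 >> 6));  // 254; float formula gives ~254.4
  return 0;
}
#endif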

// BT.601 full range YUV to RGB reference (aka JPEG)
// *  R = Y + V * 1.40200
// *  G = Y - U * 0.34414 - V * 0.71414
// *  B = Y + U * 1.77200
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#define UB 113 /* round(1.77200 * 64) */
#define UG 22  /* round(0.34414 * 64) */
#define VG 46  /* round(0.71414 * 64) */
#define VR 90  /* round(1.40200 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32    /* 64 / 2 */

MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 limited range YUV to RGB reference
//  R = (Y - 16) * 1.164 + V * 1.793
//  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
//  B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
#endif
#define UG 14  /* round(0.213 * 64) */
#define VG 34  /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 full range YUV to RGB reference
//  R = Y + V * 1.5748
//  G = Y - U * 0.18732 - V * 0.46812
//  B = Y + U * 1.8556
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#define UB 119 /* round(1.8556 * 64) */
#define UG 12  /* round(0.18732 * 64) */
#define VG 30  /* round(0.46812 * 64) */
#define VR 101 /* round(1.5748 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32    /* 64 / 2 */

MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 limited range YUV to RGB reference
//  R = (Y - 16) * 1.164384 + V * 1.67867
//  G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
//  B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
#endif
#define UG 12  /* round(0.187326 * 64) */
#define VG 42  /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 full range YUV to RGB reference
//  R = Y + V * 1.474600
//  G = Y - U * 0.164553 - V * 0.571353
//  B = Y + U * 1.881400
// KR = 0.2627; KB = 0.0593

#define UB 120 /* round(1.881400 * 64) */
#define UG 11  /* round(0.164553 * 64) */
#define VG 37  /* round(0.571353 * 64) */
#define VR 94  /* round(1.474600 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32    /* 64 / 2 */

MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

#undef BB
#undef BG
#undef BR

#undef MAKEYUVCONSTANTS

#if defined(__aarch64__) || defined(__arm__)
#define LOAD_YUV_CONSTANTS \
  int ub = yuvconstants->kUVCoeff[0]; \
  int vr = yuvconstants->kUVCoeff[1]; \
  int ug = yuvconstants->kUVCoeff[2]; \
  int vg = yuvconstants->kUVCoeff[3]; \
  int yg = yuvconstants->kRGBCoeffBias[0]; \
  int bb = yuvconstants->kRGBCoeffBias[1]; \
  int bg = yuvconstants->kRGBCoeffBias[2]; \
  int br = yuvconstants->kRGBCoeffBias[3]

#define CALC_RGB16 \
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
  int b16 = y1 + (u * ub) - bb; \
  int g16 = y1 + bg - (u * ug + v * vg); \
  int r16 = y1 + (v * vr) - br
#else
#define LOAD_YUV_CONSTANTS \
  int ub = yuvconstants->kUVToB[0]; \
  int ug = yuvconstants->kUVToG[0]; \
  int vg = yuvconstants->kUVToG[1]; \
  int vr = yuvconstants->kUVToR[1]; \
  int yg = yuvconstants->kYToRgb[0]; \
  int yb = yuvconstants->kYBiasToRgb[0]

#define CALC_RGB16 \
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
  int8_t ui = (int8_t)u; \
  int8_t vi = (int8_t)v; \
  ui -= 0x80; \
  vi -= 0x80; \
  int b16 = y1 + (ui * ub); \
  int g16 = y1 - (ui * ug + vi * vg); \
  int r16 = y1 + (vi * vr)
#endif
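
// Why the (int8_t) cast plus "-= 0x80" in CALC_RGB16 recovers u - 128: on
// two's-complement targets (assumed here, as everywhere libyuv builds in
// practice) the 8-bit wraparound of both steps together equals u - 128.
#if 0  // Standalone sketch, excluded from the build.
#include <stdint.h>
#include <stdio.h>
int main(void) {
  int u;
  for (u = 0; u < 256; u += 51) {
    int8_t ui = (int8_t)u;     // reinterpret the unsigned byte
    ui = (int8_t)(ui - 0x80);  // wraps back into [-128, 127]
    printf("u=%3d -> ui=%4d (u - 128 = %4d)\n", u, ui, u - 128);
  }
  return 0;
}
#endif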

void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  src += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst[x] = src[0];
    dst[x + 1] = src[-1];
    src -= 2;
  }
  if (width & 1) {
    dst[width - 1] = src[0];
  }
}

// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
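
// C16TO8 in isolation (sketch, excluded from the build): with scale = 16384
// a 10-bit sample maps onto 8 bits, since (v * 16384) >> 16 == v >> 2, with
// clamp255 as the safety net for out-of-range input.
#if 0
#include <stdint.h>
#include <stdio.h>
static int clamp255(int v) { return v > 255 ? 255 : (v < 0 ? 0 : v); }
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
int main(void) {
  uint16_t ten_bit[3] = {0, 512, 1023};  // black, mid, white in 10 bits
  int i;
  for (i = 0; i < 3; ++i) {
    printf("%4u -> %3d\n", ten_bit[i], C16TO8(ten_bit[i], 16384));
  }
  return 0;  // prints 0, 128, 255
}
#endif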

void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, count);
}

// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
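
// How a table lookup unattenuates a pixel (assumed usage sketch; the low 16
// bits of each entry hold 65536 / a, so (b * (65536 / a)) >> 8 ~= b * 256 / a,
// which approximates b * 255 / a).
#if 0  // Standalone sketch, excluded from the build.
#include <stdint.h>
#include <stdio.h>
int main(void) {
  uint8_t a = 128, b = 64;                 // 50% alpha, premultiplied blue
  uint32_t ia = (0x10000u / a) & 0xffff;   // low short of T(a) above
  uint32_t unatt = (b * ia) >> 8;          // ~= b * 255 / a
  printf("unattenuated b = %u\n", unatt);  // prints 128
  return 0;
}
#endif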

// Blend 2 rows into 1.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int x;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width; ++x) {
    dst_ptr[0] = STATIC_CAST(
        uint8_t,
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
    ++src_ptr;
    ++src_ptr1;
    ++dst_ptr;
  }
}
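
// Usage sketch for InterpolateRow_C (excluded from the build): a fraction of
// 64/256 blends three quarters of the first row with one quarter of the next.
#if 0
#include <stdint.h>
#include <stdio.h>
int main(void) {
  uint8_t rows[2][4] = {{0, 100, 200, 255}, {255, 100, 0, 0}};
  uint8_t out[4];
  int i;
  InterpolateRow_C(out, rows[0], 4 /* stride to row 1 */, 4, 64);
  for (i = 0; i < 4; ++i) {
    printf("%d ", out[i]);  // 64 100 150 191
  }
  printf("\n");
  return 0;
}
#endif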

// Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

#undef STATIC_CAST

@@ -1,946 +0,0 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "scale.h"

#include <assert.h>
#include <string.h>

#include "cpu_id.h"
#include "planar_functions.h"  // For CopyPlane
#include "row.h"
#include "scale_row.h"

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
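
// SUBSAMPLE(v, 1, 1) is the half-size-rounded-up used for chroma plane
// dimensions, and it rounds away from zero for the negative heights that
// signal a vertically flipped image.
#if 0  // Standalone sketch, excluded from the build.
#include <stdio.h>
int main(void) {
  int heights[3] = {480, 479, -479};
  int i;
  for (i = 0; i < 3; ++i) {
    int half = SUBSAMPLE(heights[i], 1, 1);
    printf("%4d -> %4d\n", heights[i], half);  // 240, 240, -240
  }
  return 0;
}
#endif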

// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.

static void ScalePlaneDown2(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
                                        : ScaleRowDown2Box_C);
  int row_stride = src_stride * 2;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
                                          : ScaleRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 =
          filtering == kFilterNone
              ? ScaleRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
                                            : ScaleRowDown2Box_SSSE3);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_AVX2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
                                          : ScaleRowDown2Box_Any_AVX2);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
                      : (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2
                                                    : ScaleRowDown2Box_AVX2);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.

static void ScalePlaneDown4(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride * 4;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN4_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane down, 3/4
static void ScalePlaneDown34(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }

#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
      }
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}

// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses a box filter arranged like this:
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2.

static void ScalePlaneDown38(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }

#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
    }
    if (dst_width % 12 == 0 && !filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
    if (dst_width % 6 == 0 && filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}

#define MIN1(x) ((x) < 1 ? 1 : (x))

static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t sum = 0u;
  int x;
  assert(iboxwidth > 0);
  for (x = 0; x < iboxwidth; ++x) {
    sum += src_ptr[x];
  }
  return sum;
}

static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t sum = 0u;
  int x;
  assert(iboxwidth > 0);
  for (x = 0; x < iboxwidth; ++x) {
    sum += src_ptr[x];
  }
  return sum;
}

static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  int i;
  int scaletbl[2];
  int minboxwidth = dx >> 16;
  int boxwidth;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);
    int scaletbl_index = boxwidth - minboxwidth;
    assert((scaletbl_index == 0) || (scaletbl_index == 1));
    *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
                               scaletbl[scaletbl_index] >>
                           16);
  }
}

static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  int scaleval = 65536 / boxheight;
  int i;
  (void)dx;
  src_ptr += (x >> 16);
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
  }
}

static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
    x += boxwidth;
  }
}

// Scale plane down to any dimensions, with interpolation (box filter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixels with simple
// averaging.
static void ScalePlaneBox(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint16_t.
    align_buffer_64(row16, src_width * 2);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
                        int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      ScaleAddRow = ScaleAddRow_Any_SSE2;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_SSE2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      ScaleAddRow = ScaleAddRow_Any_AVX2;
      if (IS_ALIGNED(src_width, 32)) {
        ScaleAddRow = ScaleAddRow_AVX2;
      }
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row16, 0, src_width * 2);
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
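
// The 16.16 stepping used by ScalePlaneBox, in numbers (sketch, excluded
// from the build): for a 640 -> 480 box scale the step is
// dx = 65536 * 640 / 480, and output pixel i averages the source span
// [x >> 16, (x + dx) >> 16).
#if 0
#include <stdio.h>
int main(void) {
  int dx = (int)((65536LL * 640) / 480);  // ~1.333 in 16.16 fixed point
  int x = 0;
  int i;
  for (i = 0; i < 4; ++i) {  // first 4 destination pixels
    int ix = x >> 16;
    x += dx;
    printf("dst[%d] <- src[%d..%d)\n", i, ix, x >> 16);  // 0..1, 1..2, 2..3, 3..5
  }
  return 0;
}
#endif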

// Scale plane down with bilinear interpolation.
static void ScalePlaneBilinearDown(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
                                   int src_stride,
                                   int dst_stride,
                                   const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width);

  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif

#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif

  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}

// Scale plane up with bilinear interpolation.
static void ScalePlaneBilinearUp(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
                                 int src_stride,
                                 int dst_stride,
                                 const uint8_t* src_ptr,
                                 uint8_t* dst_ptr,
                                 enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif

  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif

  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;

    // Allocate 2 row buffers.
    const int row_size = (dst_width + 31) & ~31;
    align_buffer_64(row, row_size * 2);

    uint8_t* rowptr = row;
    int rowstride = row_size;
    int lasty = yi;

    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    if (src_height > 2) {
      src += src_stride;
    }

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * (int64_t)src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          if ((y + 65536) < max_y) {
            src += src_stride;
          }
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}

// Scale plane, horizontally up by 2 times.
// Uses linear filter horizontally, nearest vertically.
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of I422 to I444.
static void ScalePlaneUp2_Linear(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
                                 int src_stride,
                                 int dst_stride,
                                 const uint8_t* src_ptr,
                                 uint8_t* dst_ptr) {
  void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
      ScaleRowUp2_Linear_Any_C;
  int i;
  int y;
  int dy;

  (void)src_width;
  // This function can only scale up by 2 times horizontally.
  assert(src_width == ((dst_width + 1) / 2));

#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
  }
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
  }
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
  }
#endif

  if (dst_height == 1) {
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
               dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr,
                 dst_width);
      dst_ptr += dst_stride;
      y += dy;
    }
  }
}

// Scale plane, up by 2 times.
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of I420 to I444.
static void ScalePlaneUp2_Bilinear(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
                                   int src_stride,
                                   int dst_stride,
                                   const uint8_t* src_ptr,
                                   uint8_t* dst_ptr) {
  void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                      uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
      ScaleRowUp2_Bilinear_Any_C;
  int x;

  (void)src_width;
  // This function can only scale up by 2 times.
  assert(src_width == ((dst_width + 1) / 2));
  assert(src_height == ((dst_height + 1) / 2));

#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
  if (TestCpuFlag(kCpuHasSSE2)) {
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
  }
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
  if (TestCpuFlag(kCpuHasSSSE3)) {
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
  }
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
  if (TestCpuFlag(kCpuHasAVX2)) {
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
  }
#endif

  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
  dst_ptr += dst_stride;
  for (x = 0; x < src_height - 1; ++x) {
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
    src_ptr += src_stride;
    // TODO(fbarchard): Test performance of writing one row of destination at
    // a time.
    dst_ptr += 2 * dst_stride;
  }
  if (!(dst_height & 1)) {
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
  }
}

// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: the upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fixed decimal part.

static void ScalePlaneSimple(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr) {
  int i;
  void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
                    int x, int dx) = ScaleCols_C;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  for (i = 0; i < dst_height; ++i) {
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
              dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}

// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
void ScalePlane(const uint8_t* src,
                int src_stride,
                int src_width,
                int src_height,
                uint8_t* dst,
                int dst_stride,
                int dst_width,
                int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * (int64_t)src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = 0;
    int y = 0;
    // When scaling down, use the center 2 rows to filter.
    // When scaling up, last row of destination uses the last 2 source rows.
    if (dst_height <= src_height) {
      dy = FixedDiv(src_height, dst_height);
      y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_height > 1 && dst_height > 1) {
      dy = FixedDiv1(src_height, dst_height);
    }
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
  }
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
                  dst_stride, src, dst);
    return;
  }
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
    ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst);
    return;
  }
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
    ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
                   dst_stride, src, dst);
}

LIBYUV_API
int I420Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_u,
              int src_stride_u,
              const uint8_t* src_v,
              int src_stride_v,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int dst_width,
              int dst_height,
              enum FilterMode filtering) {
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);

  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
      dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
             dst_width, dst_height, filtering);
  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
             dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
             dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  return 0;
}
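
// Usage sketch for I420Scale (not part of the original file): halving a
// 640x480 I420 frame. Buffer offsets follow the I420 layout, full-res Y
// followed by half-res U and V; error handling is omitted.
#if 0
static void half_i420(const uint8_t* src, uint8_t* dst) {
  const int sw = 640, sh = 480, dw = 320, dh = 240;
  const uint8_t* src_y = src;
  const uint8_t* src_u = src + sw * sh;
  const uint8_t* src_v = src_u + (sw / 2) * (sh / 2);
  uint8_t* dst_y = dst;
  uint8_t* dst_u = dst + dw * dh;
  uint8_t* dst_v = dst_u + (dw / 2) * (dh / 2);
  I420Scale(src_y, sw, src_u, sw / 2, src_v, sw / 2, sw, sh, dst_y, dw, dst_u,
            dw / 2, dst_v, dw / 2, dw, dh,
            kFilterBilinear);  // a 1/2 scale dispatches to ScalePlaneDown2
}
#endif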

@@ -1,53 +0,0 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_H_
#define INCLUDE_LIBYUV_SCALE_H_

#include "basic_types.h"

// Supported filtering.
typedef enum FilterMode {
  kFilterNone = 0,      // Point sample; Fastest.
  kFilterLinear = 1,    // Filter horizontally only.
  kFilterBilinear = 2,  // Faster than box, but lower quality scaling down.
  kFilterBox = 3        // Highest quality.
} FilterModeEnum;

// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce an even better
// quality image, at further expense of speed.
// Returns 0 if successful.

LIBYUV_API
int I420Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_u,
              int src_stride_u,
              const uint8_t* src_v,
              int src_stride_v,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

#endif  // INCLUDE_LIBYUV_SCALE_H_

@@ -1,632 +0,0 @@
/*
 *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "scale_row.h"

// Fixed scale down.
// Mask may be non-power of 2, so use MOD
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, \
               uint8_t* dst_ptr, int dst_width) { \
    int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
    int n = dst_width - r; \
    if (n > 0) { \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
    } \
    SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
                   dst_ptr + n * BPP, r); \
  }
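
// The split SDANY performs, in numbers (sketch, excluded from the build):
// the SIMD body covers the largest multiple of its granularity (MASK + 1,
// e.g. 16 for the SSSE3 variants below) and the C body finishes the tail.
#if 0
#include <stdio.h>
int main(void) {
  int dst_width = 100, mask = 15;
  int r = (int)((unsigned int)dst_width % (mask + 1));  // tail pixels: 4
  int n = dst_width - r;                                // SIMD pixels: 96
  printf("SIMD handles %d, C handles %d\n", n, r);
  return 0;
}
#endif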

// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, \
               uint8_t* dst_ptr, int dst_width) { \
    int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
    int n = (dst_width - 1) - r; \
    if (n > 0) { \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
    } \
    SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
                   dst_ptr + n * BPP, r + 1); \
  }

#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3,
      ScaleRowDown2Linear_SSSE3,
      ScaleRowDown2Linear_C,
      2,
      1,
      15)
SDANY(ScaleRowDown2Box_Any_SSSE3,
      ScaleRowDown2Box_SSSE3,
      ScaleRowDown2Box_C,
      2,
      1,
      15)
SDODD(ScaleRowDown2Box_Odd_SSSE3,
      ScaleRowDown2Box_SSSE3,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      15)
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
SDANY(ScaleUVRowDown2Box_Any_SSSE3,
      ScaleUVRowDown2Box_SSSE3,
      ScaleUVRowDown2Box_C,
      2,
      2,
      3)
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
SDANY(ScaleUVRowDown2Box_Any_AVX2,
      ScaleUVRowDown2Box_AVX2,
      ScaleUVRowDown2Box_C,
      2,
      2,
      7)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_AVX2,
      ScaleRowDown2Linear_AVX2,
      ScaleRowDown2Linear_C,
      2,
      1,
      31)
SDANY(ScaleRowDown2Box_Any_AVX2,
      ScaleRowDown2Box_AVX2,
      ScaleRowDown2Box_C,
      2,
      1,
      31)
SDODD(ScaleRowDown2Box_Odd_AVX2,
      ScaleRowDown2Box_AVX2,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      31)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3,
      ScaleRowDown4Box_SSSE3,
      ScaleRowDown4Box_C,
      4,
      1,
      7)
#endif
#ifdef HAS_SCALEROWDOWN4_AVX2
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_AVX2,
      ScaleRowDown4Box_AVX2,
      ScaleRowDown4Box_C,
      4,
      1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3,
      ScaleRowDown34_SSSE3,
      ScaleRowDown34_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
      ScaleRowDown34_0_Box_SSSE3,
      ScaleRowDown34_0_Box_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
      ScaleRowDown34_1_Box_SSSE3,
      ScaleRowDown34_1_Box_C,
      4 / 3,
      1,
      23)
#endif

#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
      ScaleRowDown38_SSSE3,
      ScaleRowDown38_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
      ScaleRowDown38_3_Box_SSSE3,
      ScaleRowDown38_3_Box_C,
      8 / 3,
      1,
      5)
SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
      ScaleRowDown38_2_Box_SSSE3,
      ScaleRowDown38_2_Box_C,
      8 / 3,
      1,
      5)
#endif

#undef SDANY
|
||||
// Scale down by even scale factor.
|
||||
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
|
||||
uint8_t* dst_ptr, int dst_width) { \
|
||||
int r = dst_width & MASK; \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}

#ifdef SASIMDONLY
// This also works and uses memcpy and SIMD instead of C, but is slower on ARM

// Add rows box filter scale down. Using macro from row_any
#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                       \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) {  \
    SIMD_ALIGNED(uint16_t dst_temp[32]);                                \
    SIMD_ALIGNED(uint8_t src_temp[32]);                                 \
    memset(dst_temp, 0, 32 * 2); /* for msan */                         \
    int r = width & MASK;                                               \
    int n = width & ~MASK;                                              \
    if (n > 0) {                                                        \
      ANY_SIMD(src_ptr, dst_ptr, n);                                    \
    }                                                                   \
    memcpy(src_temp, src_ptr + n * SBPP, r * SBPP);                     \
    memcpy(dst_temp, dst_ptr + n * BPP, r * BPP);                       \
    ANY_SIMD(src_temp, dst_temp, MASK + 1);                             \
    memcpy(dst_ptr + n * BPP, dst_temp, r * BPP);                       \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
#endif
#undef SAANY

#else

// Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
    int n = src_width & ~MASK;                                             \
    if (n > 0) {                                                           \
      SCALEADDROW_SIMD(src_ptr, dst_ptr, n);                               \
    }                                                                      \
    SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK);             \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
#undef SAANY

#endif  // SASIMDONLY
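
// Worked example: SAANY(ScaleAddRow_Any_SSE2, ...) above with src_width = 70
// accumulates n = 70 & ~15 = 64 pixels via the SSE2 kernel and the last
// 70 & 15 = 6 pixels via ScaleAddRow_C, summing 8-bit sources into the
// 16-bit row totals consumed by the box filter.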

// Scale up horizontally 2 times using linear filter.
#define SUH2LANY(NAME, SIMD, C, MASK, PTYPE)                       \
  void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \
    int work_width = (dst_width - 1) & ~1;                         \
    int r = work_width & MASK;                                     \
    int n = work_width & ~MASK;                                    \
    dst_ptr[0] = src_ptr[0];                                       \
    if (work_width > 0) {                                          \
      if (n != 0) {                                                \
        SIMD(src_ptr, dst_ptr + 1, n);                             \
      }                                                            \
      C(src_ptr + (n / 2), dst_ptr + n + 1, r);                    \
    }                                                              \
    dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];         \
  }
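
// Worked example: for dst_width = 9, work_width = (9 - 1) & ~1 = 8 interior
// pixels are produced by the SIMD/C kernels, while dst_ptr[0] and dst_ptr[8]
// are copied straight from src_ptr[0] and src_ptr[4]; edge pixels have no
// second neighbor to blend with, hence the wrapping noted below.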

// Even the C versions need to be wrapped, because boundary pixels have to
// be handled differently

SUH2LANY(ScaleRowUp2_Linear_Any_C, ScaleRowUp2_Linear_C, ScaleRowUp2_Linear_C,
         0, uint8_t)

SUH2LANY(ScaleRowUp2_Linear_16_Any_C, ScaleRowUp2_Linear_16_C,
         ScaleRowUp2_Linear_16_C, 0, uint16_t)

#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, ScaleRowUp2_Linear_SSE2,
         ScaleRowUp2_Linear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, ScaleRowUp2_Linear_SSSE3,
         ScaleRowUp2_Linear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, ScaleRowUp2_Linear_12_SSSE3,
         ScaleRowUp2_Linear_16_C, 15, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, ScaleRowUp2_Linear_16_SSE2,
         ScaleRowUp2_Linear_16_C, 7, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, ScaleRowUp2_Linear_AVX2,
         ScaleRowUp2_Linear_C, 31, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, ScaleRowUp2_Linear_12_AVX2,
         ScaleRowUp2_Linear_16_C, 31, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, ScaleRowUp2_Linear_16_AVX2,
         ScaleRowUp2_Linear_16_C, 15, uint16_t)
#endif
#undef SUH2LANY

// Scale up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SU2BLANY(NAME, SIMD, C, MASK, PTYPE)                              \
  void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr,   \
            ptrdiff_t dst_stride, int dst_width) {                        \
    int work_width = (dst_width - 1) & ~1;                                \
    int r = work_width & MASK;                                            \
    int n = work_width & ~MASK;                                           \
    const PTYPE* sa = src_ptr;                                            \
    const PTYPE* sb = src_ptr + src_stride;                               \
    PTYPE* da = dst_ptr;                                                  \
    PTYPE* db = dst_ptr + dst_stride;                                     \
    da[0] = (3 * sa[0] + sb[0] + 2) >> 2;                                 \
    db[0] = (sa[0] + 3 * sb[0] + 2) >> 2;                                 \
    if (work_width > 0) {                                                 \
      if (n != 0) {                                                       \
        SIMD(sa, sb - sa, da + 1, db - da, n);                            \
      }                                                                   \
      C(sa + (n / 2), sb - sa, da + n + 1, db - da, r);                   \
    }                                                                     \
    da[dst_width - 1] =                                                   \
        (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \
    db[dst_width - 1] =                                                   \
        (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \
  }

SU2BLANY(ScaleRowUp2_Bilinear_Any_C, ScaleRowUp2_Bilinear_C,
         ScaleRowUp2_Bilinear_C, 0, uint8_t)

SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C, ScaleRowUp2_Bilinear_16_C,
         ScaleRowUp2_Bilinear_16_C, 0, uint16_t)

#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, ScaleRowUp2_Bilinear_SSE2,
         ScaleRowUp2_Bilinear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, ScaleRowUp2_Bilinear_12_SSSE3,
         ScaleRowUp2_Bilinear_16_C, 15, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2, ScaleRowUp2_Bilinear_16_SSE2,
         ScaleRowUp2_Bilinear_16_C, 7, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, ScaleRowUp2_Bilinear_SSSE3,
         ScaleRowUp2_Bilinear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, ScaleRowUp2_Bilinear_AVX2,
         ScaleRowUp2_Bilinear_C, 31, uint8_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, ScaleRowUp2_Bilinear_12_AVX2,
         ScaleRowUp2_Bilinear_16_C, 15, uint16_t)
#endif

#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, ScaleRowUp2_Bilinear_16_AVX2,
         ScaleRowUp2_Bilinear_16_C, 15, uint16_t)
#endif

#undef SU2BLANY

// Scale bi-planar plane up horizontally 2 times using linear filter.
#define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE)                         \
  void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) {    \
    int work_width = (dst_width - 1) & ~1;                            \
    int r = work_width & MASK;                                        \
    int n = work_width & ~MASK;                                       \
    dst_ptr[0] = src_ptr[0];                                          \
    dst_ptr[1] = src_ptr[1];                                          \
    if (work_width > 0) {                                             \
      if (n != 0) {                                                   \
        SIMD(src_ptr, dst_ptr + 2, n);                                \
      }                                                               \
      C(src_ptr + n, dst_ptr + 2 * n + 2, r);                         \
    }                                                                 \
    dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \
    dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \
  }

SBUH2LANY(ScaleUVRowUp2_Linear_Any_C, ScaleUVRowUp2_Linear_C,
          ScaleUVRowUp2_Linear_C, 0, uint8_t)

SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C, ScaleUVRowUp2_Linear_16_C,
          ScaleUVRowUp2_Linear_16_C, 0, uint16_t)

#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, ScaleUVRowUp2_Linear_SSSE3,
          ScaleUVRowUp2_Linear_C, 7, uint8_t)
#endif

#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, ScaleUVRowUp2_Linear_AVX2,
          ScaleUVRowUp2_Linear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41, ScaleUVRowUp2_Linear_16_SSE41,
          ScaleUVRowUp2_Linear_16_C, 3, uint16_t)
#endif

#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, ScaleUVRowUp2_Linear_16_AVX2,
          ScaleUVRowUp2_Linear_16_C, 7, uint16_t)
#endif

#undef SBUH2LANY

// Scale bi-planar plane up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE)                              \
  void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr,    \
            ptrdiff_t dst_stride, int dst_width) {                         \
    int work_width = (dst_width - 1) & ~1;                                 \
    int r = work_width & MASK;                                             \
    int n = work_width & ~MASK;                                            \
    const PTYPE* sa = src_ptr;                                             \
    const PTYPE* sb = src_ptr + src_stride;                                \
    PTYPE* da = dst_ptr;                                                   \
    PTYPE* db = dst_ptr + dst_stride;                                      \
    da[0] = (3 * sa[0] + sb[0] + 2) >> 2;                                  \
    db[0] = (sa[0] + 3 * sb[0] + 2) >> 2;                                  \
    da[1] = (3 * sa[1] + sb[1] + 2) >> 2;                                  \
    db[1] = (sa[1] + 3 * sb[1] + 2) >> 2;                                  \
    if (work_width > 0) {                                                  \
      if (n != 0) {                                                        \
        SIMD(sa, sb - sa, da + 2, db - da, n);                             \
      }                                                                    \
      C(sa + n, sb - sa, da + 2 * n + 2, db - da, r);                      \
    }                                                                      \
    da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] +          \
                             sb[((dst_width + 1) & ~1) - 2] + 2) >>        \
                            2;                                             \
    db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] +              \
                             3 * sb[((dst_width + 1) & ~1) - 2] + 2) >>    \
                            2;                                             \
    da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] +          \
                             sb[((dst_width + 1) & ~1) - 1] + 2) >>        \
                            2;                                             \
    db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] +              \
                             3 * sb[((dst_width + 1) & ~1) - 1] + 2) >>    \
                            2;                                             \
  }

SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C, ScaleUVRowUp2_Bilinear_C,
          ScaleUVRowUp2_Bilinear_C, 0, uint8_t)

SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C, ScaleUVRowUp2_Bilinear_16_C,
          ScaleUVRowUp2_Bilinear_16_C, 0, uint16_t)

#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, ScaleUVRowUp2_Bilinear_SSSE3,
          ScaleUVRowUp2_Bilinear_C, 7, uint8_t)
#endif

#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, ScaleUVRowUp2_Bilinear_AVX2,
          ScaleUVRowUp2_Bilinear_C, 15, uint8_t)
#endif

#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41, ScaleUVRowUp2_Bilinear_16_SSE41,
          ScaleUVRowUp2_Bilinear_16_C, 7, uint16_t)
#endif

#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, ScaleUVRowUp2_Bilinear_16_AVX2,
          ScaleUVRowUp2_Bilinear_16_C, 7, uint16_t)
#endif

#undef SBU2BLANY
@@ -1,930 +0,0 @@
/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "scale.h"

#include <assert.h>

#include "cpu_id.h"
#include "row.h"
#include "scale_row.h"

#define STATIC_CAST(type, expr) (type)(expr)

// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
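
// Worked example: clamp255 is branchless. For v = 300, (v >= 255) is 1, so
// -(v >= 255) is all ones and (-1 | v) & 255 = 255; for v = 100 the mask is
// 0 and the value passes through unchanged. Negative inputs wrap, which is
// what the TODO above refers to.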

// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
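
// Worked example: for 12-bit sources (max 4095), scale = 4096 gives
// C16TO8(4095, 4096) = clamp255((4095 * 4096) >> 16) = 255, mapping the
// full 12-bit range onto the full 8-bit range.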

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// CPU agnostic row functions
void ScaleRowDown2_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                     uint8_t *dst, int dst_width) {
  int x;
  (void) src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2Linear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                           uint8_t *dst, int dst_width) {
  const uint8_t *s = src_ptr;
  int x;
  (void) src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst, int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}
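
// Worked example: the box kernel averages each 2x2 block with rounding,
// e.g. source samples {10, 20} over {30, 40} yield
// (10 + 20 + 30 + 40 + 2) >> 2 = 25.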

void ScaleRowDown2Box_Odd_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst, int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  int x;
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst += 1;
    s += 2;
    t += 2;
  }
  dst[0] = (s[0] + t[0] + 1) >> 1;
}

void ScaleRowDown4_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                     uint8_t *dst, int dst_width) {
  int x;
  (void) src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst, int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
              src_ptr[stride * 3 + 7] + 8) >>
             4;
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
  }
}

void ScaleRowDown34_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                      uint8_t *dst, int dst_width) {
  int x;
  (void) src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    dst += 3;
    src_ptr += 4;
  }
}

// Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *d, int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *d, int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Sample position: (O is src sample position, X is dst sample position)
//
//      v dst_ptr at here           v stop at here
//  X O X   X O X   X O X   X O X   X O X
//    ^ src_ptr at here
void ScaleRowUp2_Linear_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                          int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
  }
}

// Sample position: (O is src sample position, X is dst sample position)
//
//      src_ptr at here
//  X v X   X   X   X   X   X   X   X   X
//    O       O       O       O       O
//  X   X   X   X   X   X   X   X   X   X
//  ^ dst_ptr at here           ^ stop at here
//  X   X   X   X   X   X   X   X   X   X
//    O       O       O       O       O
//  X   X   X   X   X   X   X   X   X   X
void ScaleRowUp2_Bilinear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, ptrdiff_t dst_stride,
                            int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  uint8_t *d = dst_ptr;
  uint8_t *e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[2 * x + 0] =
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
    d[2 * x + 1] =
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 0] =
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 1] =
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
  }
}

// Only suitable for at most 14 bit range.
void ScaleRowUp2_Linear_16_C(const uint16_t *src_ptr, uint16_t *dst_ptr,
                             int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
  }
}

// Only suitable for at most 12 bit range.
void ScaleRowUp2_Bilinear_16_C(const uint16_t *src_ptr, ptrdiff_t src_stride,
                               uint16_t *dst_ptr, ptrdiff_t dst_stride,
                               int dst_width) {
  const uint16_t *s = src_ptr;
  const uint16_t *t = src_ptr + src_stride;
  uint16_t *d = dst_ptr;
  uint16_t *e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[2 * x + 0] =
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
    d[2 * x + 1] =
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 0] =
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 1] =
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
  }
}

// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
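
// Worked example: x below is a 16.16 fixed-point source position, so
// x >> 16 is the pixel index and x & 0xffff the fraction. Blending a = 100,
// b = 200 at f = 0x8000 (0.5) gives 100 + (((0x8000 * 100) + 0x8000) >> 16)
// = 150 on the ARM path; the Intel path keeps only the top 7 fraction bits
// plus rounding, trading precision for SSSE3-friendly 7-bit math.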

void ScaleFilterCols_C(uint8_t *dst_ptr, const uint8_t *src_ptr,
                       int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

void ScaleFilterCols64_C(uint8_t *dst_ptr, const uint8_t *src_ptr,
                         int dst_width, int x32, int dx) {
  int64_t x = (int64_t) (x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

#undef BLENDER

// Same as 8 bit arm blender but return is cast to uint16_t
#define BLENDER(a, b, f)                                                 \
  (uint16_t)(                                                            \
      (int)(a) +                                                         \
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
#undef BLENDER

void ScaleRowDown38_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                      uint8_t *dst, int dst_width) {
  int x;
  (void) src_stride;
  assert(dst_width % 3 == 0);
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    dst += 3;
    src_ptr += 8;
  }
}

// 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] =
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
            (65536 / 9) >>
        16;
    dst_ptr[1] =
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
            (65536 / 9) >>
        16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
            (65536 / 6) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

// 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
            (65536 / 4) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleAddRow_C(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
  }
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}

// UV scale row functions
// same as ARGB but 2 channels

void ScaleUVRowDown2_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                       uint8_t *dst_uv, int dst_width) {
  int x;
  (void) src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = src_uv[2];  // Store the 2nd UV
    dst_uv[1] = src_uv[3];
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDown2Linear_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                             uint8_t *dst_uv, int dst_width) {
  int x;
  (void) src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
    dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDown2Box_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                          uint8_t *dst_uv, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
                 src_uv[src_stride + 2] + 2) >>
                2;
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
                 src_uv[src_stride + 3] + 2) >>
                2;
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDownEven_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                          int src_stepx, uint8_t *dst_uv, int dst_width) {
  const uint16_t *src = (const uint16_t *) (src_uv);
  uint16_t *dst = (uint16_t *) (dst_uv);
  (void) src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[0];
    dst[1] = src[src_stepx];
    src += src_stepx * 2;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t *dst_ptr, const uint8_t *src_ptr, int dst_width,
                 int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}

// Scales a single row of pixels up by 2x using point sampling.
void ScaleColsUp2_C(uint8_t *dst_ptr, const uint8_t *src_ptr, int dst_width,
                    int x, int dx) {
  int j;
  (void) x;
  (void) dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}

void ScaleUVRowUp2_Linear_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                            int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[4 * x + 0] =
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
    dst_ptr[4 * x + 1] =
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
    dst_ptr[4 * x + 2] =
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
    dst_ptr[4 * x + 3] =
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
  }
}

void ScaleUVRowUp2_Bilinear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                              uint8_t *dst_ptr, ptrdiff_t dst_stride,
                              int dst_width) {
  const uint8_t *s = src_ptr;
  const uint8_t *t = src_ptr + src_stride;
  uint8_t *d = dst_ptr;
  uint8_t *e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 1 + 8) >>
                   4;
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 1 + 8) >>
                   4;
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
                    t[2 * x + 2] * 3 + 8) >>
                   4;
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
                    t[2 * x + 3] * 3 + 8) >>
                   4;
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
                    t[2 * x + 2] * 3 + 8) >>
                   4;
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
                    t[2 * x + 3] * 3 + 8) >>
                   4;
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 9 + 8) >>
                   4;
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 9 + 8) >>
                   4;
  }
}

void ScaleUVRowUp2_Linear_16_C(const uint16_t *src_ptr, uint16_t *dst_ptr,
                               int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[4 * x + 0] =
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
    dst_ptr[4 * x + 1] =
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
    dst_ptr[4 * x + 2] =
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
    dst_ptr[4 * x + 3] =
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
  }
}

void ScaleUVRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
                                 ptrdiff_t src_stride, uint16_t *dst_ptr,
                                 ptrdiff_t dst_stride, int dst_width) {
  const uint16_t *s = src_ptr;
  const uint16_t *t = src_ptr + src_stride;
  uint16_t *d = dst_ptr;
  uint16_t *e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 1 + 8) >>
                   4;
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 1 + 8) >>
                   4;
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
                    t[2 * x + 2] * 3 + 8) >>
                   4;
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
                    t[2 * x + 3] * 3 + 8) >>
                   4;
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
                    t[2 * x + 2] * 3 + 8) >>
                   4;
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
                    t[2 * x + 3] * 3 + 8) >>
                   4;
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 9 + 8) >>
                   4;
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 9 + 8) >>
                   4;
  }
}

// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
  (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)

void ScaleUVFilterCols_C(uint8_t *dst_uv, const uint8_t *src_uv,
                         int dst_width, int x, int dx) {
  const uint16_t *src = (const uint16_t *) (src_uv);
  uint16_t *dst = (uint16_t *) (dst_uv);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}

#undef BLENDER1
#undef BLENDERC
#undef BLENDER

// Scale plane vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height, int dst_width, int dst_height,
                        int src_stride, int dst_stride,
                        const uint8_t *src_argb, uint8_t *dst_argb,
                        int x, int y, int dy,
                        int bpp,  // bytes per pixel. 4 for ARGB.
                        enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher bpp.
  int dst_width_bytes = dst_width * bpp;
  void (*InterpolateRow)(uint8_t *dst_argb, const uint8_t *src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(bpp >= 1 && bpp <= 4);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif

  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_bytes, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
                                  int dst_width, int dst_height,
                                  enum FilterMode filtering) {
  if (src_width < 0) {
    src_width = -src_width;
  }
  if (src_height < 0) {
    src_height = -src_height;
  }
  if (filtering == kFilterBox) {
    // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
    if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
      filtering = kFilterBilinear;
    }
  }
  if (filtering == kFilterBilinear) {
    if (src_height == 1) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
    if (dst_height == src_height || dst_height * 3 == src_height) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
    // avoid reading 2 pixels horizontally that causes memory exception.
    if (src_width == 1) {
      filtering = kFilterNone;
    }
  }
  if (filtering == kFilterLinear) {
    if (src_width == 1) {
      filtering = kFilterNone;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
    if (dst_width == src_width || dst_width * 3 == src_width) {
      filtering = kFilterNone;
    }
  }
  return filtering;
}
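
// Worked example: halving 1920x1080 to 960x540 with kFilterBox satisfies
// dst_width * 2 >= src_width, so the filter reduces to kFilterBilinear;
// Box is only kept when both axes shrink below one half.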

#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
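
// Worked example: CENTERSTART(dx, -32768) with dx = 65536 (a 1:1 step)
// evaluates to (65536 >> 1) + (-32768) = 0, i.e. sampling starts centered
// on the first source pixel.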

// Compute slope values for stepping.
void ScaleSlope(int src_width, int src_height, int dst_width, int dst_height,
                enum FilterMode filtering, int *x, int *y, int *dx, int *dy) {
  assert(x != NULL);
  assert(y != NULL);
  assert(dx != NULL);
  assert(dy != NULL);
  assert(src_width != 0);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  // Check for 1 pixel and avoid FixedDiv overflow.
  if (dst_width == 1 && src_width >= 32768) {
    dst_width = src_width;
  }
  if (dst_height == 1 && src_height >= 32768) {
    dst_height = src_height;
  }
  if (filtering == kFilterBox) {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = 0;
    *y = 0;
  } else if (filtering == kFilterBilinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_width > 1 && dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    if (dst_height <= src_height) {
      *dy = FixedDiv(src_height, dst_height);
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_height > 1 && dst_height > 1) {
      *dy = FixedDiv1(src_height, dst_height);
      *y = 0;
    }
  } else if (filtering == kFilterLinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_width > 1 && dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    *dy = FixedDiv(src_height, dst_height);
    *y = *dy >> 1;
  } else {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = CENTERSTART(*dx, 0);
    *y = CENTERSTART(*dy, 0);
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    *x += (dst_width - 1) * *dx;
    *dx = -*dx;
    // src_width = -src_width;  // Caller must do this.
  }
}

#undef CENTERSTART
File diff suppressed because it is too large
@@ -1,768 +0,0 @@
/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_
#define INCLUDE_LIBYUV_SCALE_ROW_H_

#include "basic_types.h"
#include "scale.h"

#if defined(__pnacl__) || defined(__CLR_VER) ||             \
    (defined(__native_client__) && defined(__x86_64__)) || \
    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif  // GNUC >= 4.7
#endif  // __GNUC__

// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
#define HAS_SCALEADDROW_SSE2
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#endif

// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2_LINEAR_SSE2
#define HAS_SCALEROWUP2_LINEAR_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_SSE2
#define HAS_SCALEROWUP2_BILINEAR_SSSE3
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
#endif

// The following are available for gcc/clang x86 platforms, but
// require clang 3.4 or gcc 4.7.
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) &&               \
    (defined(__x86_64__) || defined(__i386__)) && \
    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2_LINEAR_AVX2
#define HAS_SCALEROWUP2_BILINEAR_AVX2
#define HAS_SCALEROWUP2_LINEAR_12_AVX2
#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
#define HAS_SCALEROWUP2_LINEAR_16_AVX2
#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
#endif

// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) &&                          \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
     defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif

// Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height, int dst_width, int dst_height,
                        int src_stride, int dst_stride,
                        const uint8_t *src_argb, uint8_t *dst_argb,
                        int x, int y, int dy, int bpp,
                        enum FilterMode filtering);

// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
                                  int dst_width, int dst_height,
                                  enum FilterMode filtering);

// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_X86(int num, int div);

int FixedDiv1_X86(int num, int div);

#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86
#endif
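
// Worked example (per the 16.16 convention documented above, not a
// guarantee of the x86 implementation): FixedDiv(1920, 1280) represents
// 1.5 as 1.5 * 65536 = 98304, so callers can step through the source with
// plain integer additions.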

// Compute slope values for stepping.
void ScaleSlope(int src_width, int src_height, int dst_width, int dst_height,
                enum FilterMode filtering, int *x, int *y, int *dx, int *dy);

void ScaleRowDown2_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                     uint8_t *dst, int dst_width);
void ScaleRowDown2Linear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                           uint8_t *dst, int dst_width);
void ScaleRowDown2Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst, int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst, int dst_width);
void ScaleRowDown4_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                     uint8_t *dst, int dst_width);
void ScaleRowDown4Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst, int dst_width);
void ScaleRowDown34_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                      uint8_t *dst, int dst_width);
void ScaleRowDown34_0_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *d, int dst_width);
void ScaleRowDown34_1_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *d, int dst_width);
void ScaleRowUp2_Linear_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                          int dst_width);
void ScaleRowUp2_Bilinear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, ptrdiff_t dst_stride,
                            int dst_width);
void ScaleRowUp2_Linear_16_C(const uint16_t *src_ptr, uint16_t *dst_ptr,
                             int dst_width);
void ScaleRowUp2_Bilinear_16_C(const uint16_t *src_ptr, ptrdiff_t src_stride,
                               uint16_t *dst_ptr, ptrdiff_t dst_stride,
                               int dst_width);
void ScaleRowUp2_Linear_Any_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                              int dst_width);
void ScaleRowUp2_Bilinear_Any_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, ptrdiff_t dst_stride,
                                int dst_width);
void ScaleRowUp2_Linear_16_Any_C(const uint16_t *src_ptr, uint16_t *dst_ptr,
                                 int dst_width);
void ScaleRowUp2_Bilinear_16_Any_C(const uint16_t *src_ptr,
                                   ptrdiff_t src_stride, uint16_t *dst_ptr,
                                   ptrdiff_t dst_stride, int dst_width);

void ScaleCols_C(uint8_t *dst_ptr, const uint8_t *src_ptr, int dst_width,
                 int x, int dx);
void ScaleColsUp2_C(uint8_t *dst_ptr, const uint8_t *src_ptr, int dst_width,
                    int, int);
void ScaleFilterCols_C(uint8_t *dst_ptr, const uint8_t *src_ptr,
                       int dst_width, int x, int dx);
void ScaleFilterCols64_C(uint8_t *dst_ptr, const uint8_t *src_ptr,
                         int dst_width, int x32, int dx);
void ScaleRowDown38_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                      uint8_t *dst, int dst_width);
void ScaleRowDown38_3_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);

void ScaleAddRow_C(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);

void ScaleUVRowDown2_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                       uint8_t *dst_uv, int dst_width);
void ScaleUVRowDown2Linear_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                             uint8_t *dst_uv, int dst_width);
void ScaleUVRowDown2Box_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                          uint8_t *dst_uv, int dst_width);
void ScaleUVRowDownEven_C(const uint8_t *src_uv, ptrdiff_t src_stride,
                          int src_stepx, uint8_t *dst_uv, int dst_width);
void ScaleUVRowUp2_Linear_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                            int dst_width);
void ScaleUVRowUp2_Bilinear_C(const uint8_t *src_ptr, ptrdiff_t src_stride,
                              uint8_t *dst_ptr, ptrdiff_t dst_stride,
                              int dst_width);
void ScaleUVRowUp2_Linear_Any_C(const uint8_t *src_ptr, uint8_t *dst_ptr,
                                int dst_width);
void ScaleUVRowUp2_Bilinear_Any_C(const uint8_t *src_ptr,
                                  ptrdiff_t src_stride, uint8_t *dst_ptr,
                                  ptrdiff_t dst_stride, int dst_width);
void ScaleUVRowUp2_Linear_16_C(const uint16_t *src_ptr, uint16_t *dst_ptr,
                               int dst_width);
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
                                 ptrdiff_t src_stride, uint16_t *dst_ptr,
                                 ptrdiff_t dst_stride, int dst_width);
void ScaleUVRowUp2_Linear_16_Any_C(const uint16_t *src_ptr,
                                   uint16_t *dst_ptr, int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_C(const uint16_t *src_ptr,
                                     ptrdiff_t src_stride, uint16_t *dst_ptr,
                                     ptrdiff_t dst_stride, int dst_width);

// Specialized scalers for x86.
void ScaleRowDown2_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                         uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Linear_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                              uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Box_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                           uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                         uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                        uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4Box_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                           uint8_t *dst_ptr, int dst_width);
void ScaleRowDown34_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                          uint8_t *dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown38_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                          uint8_t *dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);

void ScaleRowUp2_Linear_SSE2(const uint8_t *src_ptr, uint8_t *dst_ptr,
                             int dst_width);
void ScaleRowUp2_Bilinear_SSE2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, ptrdiff_t dst_stride,
                               int dst_width);
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t *src_ptr, uint16_t *dst_ptr,
                                 int dst_width);
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t *src_ptr,
                                   ptrdiff_t src_stride, uint16_t *dst_ptr,
                                   ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_16_SSE2(const uint16_t *src_ptr, uint16_t *dst_ptr,
                                int dst_width);
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t *src_ptr,
                                  ptrdiff_t src_stride, uint16_t *dst_ptr,
                                  ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr,
                              int dst_width);
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, ptrdiff_t dst_stride,
                                int dst_width);
void ScaleRowUp2_Linear_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr,
                             int dst_width);
void ScaleRowUp2_Bilinear_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, ptrdiff_t dst_stride,
                               int dst_width);
void ScaleRowUp2_Linear_12_AVX2(const uint16_t *src_ptr, uint16_t *dst_ptr,
                                int dst_width);
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t *src_ptr,
                                  ptrdiff_t src_stride, uint16_t *dst_ptr,
                                  ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_16_AVX2(const uint16_t *src_ptr, uint16_t *dst_ptr,
                                int dst_width);
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t *src_ptr,
                                  ptrdiff_t src_stride, uint16_t *dst_ptr,
                                  ptrdiff_t dst_stride, int dst_width);

void ScaleRowUp2_Linear_Any_SSE2(const uint8_t *src_ptr, uint8_t *dst_ptr,
                                 int dst_width);
void ScaleRowUp2_Bilinear_Any_SSE2(const uint8_t *src_ptr,
                                   ptrdiff_t src_stride, uint8_t *dst_ptr,
                                   ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_12_Any_SSSE3(const uint16_t *src_ptr,
                                     uint16_t *dst_ptr, int dst_width);
void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t *src_ptr,
                                       ptrdiff_t src_stride,
                                       uint16_t *dst_ptr,
                                       ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t *src_ptr,
                                    uint16_t *dst_ptr, int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t *src_ptr,
                                      ptrdiff_t src_stride,
                                      uint16_t *dst_ptr,
                                      ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr,
                                  int dst_width);
void ScaleRowUp2_Bilinear_Any_SSSE3(const uint8_t *src_ptr,
                                    ptrdiff_t src_stride, uint8_t *dst_ptr,
                                    ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr,
                                 int dst_width);
void ScaleRowUp2_Bilinear_Any_AVX2(const uint8_t *src_ptr,
                                   ptrdiff_t src_stride, uint8_t *dst_ptr,
                                   ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_12_Any_AVX2(const uint16_t *src_ptr,
                                    uint16_t *dst_ptr, int dst_width);
void ScaleRowUp2_Bilinear_12_Any_AVX2(const uint16_t *src_ptr,
                                      ptrdiff_t src_stride,
                                      uint16_t *dst_ptr,
                                      ptrdiff_t dst_stride, int dst_width);
void ScaleRowUp2_Linear_16_Any_AVX2(const uint16_t *src_ptr,
                                    uint16_t *dst_ptr, int dst_width);
void ScaleRowUp2_Bilinear_16_Any_AVX2(const uint16_t *src_ptr,
                                      ptrdiff_t src_stride,
                                      uint16_t *dst_ptr,
                                      ptrdiff_t dst_stride, int dst_width);

void ScaleRowDown2_Any_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                             uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8_t *src_ptr,
                                   ptrdiff_t src_stride, uint8_t *dst_ptr,
                                   int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8_t *src_ptr,
                                  ptrdiff_t src_stride, uint8_t *dst_ptr,
                                  int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                             uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                            uint8_t *dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8_t *src_ptr, ptrdiff_t src_stride,
                               uint8_t *dst_ptr, int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8_t *src_ptr, ptrdiff_t src_stride,
                              uint8_t *dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t *src_ptr,
                                    ptrdiff_t src_stride, uint8_t *dst_ptr,
                                    int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t *src_ptr,
                                    ptrdiff_t src_stride,
                                    uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown38_Any_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleAddRow_SSE2(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);
|
||||
|
||||
void ScaleAddRow_AVX2(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);
|
||||
|
||||
void ScaleAddRow_Any_SSE2(const uint8_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int src_width);
|
||||
|
||||
void ScaleAddRow_Any_AVX2(const uint8_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int src_width);
|
||||
|
||||
void ScaleFilterCols_SSSE3(uint8_t *dst_ptr,
|
||||
const uint8_t *src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
void ScaleColsUp2_SSE2(uint8_t *dst_ptr,
|
||||
const uint8_t *src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
// UV Row functions
|
||||
void ScaleUVRowDown2Box_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_uv,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowDown2Box_AVX2(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_uv,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowDown2Box_Any_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowDown2Box_Any_AVX2(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t *src_ptr,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_Any_SSSE3(const uint8_t *src_ptr,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_Any_SSSE3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_AVX2(const uint8_t *src_ptr,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_Any_AVX2(const uint8_t *src_ptr,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_Any_AVX2(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Linear_16_Any_AVX2(const uint16_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleUVRowUp2_Bilinear_16_Any_AVX2(const uint16_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t *dst_ptr,
|
||||
ptrdiff_t dst_stride,
|
||||
int dst_width);
|
||||
|
||||
#endif // INCLUDE_LIBYUV_SCALE_ROW_H_
|
||||
|
|
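Editor's note on the deleted header: these prototypes are per-row kernels — each call produces one output row, and a higher-level scaler invokes the chosen variant once per destination row (the _Any_ forms additionally tolerate widths that are not a multiple of the SIMD lane count). As a hedged illustration of the contract these declarations share, here is a plain-C reference loop for a 2x2 box-filter row, assuming libyuv's usual round-to-nearest averaging; it is a sketch of the semantics, not the shipped SIMD code:

#include <stddef.h>
#include <stdint.h>

// Reference 2x2 box downscale for one output row (illustrative only).
// src_stride is the byte offset from the top row to the row below it.
static void ScaleRowDown2Box_Ref(const uint8_t *src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8_t *dst_ptr,
                                 int dst_width) {
  const uint8_t *s = src_ptr;               // top row of each 2x2 block
  const uint8_t *t = src_ptr + src_stride;  // bottom row of each 2x2 block
  for (int x = 0; x < dst_width; ++x) {
    // Average the four source pixels with rounding.
    dst_ptr[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
    s += 2;
    t += 2;
  }
}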
@ -1,16 +0,0 @@
/*
 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_

#define LIBYUV_VERSION 1875

#endif  // INCLUDE_LIBYUV_VERSION_H_
@ -1,50 +0,0 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "video_common.h"

struct FourCCAliasEntry {
  uint32_t alias;
  uint32_t canonical;
};

#define NUM_ALIASES 18
static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
    {FOURCC_IYUV, FOURCC_I420},
    {FOURCC_YU12, FOURCC_I420},
    {FOURCC_YU16, FOURCC_I422},
    {FOURCC_YU24, FOURCC_I444},
    {FOURCC_YUYV, FOURCC_YUY2},
    {FOURCC_YUVS, FOURCC_YUY2},  // kCMPixelFormat_422YpCbCr8_yuvs
    {FOURCC_HDYC, FOURCC_UYVY},
    {FOURCC_2VUY, FOURCC_UYVY},  // kCMPixelFormat_422YpCbCr8
    {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
    {FOURCC_DMB1, FOURCC_MJPG},
    {FOURCC_BA81, FOURCC_BGGR},  // deprecated.
    {FOURCC_RGB3, FOURCC_RAW},
    {FOURCC_BGR3, FOURCC_24BG},
    {FOURCC_CM32, FOURCC_BGRA},  // kCMPixelFormat_32ARGB
    {FOURCC_CM24, FOURCC_RAW},   // kCMPixelFormat_24RGB
    {FOURCC_L555, FOURCC_RGBO},  // kCMPixelFormat_16LE555
    {FOURCC_L565, FOURCC_RGBP},  // kCMPixelFormat_16LE565
    {FOURCC_5551, FOURCC_RGBO},  // kCMPixelFormat_16LE5551
};

LIBYUV_API
uint32_t CanonicalFourCC(uint32_t fourcc) {
  int i;
  for (i = 0; i < NUM_ALIASES; ++i) {
    if (kFourCCAliases[i].alias == fourcc) {
      return kFourCCAliases[i].canonical;
    }
  }
  // Not an alias, so return it as-is.
  return fourcc;
}
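Editor's note: the alias table above is the data the rest of the converters key on — capture sources often report Mac or V4L2 spellings of the same layout, and CanonicalFourCC folds them to one canonical code per format family. A minimal sketch of the intended behavior, grounded in the table shown above (the include path is illustrative only):

#include <assert.h>
#include "video_common.h"  // header shown in the next hunk; path illustrative

int main(void) {
  assert(CanonicalFourCC(FOURCC_YUYV) == FOURCC_YUY2);  // alias folds
  assert(CanonicalFourCC(FOURCC_2VUY) == FOURCC_UYVY);  // Mac alias folds
  assert(CanonicalFourCC(FOURCC_NV12) == FOURCC_NV12);  // non-alias unchanged
  return 0;
}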
@ -1,212 +0,0 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

// Common definitions for video, including fourcc and VideoFormat.

#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_

#include "basic_types.h"

//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////

// Convert four characters to a FourCC code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
#ifdef __cplusplus
#define FOURCC(a, b, c, d)                                        \
  ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
   (static_cast<uint32_t>(c) << 16) | /* NOLINT */                \
   (static_cast<uint32_t>(d) << 24))  /* NOLINT */
#else
#define FOURCC(a, b, c, d)                                     \
  (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */       \
   ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
#endif

// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt

// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
enum FourCC {
  // 10 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
  FOURCC_I422 = FOURCC('I', '4', '2', '2'),
  FOURCC_I444 = FOURCC('I', '4', '4', '4'),
  FOURCC_I400 = FOURCC('I', '4', '0', '0'),
  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
  FOURCC_I010 = FOURCC('I', '0', '1', '0'),  // bt.601 10 bit 420
  FOURCC_I210 = FOURCC('I', '2', '1', '0'),  // bt.601 10 bit 422

  // 1 Secondary YUV format: row biplanar. deprecated.
  FOURCC_M420 = FOURCC('M', '4', '2', '0'),

  // 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp.
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
  FOURCC_AR30 = FOURCC('A', 'R', '3', '0'),  // 10 bit per channel. 2101010.
  FOURCC_AB30 = FOURCC('A', 'B', '3', '0'),  // ABGR version of 10 bit
  FOURCC_AR64 = FOURCC('A', 'R', '6', '4'),  // 16 bit per channel.
  FOURCC_AB64 = FOURCC('A', 'B', '6', '4'),  // ABGR version of 16 bit
  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
  FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
  FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
  FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.

  // 1 Primary Compressed YUV format.
  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),

  // 14 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
  FOURCC_J420 =
      FOURCC('J', '4', '2', '0'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J422 =
      FOURCC('J', '4', '2', '2'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J444 =
      FOURCC('J', '4', '4', '4'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J400 =
      FOURCC('J', '4', '0', '0'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_F420 = FOURCC('F', '4', '2', '0'),  // bt.709 full, unofficial fourcc
  FOURCC_F422 = FOURCC('F', '4', '2', '2'),  // bt.709 full, unofficial fourcc
  FOURCC_F444 = FOURCC('F', '4', '4', '4'),  // bt.709 full, unofficial fourcc
  FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // bt.709, unofficial fourcc
  FOURCC_H422 = FOURCC('H', '4', '2', '2'),  // bt.709, unofficial fourcc
  FOURCC_H444 = FOURCC('H', '4', '4', '4'),  // bt.709, unofficial fourcc
  FOURCC_U420 = FOURCC('U', '4', '2', '0'),  // bt.2020, unofficial fourcc
  FOURCC_U422 = FOURCC('U', '4', '2', '2'),  // bt.2020, unofficial fourcc
  FOURCC_U444 = FOURCC('U', '4', '4', '4'),  // bt.2020, unofficial fourcc
  FOURCC_F010 = FOURCC('F', '0', '1', '0'),  // bt.709 full range 10 bit 420
  FOURCC_H010 = FOURCC('H', '0', '1', '0'),  // bt.709 10 bit 420
  FOURCC_U010 = FOURCC('U', '0', '1', '0'),  // bt.2020 10 bit 420
  FOURCC_F210 = FOURCC('F', '2', '1', '0'),  // bt.709 full range 10 bit 422
  FOURCC_H210 = FOURCC('H', '2', '1', '0'),  // bt.709 10 bit 422
  FOURCC_U210 = FOURCC('U', '2', '1', '0'),  // bt.2020 10 bit 422
  FOURCC_P010 = FOURCC('P', '0', '1', '0'),
  FOURCC_P210 = FOURCC('P', '2', '1', '0'),

  // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
  FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'),  // Alias for I422.
  FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'),  // Alias for I444.
  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2.
  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac.
  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY.
  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY on Mac.
  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG.
  FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'),  // Alias for MJPG on Mac.
  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR.
  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW.
  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG.
  FOURCC_CM32 = FOURCC(0, 0, 0, 32),  // Alias for BGRA kCMPixelFormat_32ARGB
  FOURCC_CM24 = FOURCC(0, 0, 0, 24),  // Alias for RAW kCMPixelFormat_24RGB
  FOURCC_L555 = FOURCC('L', '5', '5', '5'),  // Alias for RGBO.
  FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
  FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.

  // deprecated formats. Not supported, but defined for backward compatibility.
  FOURCC_I411 = FOURCC('I', '4', '1', '1'),
  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
  FOURCC_H264 = FOURCC('H', '2', '6', '4'),

  // Match any fourcc.
  FOURCC_ANY = -1,
};

enum FourCCBpp {
  // Canonical fourcc codes used in our code.
  FOURCC_BPP_I420 = 12,
  FOURCC_BPP_I422 = 16,
  FOURCC_BPP_I444 = 24,
  FOURCC_BPP_I411 = 12,
  FOURCC_BPP_I400 = 8,
  FOURCC_BPP_NV21 = 12,
  FOURCC_BPP_NV12 = 12,
  FOURCC_BPP_YUY2 = 16,
  FOURCC_BPP_UYVY = 16,
  FOURCC_BPP_M420 = 12,  // deprecated
  FOURCC_BPP_Q420 = 12,
  FOURCC_BPP_ARGB = 32,
  FOURCC_BPP_BGRA = 32,
  FOURCC_BPP_ABGR = 32,
  FOURCC_BPP_RGBA = 32,
  FOURCC_BPP_AR30 = 32,
  FOURCC_BPP_AB30 = 32,
  FOURCC_BPP_AR64 = 64,
  FOURCC_BPP_AB64 = 64,
  FOURCC_BPP_24BG = 24,
  FOURCC_BPP_RAW = 24,
  FOURCC_BPP_RGBP = 16,
  FOURCC_BPP_RGBO = 16,
  FOURCC_BPP_R444 = 16,
  FOURCC_BPP_RGGB = 8,
  FOURCC_BPP_BGGR = 8,
  FOURCC_BPP_GRBG = 8,
  FOURCC_BPP_GBRG = 8,
  FOURCC_BPP_YV12 = 12,
  FOURCC_BPP_YV16 = 16,
  FOURCC_BPP_YV24 = 24,
  FOURCC_BPP_YU12 = 12,
  FOURCC_BPP_J420 = 12,
  FOURCC_BPP_J400 = 8,
  FOURCC_BPP_H420 = 12,
  FOURCC_BPP_H422 = 16,
  FOURCC_BPP_I010 = 15,
  FOURCC_BPP_I210 = 20,
  FOURCC_BPP_H010 = 15,
  FOURCC_BPP_H210 = 20,
  FOURCC_BPP_P010 = 15,
  FOURCC_BPP_P210 = 20,
  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
  FOURCC_BPP_H264 = 0,
  FOURCC_BPP_IYUV = 12,
  FOURCC_BPP_YU16 = 16,
  FOURCC_BPP_YU24 = 24,
  FOURCC_BPP_YUYV = 16,
  FOURCC_BPP_YUVS = 16,
  FOURCC_BPP_HDYC = 16,
  FOURCC_BPP_2VUY = 16,
  FOURCC_BPP_JPEG = 1,
  FOURCC_BPP_DMB1 = 1,
  FOURCC_BPP_BA81 = 8,
  FOURCC_BPP_RGB3 = 24,
  FOURCC_BPP_BGR3 = 24,
  FOURCC_BPP_CM32 = 32,
  FOURCC_BPP_CM24 = 24,

  // Match any fourcc.
  FOURCC_BPP_ANY = 0,  // 0 means unknown.
};

// Converts fourcc aliases into canonical ones.
LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);

#endif  // INCLUDE_LIBYUV_VIDEO_COMMON_H_
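Editor's note: the FOURCC macro above packs its four characters little-endian, so the first character lands in the least-significant byte and the numeric code spells the tag backwards in hex; the FourCCBpp table gives average bits per pixel, which is the figure buffer-size math uses. A small self-contained check (the fourcc helper here is local to the example, restating the macro above):

#include <stdint.h>
#include <stdio.h>

// Local restatement of the FOURCC packing shown in the deleted header.
static uint32_t fourcc(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  return (uint32_t)a | ((uint32_t)b << 8) | ((uint32_t)c << 16) |
         ((uint32_t)d << 24);
}

int main(void) {
  printf("%08x\n", fourcc('I', '4', '2', '0'));  // prints 30323449 ("024I")
  // FOURCC_BPP_I420 is 12 bits/pixel, so a 1280x720 I420 frame needs:
  printf("%d bytes\n", 1280 * 720 * 12 / 8);     // prints 1382400 bytes
  return 0;
}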
@ -115,6 +115,9 @@ func TestYuvPredefined(t *testing.T) {
	frame := RawFrame{Data: im, Stride: 32, W: 32, H: 32}
	a := pc.Process(frame, 0, PixFmt(libyuv.FourccAbgr))

	v := libyuv.Version()
	t.Logf("%v", v)

	if len(a) != len(should) {
		t.Fatalf("different size a: %v, o: %v", len(a), len(should))
	}