Use static libyuv for macOS

Sergey Stepanov 2023-10-16 00:02:16 +03:00 committed by sergystepanov
parent b1b33713d6
commit f11cad157b
37 changed files with 45 additions and 10943 deletions

View file

@ -38,7 +38,7 @@ jobs:
- name: Get MacOS dev libraries and tools
if: matrix.os == 'macos-latest'
run: |
- brew install pkg-config libvpx x264 opus sdl2
+ brew install pkg-config libvpx x264 opus sdl2 jpeg-turbo
- name: Get Windows dev libraries and tools
if: matrix.os == 'windows-latest'

View file

@ -38,7 +38,7 @@ test:
go test -v ./pkg/...
verify-cores:
- go test -run TestAll ./pkg/worker/room -v -renderFrames $(GL_CTX) -outputPath "../../../_rendered"
+ go test -run TestAll ./pkg/worker/room -v -renderFrames $(GL_CTX) -outputPath "./_rendered"
dev.build: compile build

View file

@ -64,7 +64,7 @@ a better sense of performance.
apt-get install -y make gcc pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev libyuv-dev
# MacOS
- brew install pkg-config libvpx x264 opus sdl2
+ brew install pkg-config libvpx x264 opus sdl2 jpeg-turbo
# Windows (MSYS2)
pacman -Sy --noconfirm --needed git make mingw-w64-x86_64-{gcc,pkgconf,dlfcn,libvpx,opus,x264-git,SDL2,libyuv}

View file

@ -1,29 +0,0 @@
Copyright 2011 The LibYuv Project Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Google nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,29 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
#include <stddef.h> // For size_t and NULL
#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
#define INT_TYPES_DEFINED
#include <stdint.h> // for uintptr_t and C99 types
#endif // INT_TYPES_DEFINED
#if !defined(LIBYUV_API)
#define LIBYUV_API
#endif // LIBYUV_API
#define LIBYUV_BOOL int
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_

View file

@ -1,336 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "convert.h"
#include "basic_types.h"
#include "cpu_id.h"
#include "planar_functions.h"
#include "row.h"
// Subsample amount uses a shift.
// v is value
// a is amount to add to round up
// s is shift to subsample down
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
// Copy I420 with optional flipping.
// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
// it does row coalescing.
LIBYUV_API
int I420Copy(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// Copy UV planes.
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
}
// Convert ARGB to I420.
LIBYUV_API
int ARGBToI420(const uint8_t *src_argb,
int src_stride_argb,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height) {
int y;
void (*ARGBToUVRow)(const uint8_t *src_argb0, int src_stride_argb,
uint8_t *dst_u, uint8_t *dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t *src_argb, uint8_t *dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
}
return 0;
}
// Convert ABGR to I420.
LIBYUV_API
int ABGRToI420(const uint8_t *src_abgr,
int src_stride_abgr,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height) {
int y;
void (*ABGRToUVRow)(const uint8_t *src_abgr0, int src_stride_abgr,
uint8_t *dst_u, uint8_t *dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t *src_abgr, uint8_t *dst_y, int width) =
ABGRToYRow_C;
if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_AVX2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
ABGRToYRow(src_abgr, dst_y, width);
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
src_abgr += src_stride_abgr * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
ABGRToYRow(src_abgr, dst_y, width);
}
return 0;
}
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t *src_rgb565,
int src_stride_rgb565,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height) {
int y;
void (*RGB565ToARGBRow)(const uint8_t *src_rgb, uint8_t *dst_argb,
int width) = RGB565ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t *src_argb0, int src_stride_argb,
uint8_t *dst_u, uint8_t *dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t *src_argb, uint8_t *dst_y, int width) =
ARGBToYRow_C;
if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
src_stride_rgb565 = -src_stride_rgb565;
}
#if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
{
#if !(defined(HAS_RGB565TOYROW_NEON))
// Allocate 2 rows of ARGB.
const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON))
#else
RGB565ToARGBRow(src_rgb565, row, width);
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width);
ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb565 += src_stride_rgb565 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if (defined(HAS_RGB565TOYROW_NEON))
#else
RGB565ToARGBRow(src_rgb565, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
}
#if !(defined(HAS_RGB565TOYROW_NEON))
free_aligned_buffer_64(row);
#endif
}
return 0;
}
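The converters in this removed file all follow the same dispatch pattern: start with the portable C row kernel, then upgrade the function pointer to an SSSE3 or AVX2 variant when the CPU flag is set, using the "Any" wrapper for arbitrary widths and the plain vector kernel only for exact multiples of the vector width. A minimal Go sketch of that selection logic; the kernel names and the hasSSSE3/hasAVX2 inputs are illustrative stand-ins, not part of libyuv's API:

package main

import "fmt"

// rowKernel has the shape of libyuv's per-row converters: one row of
// packed pixels in, one row of luma bytes out.
type rowKernel func(src, dst []byte, width int)

func argbToYRowC(src, dst []byte, width int)        {} // portable fallback
func argbToYRowAnySSSE3(src, dst []byte, width int) {} // SSSE3, any width
func argbToYRowSSSE3(src, dst []byte, width int)    {} // SSSE3, width%16 == 0
func argbToYRowAnyAVX2(src, dst []byte, width int)  {} // AVX2, any width
func argbToYRowAVX2(src, dst []byte, width int)     {} // AVX2, width%32 == 0

// pickARGBToYRow mirrors the selection order in ARGBToI420 above: the widest
// supported instruction set wins, the "Any" variant covers odd widths, and
// the plain variant is used only for exact multiples of the vector width.
func pickARGBToYRow(hasSSSE3, hasAVX2 bool, width int) rowKernel {
	kernel := rowKernel(argbToYRowC)
	if hasSSSE3 {
		kernel = argbToYRowAnySSSE3
		if width%16 == 0 {
			kernel = argbToYRowSSSE3
		}
	}
	if hasAVX2 {
		kernel = argbToYRowAnyAVX2
		if width%32 == 0 {
			kernel = argbToYRowAVX2
		}
	}
	return kernel
}

func main() {
	k := pickARGBToYRow(true, true, 640)
	k(nil, nil, 0) // the kernels here are stubs; this only shows the wiring
	fmt.Println("picked a kernel")
}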

View file

@ -1,113 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_H_
#define INCLUDE_LIBYUV_CONVERT_H_
#include "rotate.h" // For enum RotationMode.
// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8_t *src_argb,
int src_stride_argb,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8_t *src_abgr,
int src_stride_abgr,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t *src_rgb565,
int src_stride_rgb565,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height);
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "fourcc" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToI420(const uint8_t *sample,
size_t sample_size,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int crop_x,
int crop_y,
int src_width,
int src_height,
int crop_width,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc);
#endif // INCLUDE_LIBYUV_CONVERT_H_
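The comment block above fixes the plane layout ConvertToI420 expects: a tightly packed I420 buffer with dst_stride_y equal to dst_width, chroma strides of (dst_width + 1) / 2, and a centered crop computed from the source and destination sizes. A small Go sketch of that arithmetic; the helper names are illustrative, only the formulas come from the header:

package main

import "fmt"

// i420Layout returns plane offsets and strides for a tightly packed I420
// buffer, following the header's recommendations: luma stride == width,
// chroma stride == (width+1)/2.
func i420Layout(dw, dh int) (yOff, uOff, vOff, yStride, cStride int) {
	cw, ch := (dw+1)/2, (dh+1)/2 // chroma plane dimensions (4:2:0 subsampling)
	uOff = dw * dh               // U plane starts right after Y
	vOff = uOff + cw*ch          // V plane starts right after U
	return 0, uOff, vOff, dw, cw
}

// centeredCrop computes the crop_x/crop_y values the header suggests for
// cropping the middle of a larger source frame.
func centeredCrop(srcW, srcH, dstW, dstH int) (cropX, cropY int) {
	return (srcW - dstW) / 2, (srcH - dstH) / 2
}

func main() {
	y, u, v, ys, cs := i420Layout(641, 480) // odd width: chroma rounds up
	fmt.Println(y, u, v, ys, cs)            // 0 307680 384720 641 321
	fmt.Println(centeredCrop(1920, 1080, 1280, 720)) // 320 180
}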

View file

@ -1,24 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "basic_types.h"
// Conversion matrix for YVU to BGR
LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // BT.601 full
LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full
LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_

View file

@ -1,116 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "convert.h"
#include "video_common.h"
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToI420(const uint8_t *sample,
size_t sample_size,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int crop_x,
int crop_y,
int src_width,
int src_height,
int crop_width,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc) {
uint32_t format = CanonicalFourCC(fourcc);
const uint8_t *src;
// TODO(nisse): Why allow crop_height < 0?
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
LIBYUV_BOOL need_buf =
(rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
format != FOURCC_NV21 && format != FOURCC_YV12) ||
dst_y == sample;
uint8_t *tmp_y = dst_y;
uint8_t *tmp_u = dst_u;
uint8_t *tmp_v = dst_v;
int tmp_y_stride = dst_stride_y;
int tmp_u_stride = dst_stride_u;
int tmp_v_stride = dst_stride_v;
uint8_t *rotate_buffer = NULL;
const int inv_crop_height =
(src_height < 0) ? -abs_crop_height : abs_crop_height;
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
crop_width <= 0 || src_height == 0 || crop_height == 0) {
return -1;
}
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
// and then rotate the I420 to the final destination buffer.
// For in-place conversion, if destination dst_y is same as source sample,
// also enable temporary buffer.
if (need_buf) {
int y_size = crop_width * abs_crop_height;
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
rotate_buffer = (uint8_t *) malloc(y_size + uv_size * 2); /* NOLINT */
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
dst_y = rotate_buffer;
dst_u = dst_y + y_size;
dst_v = dst_u + uv_size;
dst_stride_y = crop_width;
dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
}
switch (format) {
// Single plane formats
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
default:
r = -1; // unknown fourcc - return failure code.
}
if (need_buf) {
if (!r) {
r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
tmp_v, tmp_v_stride, crop_width, abs_crop_height,
rotation);
}
free(rotate_buffer);
}
return r;
}

View file

@ -1,204 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "cpu_id.h"
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h> // For fopen()
#include <string.h>
// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#define SAFEBUFFERS
// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int *cpu_info) {
#if defined(_MSC_VER)
// GCC version uses inline x86 assembly.
#else // defined(_MSC_VER)
int info_ebx, info_edx;
asm volatile(
#if defined(__i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=D"(info_ebx),
#else
"cpuid \n"
: "=b"(info_ebx),
#endif // defined( __i386__) && defined(__PIC__)
"+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
cpu_info[0] = info_eax;
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
#endif // defined(_MSC_VER)
}
#else // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(int eax, int ecx, int* cpu_info) {
(void)eax;
(void)ecx;
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
// For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
static int GetXCR0() {
int xcr0 = 0;
#if defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
return xcr0;
}
#else
// xgetbv unavailable to query for OSSave support. Return 0.
#define GetXCR0() 0
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
// Based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char *cpuinfo_name) {
char cpuinfo_line[512];
FILE *f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume Neon if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return kCpuHasNEON;
}
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
char *p = strstr(cpuinfo_line, " neon");
if (p && (p[5] == ' ' || p[5] == '\n')) {
fclose(f);
return kCpuHasNEON;
}
// aarch64 uses asimd for Neon.
p = strstr(cpuinfo_line, " asimd");
if (p) {
fclose(f);
return kCpuHasNEON;
}
}
}
fclose(f);
return 0;
}
static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86))
int cpu_info0[4] = {0, 0, 0, 0};
int cpu_info1[4] = {0, 0, 0, 0};
int cpu_info7[4] = {0, 0, 0, 0};
CpuId(0, 0, cpu_info0);
CpuId(1, 0, cpu_info1);
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
}
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
// AVX requires OS saves YMM registers.
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
}
}
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
cpu_info = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
cpu_info |= kCpuHasARM;
#endif // __arm__
cpu_info |= kCpuInitialized;
return cpu_info;
}
// Note that use of this function is not thread safe.
LIBYUV_API
int MaskCpuFlags(int enable_flags) {
int cpu_info = GetCpuFlags() & enable_flags;
SetCpuFlags(cpu_info);
return cpu_info;
}
LIBYUV_API
int InitCpuFlags(void) {
return MaskCpuFlags(-1);
}

View file

@ -1,106 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "basic_types.h"
// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag.
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100; // unused at this time.
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasF16C = 0x2000;
static const int kCpuHasGFNI = 0x4000;
static const int kCpuHasAVX512BW = 0x8000;
static const int kCpuHasAVX512VL = 0x10000;
static const int kCpuHasAVX512VNNI = 0x20000;
static const int kCpuHasAVX512VBMI = 0x40000;
static const int kCpuHasAVX512VBMI2 = 0x80000;
static const int kCpuHasAVX512VBITALG = 0x100000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
LIBYUV_API
int InitCpuFlags(void);
// Detect CPU has SSE2 etc.
// Test_flag parameter should be one of kCpuHas constants above.
// Returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
#else
int cpu_info = cpu_info_;
#endif
return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
}
// Internal function for parsing /proc/cpuinfo.
LIBYUV_API
int ArmCpuCaps(const char *cpuinfo_name);
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
// MaskCpuFlags(0) to reset state so next call will auto init.
// Returns cpu_info flags.
LIBYUV_API
int MaskCpuFlags(int enable_flags);
// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
// should be a valid combination of the kCpuHas constants above and include
// kCpuInitialized. Use this method when running in a sandboxed process where
// the detection code might fail (as it might access /proc/cpuinfo). In such
// cases the cpu_info can be obtained from a non sandboxed process by calling
// InitCpuFlags() and passed to the sandboxed process (via command line
// parameters, IPC...) which can then call this method to initialize the CPU
// flags.
// Notes:
// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
// again.
// - enabling CPU features that are not supported by the CPU will result in
// undefined behavior.
// TODO(fbarchard): consider writing a helper function that translates from
// other library CPU info to libyuv CPU info and add a .md doc that explains
// CPU detection.
static __inline void SetCpuFlags(int cpu_flags) {
LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
__atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
#else
cpu_info_ = cpu_flags;
#endif
}
// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int *cpu_info);
#endif // INCLUDE_LIBYUV_CPU_ID_H_
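TestCpuFlag is how the removed code decided between the C, SSSE3 and AVX2 kernels at run time; MaskCpuFlags exists mainly so tests can force the portable paths. On the Go side the same information is available from golang.org/x/sys/cpu, which is one way to cross-check what the vendored detection used to report; this package is not part of the change, just a convenient sanity check:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// Rough Go-side equivalents of the kCpuHas* flags queried above.
	fmt.Println("SSSE3:", cpu.X86.HasSSSE3)   // gates the *_SSSE3 row kernels
	fmt.Println("AVX2: ", cpu.X86.HasAVX2)    // gates the *_AVX2 row kernels
	fmt.Println("NEON: ", cpu.ARM64.HasASIMD) // aarch64 reports Neon as "asimd"
}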

View file

@ -1,18 +1,43 @@
//go:build !darwin && !no_libyuv
// Package libyuv contains the wrapper for: https://chromium.googlesource.com/libyuv/libyuv.
// Libs are downloaded from: https://packages.macports.org/libyuv/.
package libyuv
// see: https://chromium.googlesource.com/libyuv/libyuv
/*
#cgo CFLAGS: -Wall
#cgo LDFLAGS: -lyuv
#cgo !darwin LDFLAGS: -lyuv
#include <stdlib.h>
#cgo darwin CFLAGS: -DINCLUDE_LIBYUV_VERSION_H_
#cgo darwin LDFLAGS: -L${SRCDIR} -lstdc++
#cgo darwin,amd64 LDFLAGS: -lyuv_darwin_x86_64 -ljpeg -lstdc++
#cgo darwin,arm64 LDFLAGS: -lyuv_darwin_arm64 -ljpeg -lstdc++
#include <stdint.h> // for uintptr_t and C99 types
#if !defined(LIBYUV_API)
#define LIBYUV_API
#endif // LIBYUV_API
#ifndef INCLUDE_LIBYUV_VERSION_H_
#include "libyuv/version.h"
#include "libyuv/video_common.h"
#else
#define LIBYUV_VERSION 1874 // darwin static libs version
#endif // INCLUDE_LIBYUV_VERSION_H_
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#define FOURCC(a, b, c, d) \
(((uint32_t)(a)) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
enum FourCC {
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_ANY = -1,
};
//
typedef enum RotationMode {
kRotate0 = 0, // No rotation.
kRotate90 = 90, // Rotate 90 degrees clockwise.
@ -20,7 +45,6 @@ typedef enum RotationMode {
kRotate270 = 270, // Rotate 270 degrees clockwise.
} RotationModeEnum;
//
LIBYUV_API
int ConvertToI420(const uint8_t* sample,
size_t sample_size,
@ -65,6 +89,11 @@ int I420Scale(const uint8_t *src_y,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
*/
import "C"
import "fmt"

View file

@ -1,89 +0,0 @@
//go:build darwin || no_libyuv
package libyuv
/*
#cgo CFLAGS: -Wall
#include "basic_types.h"
#include "version.h"
#include "video_common.h"
#include "rotate.h"
#include "scale.h"
#include "convert.h"
*/
import "C"
import "fmt"
const FourccRgbp uint32 = C.FOURCC_RGBP
const FourccArgb uint32 = C.FOURCC_ARGB
const FourccAbgr uint32 = C.FOURCC_ABGR
func Y420(src []byte, dst []byte, _, h, stride int, dw, dh int, rot uint, pix uint32, cx, cy int) {
cw := (dw + 1) / 2
ch := (dh + 1) / 2
i0 := dw * dh
i1 := i0 + cw*ch
yStride := dw
cStride := cw
C.ConvertToI420(
(*C.uchar)(&src[0]),
C.size_t(0),
(*C.uchar)(&dst[0]),
C.int(yStride),
(*C.uchar)(&dst[i0]),
C.int(cStride),
(*C.uchar)(&dst[i1]),
C.int(cStride),
C.int(0),
C.int(0),
C.int(stride),
C.int(h),
C.int(cx),
C.int(cy),
C.enum_RotationMode(rot),
C.uint32_t(pix))
}
func Y420Scale(src []byte, dst []byte, w, h int, dw, dh int) {
srcWidthUV, dstWidthUV := (w+1)>>1, (dw+1)>>1
srcHeightUV, dstHeightUV := (h+1)>>1, (dh+1)>>1
srcYPlaneSize, dstYPlaneSize := w*h, dw*dh
srcUVPlaneSize, dstUVPlaneSize := srcWidthUV*srcHeightUV, dstWidthUV*dstHeightUV
srcStrideY, dstStrideY := w, dw
srcStrideU, dstStrideU := srcWidthUV, dstWidthUV
srcStrideV, dstStrideV := srcWidthUV, dstWidthUV
srcY := (*C.uchar)(&src[0])
srcU := (*C.uchar)(&src[srcYPlaneSize])
srcV := (*C.uchar)(&src[srcYPlaneSize+srcUVPlaneSize])
dstY := (*C.uchar)(&dst[0])
dstU := (*C.uchar)(&dst[dstYPlaneSize])
dstV := (*C.uchar)(&dst[dstYPlaneSize+dstUVPlaneSize])
C.I420Scale(
srcY,
C.int(srcStrideY),
srcU,
C.int(srcStrideU),
srcV,
C.int(srcStrideV),
C.int(w),
C.int(h),
dstY,
C.int(dstStrideY),
dstU,
C.int(dstStrideU),
dstV,
C.int(dstStrideV),
C.int(dw),
C.int(dh),
C.enum_FilterMode(C.kFilterNone))
}
func Version() string { return fmt.Sprintf("%v mod", int(C.LIBYUV_VERSION)) }
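Both wrapper files expose the same three entry points (Y420, Y420Scale, Version). A usage sketch under stated assumptions: the import path is a guess, the buffer sizes follow the plane math used in Y420Scale, and the argument meanings are inferred from the wrapper body in this diff rather than from any documentation:

package main

import (
	"fmt"

	"github.com/giongto35/cloud-game/v3/pkg/encoder/yuv/libyuv" // assumed import path
)

func main() {
	const w, h = 640, 480
	rgba := make([]byte, w*h*4) // packed 4-byte-per-pixel source frame

	// I420 needs w*h luma bytes plus two quarter-size chroma planes.
	i420 := make([]byte, w*h+2*((w+1)/2)*((h+1)/2))
	// Inferred argument meanings: source width in pixels, full-frame crop,
	// no rotation, ABGR input.
	libyuv.Y420(rgba, i420, w, h, w, w, h, 0 /* kRotate0 */, libyuv.FourccAbgr, w, h)

	// Downscale the converted frame to 320x240 (same plane layout, smaller).
	small := make([]byte, 320*240+2*160*120)
	libyuv.Y420Scale(i420, small, w, h, 320, 240)

	fmt.Println(libyuv.Version())
}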

Binary file not shown.

Binary file not shown.

View file

@ -1,68 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "planar_functions.h"
#include "cpu_id.h"
#include "row.h"
// Copy a plane of data
LIBYUV_API
void CopyPlane(const uint8_t *src_y,
int src_stride_y,
uint8_t *dst_y,
int dst_stride_y,
int width,
int height) {
int y;
void (*CopyRow)(const uint8_t *src, uint8_t *dst, int width) = CopyRow_C;
if (width <= 0 || height == 0) {
return;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_stride_y = -dst_stride_y;
}
// Coalesce rows.
if (src_stride_y == width && dst_stride_y == width) {
width *= height;
height = 1;
src_stride_y = dst_stride_y = 0;
}
// Nothing to do.
if (src_y == dst_y && src_stride_y == dst_stride_y) {
return;
}
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
#if defined(HAS_COPYROW_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_ERMS;
}
#endif
// Copy plane
for (y = 0; y < height; ++y) {
CopyRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
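CopyPlane's small trick is worth calling out: when both strides equal the width there is no padding between rows, so the whole plane can be copied as one contiguous run instead of height separate row copies. The same idea in Go, a sketch of the optimization rather than of libyuv's kernels:

package main

import "fmt"

// copyPlane copies a width x height plane between buffers that may have row
// padding (stride > width). When neither side is padded, the rows are
// coalesced into one contiguous copy, mirroring CopyPlane above.
func copyPlane(src []byte, srcStride int, dst []byte, dstStride, width, height int) {
	if srcStride == width && dstStride == width {
		copy(dst[:width*height], src[:width*height]) // one big row
		return
	}
	for y := 0; y < height; y++ {
		copy(dst[y*dstStride:y*dstStride+width], src[y*srcStride:y*srcStride+width])
	}
}

func main() {
	src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	dst := make([]byte, 8)
	copyPlane(src, 4, dst, 4, 3, 2) // 3-wide rows inside 4-byte strides
	fmt.Println(dst)                // [1 2 3 0 5 6 7 0]
}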

View file

@ -1,46 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#include "basic_types.h"
// TODO(fbarchard): Move cpu macros to row.h
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Copy a plane of data.
LIBYUV_API
void CopyPlane(const uint8_t *src_y,
int src_stride_y,
uint8_t *dst_y,
int dst_stride_y,
int width,
int height);
#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_

View file

@ -1,217 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rotate.h"
#include "convert.h"
#include "cpu_id.h"
#include "rotate_row.h"
#include "row.h"
LIBYUV_API
void TransposePlane(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height) {
int i = height;
void (*TransposeWx8)(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSEWX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
TransposeWx8 = TransposeWx8_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_SSSE3;
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
TransposeWx8 = TransposeWx8_Fast_SSSE3;
}
}
#endif
// Work across the source in 8x8 tiles
while (i >= 8) {
TransposeWx8(src, src_stride, dst, dst_stride, width);
src += 8 * src_stride; // Go down 8 rows.
dst += 8; // Move over 8 columns.
i -= 8;
}
if (i > 0) {
TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
}
}
LIBYUV_API
void RotatePlane90(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height) {
// Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
LIBYUV_API
void RotatePlane270(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height) {
// Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
LIBYUV_API
void RotatePlane180(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height) {
// Swap top and bottom row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
const uint8_t *src_bot = src + src_stride * (height - 1);
uint8_t *dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
void (*MirrorRow)(const uint8_t *src, uint8_t *dst, int width) = MirrorRow_C;
void (*CopyRow)(const uint8_t *src, uint8_t *dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
MirrorRow = MirrorRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSSE3;
}
}
#endif
#if defined(HAS_MIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MirrorRow = MirrorRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
MirrorRow = MirrorRow_AVX2;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
#if defined(HAS_COPYROW_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_ERMS;
}
#endif
#if defined(HAS_COPYROW_NEON)
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
CopyRow(src, row, width); // Copy top row into buffer
MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
dst_bot -= dst_stride;
}
free_aligned_buffer_64(row);
}
LIBYUV_API
int I420Rotate(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || width <= 0 || height == 0 ||
!dst_y || !dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
switch (mode) {
case kRotate0:
// copy frame
return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
dst_v, dst_stride_v, width, height);
case kRotate90:
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
halfheight);
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
halfheight);
return 0;
case kRotate270:
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
halfheight);
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
halfheight);
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
halfheight);
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
halfheight);
return 0;
default:
break;
}
return -1;
}
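RotatePlane90 and RotatePlane270 are both TransposePlane with one of the two buffers walked backwards, which is the easiest way to remember which direction each rotates. A naive Go sketch of the 90-degree case on a single plane; index math only, none of the tiled SIMD of the real code:

package main

import "fmt"

// rotatePlane90 rotates a w x h byte plane 90 degrees clockwise into an
// h-wide, w-tall destination: a transpose with the source read bottom to
// top, the same trick RotatePlane90 uses by negating the source stride.
func rotatePlane90(src []byte, w, h int) []byte {
	dst := make([]byte, w*h) // destination is h wide and w tall
	for y := 0; y < h; y++ {
		for x := 0; x < w; x++ {
			dst[x*h+(h-1-y)] = src[y*w+x]
		}
	}
	return dst
}

func main() {
	// 3x2 plane: rows "abc" and "def" become columns "da", "eb", "fc".
	fmt.Println(string(rotatePlane90([]byte("abcdef"), 3, 2))) // "daebfc"
}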

View file

@ -1,79 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_
#define INCLUDE_LIBYUV_ROTATE_H_
#include "basic_types.h"
// Supported rotation.
typedef enum RotationMode {
kRotate0 = 0, // No rotation.
kRotate90 = 90, // Rotate 90 degrees clockwise.
kRotate180 = 180, // Rotate 180 degrees.
kRotate270 = 270, // Rotate 270 degrees clockwise.
} RotationModeEnum;
// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height);
LIBYUV_API
void RotatePlane180(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height);
LIBYUV_API
void RotatePlane270(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height);
#endif // INCLUDE_LIBYUV_ROTATE_H_

View file

@ -1,54 +0,0 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rotate_row.h"
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
int dst_stride, int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
} \
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
}
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#undef TANY
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
} \
TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \
dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
}
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#undef TUVANY
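The TANY/TUVANY macros generate the "any width" wrappers: run the SIMD transpose on the largest prefix that is a multiple of the vector width, then let the C fallback handle the leftover columns. The same split expressed as a Go sketch, with plain function values standing in for the SIMD and C row functions:

package main

import "fmt"

// anyWidth runs fast() on the largest prefix whose width is a multiple of
// (mask+1) columns and falls back to slow() for the remaining columns,
// mirroring the TANY macro above (which calls the C fallback unconditionally).
func anyWidth(width, mask int, fast, slow func(offset, n int)) {
	r := width & mask // leftover columns for the portable path
	n := width - r    // columns the SIMD kernel can handle
	if n > 0 {
		fast(0, n)
	}
	if r > 0 {
		slow(n, r)
	}
}

func main() {
	report := func(name string) func(int, int) {
		return func(off, n int) { fmt.Printf("%s: cols %d..%d\n", name, off, off+n-1) }
	}
	anyWidth(21, 7, report("SSSE3"), report("C")) // SSSE3: cols 0..15, C: cols 16..20
}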

View file

@ -1,77 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rotate_row.h"
void TransposeWx8_C(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width) {
int i;
for (i = 0; i < width; ++i) {
dst[0] = src[0 * src_stride];
dst[1] = src[1 * src_stride];
dst[2] = src[2 * src_stride];
dst[3] = src[3 * src_stride];
dst[4] = src[4 * src_stride];
dst[5] = src[5 * src_stride];
dst[6] = src[6 * src_stride];
dst[7] = src[7 * src_stride];
++src;
dst += dst_stride;
}
}
void TransposeUVWx8_C(const uint8_t *src,
int src_stride,
uint8_t *dst_a,
int dst_stride_a,
uint8_t *dst_b,
int dst_stride_b,
int width) {
int i;
for (i = 0; i < width; ++i) {
dst_a[0] = src[0 * src_stride + 0];
dst_b[0] = src[0 * src_stride + 1];
dst_a[1] = src[1 * src_stride + 0];
dst_b[1] = src[1 * src_stride + 1];
dst_a[2] = src[2 * src_stride + 0];
dst_b[2] = src[2 * src_stride + 1];
dst_a[3] = src[3 * src_stride + 0];
dst_b[3] = src[3 * src_stride + 1];
dst_a[4] = src[4 * src_stride + 0];
dst_b[4] = src[4 * src_stride + 1];
dst_a[5] = src[5 * src_stride + 0];
dst_b[5] = src[5 * src_stride + 1];
dst_a[6] = src[6 * src_stride + 0];
dst_b[6] = src[6 * src_stride + 1];
dst_a[7] = src[7 * src_stride + 0];
dst_b[7] = src[7 * src_stride + 1];
src += 2;
dst_a += dst_stride_a;
dst_b += dst_stride_b;
}
}
void TransposeWxH_C(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height) {
int i;
for (i = 0; i < width; ++i) {
int j;
for (j = 0; j < height; ++j) {
dst[i * dst_stride + j] = src[j * src_stride + i];
}
}
}

View file

@ -1,370 +0,0 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rotate_row.h"
#include "row.h"
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
void TransposeWx8_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width) {
asm volatile(
// Read in the data from the source pointer.
// First round of bit swap.
LABELALIGN
"1: \n"
"movq (%0),%%xmm0 \n"
"movq (%0,%3),%%xmm1 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"movq (%0),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"palignr $0x8,%%xmm1,%%xmm1 \n"
"movq (%0,%3),%%xmm3 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"movq (%0),%%xmm4 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"movq (%0,%3),%%xmm5 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"movq (%0),%%xmm6 \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq (%0,%3),%%xmm7 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
"neg %3 \n"
"movdqa %%xmm6,%%xmm7 \n"
"lea 0x8(%0,%3,8),%0 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"neg %3 \n"
// Second round of bit swap.
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"palignr $0x8,%%xmm2,%%xmm2 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm4,%%xmm6 \n"
"movdqa %%xmm5,%%xmm7 \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
// Third round of bit swap.
// Write to the destination pointer.
"punpckldq %%xmm4,%%xmm0 \n"
"movq %%xmm0,(%1) \n"
"movdqa %%xmm0,%%xmm4 \n"
"palignr $0x8,%%xmm4,%%xmm4 \n"
"movq %%xmm4,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movdqa %%xmm2,%%xmm6 \n"
"movq %%xmm2,(%1) \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movq %%xmm6,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"movq %%xmm1,(%1) \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq %%xmm5,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movq %%xmm3,(%1) \n"
"movdqa %%xmm3,%%xmm7 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"sub $0x8,%2 \n"
"movq %%xmm7,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"((intptr_t) (src_stride)), // %3
"r"((intptr_t) (dst_stride)) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
// Transpose 16x8. 64 bit
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
void TransposeWx8_Fast_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width) {
asm volatile(
// Read in the data from the source pointer.
// First round of bit swap.
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%3),%%xmm1 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
"movdqu (%0),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm8,%%xmm9 \n"
"palignr $0x8,%%xmm1,%%xmm1 \n"
"palignr $0x8,%%xmm9,%%xmm9 \n"
"movdqu (%0,%3),%%xmm3 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm2,%%xmm10 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm10 \n"
"movdqa %%xmm2,%%xmm3 \n"
"movdqa %%xmm10,%%xmm11 \n"
"movdqu (%0),%%xmm4 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"palignr $0x8,%%xmm11,%%xmm11 \n"
"movdqu (%0,%3),%%xmm5 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm4,%%xmm12 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm12 \n"
"movdqa %%xmm4,%%xmm5 \n"
"movdqa %%xmm12,%%xmm13 \n"
"movdqu (%0),%%xmm6 \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"palignr $0x8,%%xmm13,%%xmm13 \n"
"movdqu (%0,%3),%%xmm7 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm6,%%xmm14 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
"punpckhbw %%xmm7,%%xmm14 \n"
"neg %3 \n"
"movdqa %%xmm6,%%xmm7 \n"
"movdqa %%xmm14,%%xmm15 \n"
"lea 0x10(%0,%3,8),%0 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
"neg %3 \n"
// Second round of bit swap.
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"palignr $0x8,%%xmm2,%%xmm2 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm4,%%xmm6 \n"
"movdqa %%xmm5,%%xmm7 \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"punpcklwd %%xmm10,%%xmm8 \n"
"punpcklwd %%xmm11,%%xmm9 \n"
"movdqa %%xmm8,%%xmm10 \n"
"movdqa %%xmm9,%%xmm11 \n"
"palignr $0x8,%%xmm10,%%xmm10 \n"
"palignr $0x8,%%xmm11,%%xmm11 \n"
"punpcklwd %%xmm14,%%xmm12 \n"
"punpcklwd %%xmm15,%%xmm13 \n"
"movdqa %%xmm12,%%xmm14 \n"
"movdqa %%xmm13,%%xmm15 \n"
"palignr $0x8,%%xmm14,%%xmm14 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
// Third round of bit swap.
// Write to the destination pointer.
"punpckldq %%xmm4,%%xmm0 \n"
"movq %%xmm0,(%1) \n"
"movdqa %%xmm0,%%xmm4 \n"
"palignr $0x8,%%xmm4,%%xmm4 \n"
"movq %%xmm4,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movdqa %%xmm2,%%xmm6 \n"
"movq %%xmm2,(%1) \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movq %%xmm6,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"movq %%xmm1,(%1) \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq %%xmm5,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movq %%xmm3,(%1) \n"
"movdqa %%xmm3,%%xmm7 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"movq %%xmm7,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm12,%%xmm8 \n"
"movq %%xmm8,(%1) \n"
"movdqa %%xmm8,%%xmm12 \n"
"palignr $0x8,%%xmm12,%%xmm12 \n"
"movq %%xmm12,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm14,%%xmm10 \n"
"movdqa %%xmm10,%%xmm14 \n"
"movq %%xmm10,(%1) \n"
"palignr $0x8,%%xmm14,%%xmm14 \n"
"punpckldq %%xmm13,%%xmm9 \n"
"movq %%xmm14,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm9,%%xmm13 \n"
"movq %%xmm9,(%1) \n"
"palignr $0x8,%%xmm13,%%xmm13 \n"
"movq %%xmm13,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm15,%%xmm11 \n"
"movq %%xmm11,(%1) \n"
"movdqa %%xmm11,%%xmm15 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
"sub $0x10,%2 \n"
"movq %%xmm15,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"((intptr_t) (src_stride)), // %3
"r"((intptr_t) (dst_stride)) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
"xmm15");
}
#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
// Transpose UV 8x8. 64 bit.
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
void TransposeUVWx8_SSE2(const uint8_t *src,
int src_stride,
uint8_t *dst_a,
int dst_stride_a,
uint8_t *dst_b,
int dst_stride_b,
int width) {
asm volatile(
// Read in the data from the source pointer.
// First round of bit swap.
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%4),%%xmm1 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
"movdqa %%xmm8,%%xmm1 \n"
"movdqu (%0),%%xmm2 \n"
"movdqu (%0,%4),%%xmm3 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm2,%%xmm8 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm8 \n"
"movdqa %%xmm8,%%xmm3 \n"
"movdqu (%0),%%xmm4 \n"
"movdqu (%0,%4),%%xmm5 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm4,%%xmm8 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm8 \n"
"movdqa %%xmm8,%%xmm5 \n"
"movdqu (%0),%%xmm6 \n"
"movdqu (%0,%4),%%xmm7 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm6,%%xmm8 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
"neg %4 \n"
"lea 0x10(%0,%4,8),%0 \n"
"punpckhbw %%xmm7,%%xmm8 \n"
"movdqa %%xmm8,%%xmm7 \n"
"neg %4 \n"
// Second round of bit swap.
"movdqa %%xmm0,%%xmm8 \n"
"movdqa %%xmm1,%%xmm9 \n"
"punpckhwd %%xmm2,%%xmm8 \n"
"punpckhwd %%xmm3,%%xmm9 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm8,%%xmm2 \n"
"movdqa %%xmm9,%%xmm3 \n"
"movdqa %%xmm4,%%xmm8 \n"
"movdqa %%xmm5,%%xmm9 \n"
"punpckhwd %%xmm6,%%xmm8 \n"
"punpckhwd %%xmm7,%%xmm9 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm8,%%xmm6 \n"
"movdqa %%xmm9,%%xmm7 \n"
// Third round of bit swap.
// Write to the destination pointer.
"movdqa %%xmm0,%%xmm8 \n"
"punpckldq %%xmm4,%%xmm0 \n"
"movlpd %%xmm0,(%1) \n" // Write back U channel
"movhpd %%xmm0,(%2) \n" // Write back V channel
"punpckhdq %%xmm4,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm2,%%xmm8 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movlpd %%xmm2,(%1) \n"
"movhpd %%xmm2,(%2) \n"
"punpckhdq %%xmm6,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm1,%%xmm8 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movlpd %%xmm1,(%1) \n"
"movhpd %%xmm1,(%2) \n"
"punpckhdq %%xmm5,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm3,%%xmm8 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movlpd %%xmm3,(%1) \n"
"movhpd %%xmm3,(%2) \n"
"punpckhdq %%xmm7,%%xmm8 \n"
"sub $0x8,%3 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst_a), // %1
"+r"(dst_b), // %2
"+r"(width) // %3
: "r"((intptr_t) (src_stride)), // %4
"r"((intptr_t) (dst_stride_a)), // %5
"r"((intptr_t) (dst_stride_b)) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7", "xmm8", "xmm9");
}
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
#endif // defined(__x86_64__) || defined(__i386__)

View file

@@ -1,106 +0,0 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_
#define INCLUDE_LIBYUV_ROTATE_ROW_H_
#include "basic_types.h"
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
// The following are available for 64 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
void TransposeWxH_C(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width,
int height);
void TransposeWx8_C(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width);
void TransposeWx8_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width);
void TransposeWx8_Fast_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width);
void TransposeWx8_Any_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride,
int width);
void TransposeUVWx8_C(const uint8_t *src,
int src_stride,
uint8_t *dst_a,
int dst_stride_a,
uint8_t *dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_SSE2(const uint8_t *src,
int src_stride,
uint8_t *dst_a,
int dst_stride_a,
uint8_t *dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_SSE2(const uint8_t *src,
int src_stride,
uint8_t *dst_a,
int dst_stride_a,
uint8_t *dst_b,
int dst_stride_b,
int width);
#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_

View file

@@ -1,426 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_
#include <stddef.h> // For NULL
#include <stdlib.h> // For malloc
#include "basic_types.h"
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORSPLITUVROW_SSSE3
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#endif
// Effects:
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
defined(GCC_HAS_AVX2))
#define HAS_ARGBEXTRACTALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX
#define HAS_INTERPOLATEROW_AVX2
#define HAS_MIRRORROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBTOUVROW_AVX2
#endif
#endif
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_MIRRORUVROW_SSSE3
#endif
// The following are available for AVX2 gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOYROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_AVX2
#endif
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
#define LIBYUV_NOINLINE __declspec(noinline)
typedef __declspec(align(16)) int16_t vec16[8];
typedef __declspec(align(16)) int32_t vec32[4];
typedef __declspec(align(16)) float vecf32[4];
typedef __declspec(align(16)) int8_t vec8[16];
typedef __declspec(align(16)) uint16_t uvec16[8];
typedef __declspec(align(16)) uint32_t uvec32[4];
typedef __declspec(align(16)) uint8_t uvec8[16];
typedef __declspec(align(32)) int16_t lvec16[16];
typedef __declspec(align(32)) int32_t lvec32[8];
typedef __declspec(align(32)) int8_t lvec8[32];
typedef __declspec(align(32)) uint16_t ulvec16[16];
typedef __declspec(align(32)) uint32_t ulvec32[8];
typedef __declspec(align(32)) uint8_t ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
#define LIBYUV_NOINLINE __attribute__((noinline))
typedef int16_t __attribute__((vector_size(16))) vec16;
typedef int32_t __attribute__((vector_size(16))) vec32;
typedef float __attribute__((vector_size(16))) vecf32;
typedef int8_t __attribute__((vector_size(16))) vec8;
typedef uint16_t __attribute__((vector_size(16))) uvec16;
typedef uint32_t __attribute__((vector_size(16))) uvec32;
typedef uint8_t __attribute__((vector_size(16))) uvec8;
typedef int16_t __attribute__((vector_size(32))) lvec16;
typedef int32_t __attribute__((vector_size(32))) lvec32;
typedef int8_t __attribute__((vector_size(32))) lvec8;
typedef uint16_t __attribute__((vector_size(32))) ulvec16;
typedef uint32_t __attribute__((vector_size(32))) ulvec32;
typedef uint8_t __attribute__((vector_size(32))) ulvec8;
#else
#define SIMD_ALIGNED(var) var
#define LIBYUV_NOINLINE
typedef int16_t vec16[8];
typedef int32_t vec32[4];
typedef float vecf32[4];
typedef int8_t vec8[16];
typedef uint16_t uvec16[8];
typedef uint32_t uvec32[4];
typedef uint8_t uvec8[16];
typedef int16_t lvec16[16];
typedef int32_t lvec32[8];
typedef int8_t lvec8[32];
typedef uint16_t ulvec16[16];
typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif
#if !defined(__aarch64__) || !defined(__arm__)
// This struct is for Intel color conversion.
struct YuvConstants {
uint8_t kUVToB[32];
uint8_t kUVToG[32];
uint8_t kUVToR[32];
int16_t kYToRgb[16];
int16_t kYBiasToRgb[16];
};
#endif
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
void* var##_mem = malloc((size) + 63); /* NOLINT */ \
uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = NULL
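align_buffer_64 over-allocates by 63 bytes and rounds the pointer up to the next multiple of 64 so SIMD loads and stores stay aligned. A minimal standalone check of the same rounding trick (not part of the original file; names are illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void) {
  void* mem = malloc(100 + 63);                      /* over-allocate by 63 */
  uint8_t* buf = (uint8_t*)(((uintptr_t)mem + 63) & ~(uintptr_t)63);
  assert(((uintptr_t)buf & 63) == 0);                /* 64-byte aligned */
  assert(buf >= (uint8_t*)mem && buf - (uint8_t*)mem <= 63); /* still inside the block */
  free(mem);                                         /* free the original pointer */
  return 0;
}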
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif
// NaCL macros for GCC x86 and x64.
#if defined(__native_client__)
#define LABELALIGN ".p2align 5\n"
#else
#define LABELALIGN
#endif
void ARGBToYRow_AVX2(const uint8_t *src_argb, uint8_t *dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t *src_abgr, uint8_t *dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t *src_argb, uint8_t *dst_y, int width);
void ABGRToYRow_SSSE3(const uint8_t *src_abgr, uint8_t *dst_y, int width);
void BGRAToYRow_SSSE3(const uint8_t *src_bgra, uint8_t *dst_y, int width);
void ARGBToYRow_C(const uint8_t *src_rgb, uint8_t *dst_y, int width);
void ABGRToYRow_C(const uint8_t *src_rgb, uint8_t *dst_y, int width);
void RGB565ToYRow_C(const uint8_t *src_rgb565, uint8_t *dst_y, int width);
void ARGBToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void BGRAToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void ABGRToYRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void ARGBToUVRow_AVX2(const uint8_t *src_argb,
int src_stride_argb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ABGRToUVRow_AVX2(const uint8_t *src_abgr,
int src_stride_abgr,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ARGBToUVRow_SSSE3(const uint8_t *src_argb,
int src_stride_argb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void BGRAToUVRow_SSSE3(const uint8_t *src_bgra,
int src_stride_bgra,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ABGRToUVRow_SSSE3(const uint8_t *src_abgr,
int src_stride_abgr,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void RGBAToUVRow_SSSE3(const uint8_t *src_rgba,
int src_stride_rgba,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ARGBToUVRow_Any_AVX2(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ABGRToUVRow_Any_AVX2(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ARGBToUVRow_Any_SSSE3(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void BGRAToUVRow_Any_SSSE3(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ABGRToUVRow_Any_SSSE3(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void RGBAToUVRow_Any_SSSE3(const uint8_t *src_ptr,
int src_stride,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ARGBToUVRow_C(const uint8_t *src_rgb,
int src_stride_rgb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void BGRAToUVRow_C(const uint8_t *src_rgb,
int src_stride_rgb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void ABGRToUVRow_C(const uint8_t *src_rgb,
int src_stride_rgb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void RGBAToUVRow_C(const uint8_t *src_rgb,
int src_stride_rgb,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void RGB565ToUVRow_C(const uint8_t *src_rgb565,
int src_stride_rgb565,
uint8_t *dst_u,
uint8_t *dst_v,
int width);
void MirrorRow_AVX2(const uint8_t *src, uint8_t *dst, int width);
void MirrorRow_SSSE3(const uint8_t *src, uint8_t *dst, int width);
void MirrorRow_C(const uint8_t *src, uint8_t *dst, int width);
void MirrorRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void MirrorRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t *src, uint8_t *dst, int width);
void MirrorUVRow_AVX2(const uint8_t *src_uv, uint8_t *dst_uv, int width);
void MirrorUVRow_SSSE3(const uint8_t *src_uv, uint8_t *dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void ARGBMirrorRow_AVX2(const uint8_t *src, uint8_t *dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t *src, uint8_t *dst, int width);
void ARGBMirrorRow_C(const uint8_t *src, uint8_t *dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int width);
void ARGBMirrorRow_Any_SSE2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int width);
void CopyRow_SSE2(const uint8_t *src, uint8_t *dst, int width);
void CopyRow_AVX(const uint8_t *src, uint8_t *dst, int width);
void CopyRow_ERMS(const uint8_t *src, uint8_t *dst, int width);
void CopyRow_C(const uint8_t *src, uint8_t *dst, int count);
void CopyRow_Any_SSE2(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void CopyRow_Any_AVX(const uint8_t *src_ptr, uint8_t *dst_ptr, int width);
void RGB565ToARGBRow_SSE2(const uint8_t *src, uint8_t *dst, int width);
void RGB565ToARGBRow_AVX2(const uint8_t *src_rgb565,
uint8_t *dst_argb,
int width);
void RGB565ToARGBRow_C(const uint8_t *src_rgb565, uint8_t *dst_argb, int width);
void RGB565ToARGBRow_Any_SSE2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int width);
void RGB565ToARGBRow_Any_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int width);
// Used for I420Scale, ARGBScale, and ARGBInterpolate.
void InterpolateRow_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
void InterpolateRow_SSSE3(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction);
void InterpolateRow_AVX2(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction);
void InterpolateRow_Any_SSSE3(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_Any_AVX2(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
#endif // INCLUDE_LIBYUV_ROW_H_

View file

@@ -1,206 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "row.h"
#include <string.h> // For memset.
// Subsampled source needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t vin[128]); \
SIMD_ALIGNED(uint8_t vout[128]); \
memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
ANY_SIMD(vin, vout, MASK + 1); \
memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
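ANY11 splits each row into the largest SIMD-sized multiple plus a remainder: n = width & ~MASK pixels go through the fast kernel in place, while the last r = width & MASK pixels are copied into a zero-padded stack buffer, processed as one full MASK + 1 block, and copied back out. A standalone illustration of that split (not from the file; the numbers assume a 16-pixel SSSE3 kernel, i.e. MASK = 15):

#include <stdio.h>

int main(void) {
  int width = 101, MASK = 15;
  int r = width & MASK;   /* remainder handled via the padded buffer: 5 */
  int n = width & ~MASK;  /* multiple of 16 handled by the SIMD kernel: 96 */
  printf("SIMD kernel: %d pixels, padded tail: %d pixels\n", n, r);
  return 0;
}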
#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
#endif
#undef ANY11
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
int width, int source_y_fraction) { \
SIMD_ALIGNED(TS vin[64 * 2]); \
SIMD_ALIGNED(TD vout[64]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
} \
memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
if (source_y_fraction) { \
memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
r * SBPP * sizeof(TS)); \
} \
ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
}
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11I(InterpolateRow_Any_SSSE3,
InterpolateRow_SSSE3,
uint8_t,
uint8_t,
1,
1,
15)
#endif
#undef ANY11I
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t vin[64]); \
SIMD_ALIGNED(uint8_t vout[64]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
} \
memcpy(vin, src_ptr, r* BPP); \
ANY_SIMD(vin, vout, MASK + 1); \
memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
}
#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#undef ANY11M
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \
SIMD_ALIGNED(uint8_t vin[128 * 2]); \
SIMD_ALIGNED(uint8_t vout[128 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
} \
memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \
memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \
memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \
}
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#undef ANY12S

View file

@@ -1,887 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "row.h"
#include <assert.h>
#include <string.h> // For memcpy and memset.
#define STATIC_CAST(type, expr) (type)(expr)
// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA
// Macros to enable unlimited data for each colorspace
// LIBYUV_UNLIMITED_BT601
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
return (-(v >= 255) | v) & 255;
}
static __inline int32_t clamp1023(int32_t v) {
return (-(v >= 1023) | v) & 1023;
}
// clamp to max
static __inline int32_t ClampMax(int32_t v, int32_t max) {
return (-(v >= max) | v) & max;
}
static __inline uint32_t Abs(int32_t v) {
int m = -(v < 0);
return (v + m) ^ m;
}
#else // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
return (v < 0) ? 0 : v;
}
static __inline int32_t clamp255(int32_t v) {
return (v > 255) ? 255 : v;
}
static __inline int32_t clamp1023(int32_t v) {
return (v > 1023) ? 1023 : v;
}
static __inline int32_t ClampMax(int32_t v, int32_t max) {
return (v > max) ? max : v;
}
static __inline uint32_t Abs(int32_t v) {
return (v < 0) ? -v : v;
}
#endif // USE_BRANCHLESS
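The branchless forms rely on -(v >= 0) and -(v >= 255) evaluating to an all-zeros or all-ones mask. A standalone check against the branching versions (not part of the file; clamp255 is only exercised on non-negative input, matching how Clamp() calls it):

#include <assert.h>
#include <stdint.h>

static int32_t bl_clamp0(int32_t v)   { return -(v >= 0) & v; }
static int32_t bl_clamp255(int32_t v) { return (-(v >= 255) | v) & 255; }

int main(void) {
  int32_t v;
  for (v = -300; v <= 600; ++v) {
    assert(bl_clamp0(v) == (v < 0 ? 0 : v));
    if (v >= 0) {  /* see TODO above: negative input is not preserved */
      assert(bl_clamp255(v) == (v > 255 ? 255 : v));
    }
  }
  return 0;
}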
static __inline uint32_t Clamp(int32_t val) {
int v = clamp0(val);
return (uint32_t) (clamp255(v));
}
static __inline uint32_t Clamp10(int32_t val) {
int v = clamp0(val);
return (uint32_t) (clamp1023(v));
}
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
p[0] = (uint8_t)(v & 255);
p[1] = (uint8_t)((v >> 8) & 255);
p[2] = (uint8_t)((v >> 16) & 255);
p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
void RGB565ToARGBRow_C(const uint8_t *src_rgb565,
uint8_t *dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
uint8_t g = STATIC_CAST(
uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb565 += 2;
}
}
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * 128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
// 0x7e80) >> 8;
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
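A quick sanity check of the limited-range mapping these coefficients produce (worked values only, not part of the file): black lands on 16 and white on 235.

#include <assert.h>
#include <stdint.h>

static uint8_t RGBToY_ref(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}

int main(void) {
  assert(RGBToY_ref(0, 0, 0) == 16);        /* (0 + 0x1080) >> 8 */
  assert(RGBToY_ref(128, 128, 128) == 126); /* (28160 + 4224) >> 8 */
  assert(RGBToY_ref(255, 255, 255) == 235); /* (56100 + 4224) >> 8 */
  return 0;
}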
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
#endif
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
return STATIC_CAST(
uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
return STATIC_CAST(
uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
}
#endif
// ARGBToY_C and ARGBToUV_C
// Intel version mimics SSE/AVX, which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = src_rgb[B] + src_rgb1[B]; \
uint16_t ag = src_rgb[G] + src_rgb1[G]; \
uint16_t ar = src_rgb[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
} \
}
#endif
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
#undef MAKEROWY
// JPeg uses a variation on BT.601-1 full range
// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b 0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r 0.50000 * 255 = 127.5 = 127
// 8 bit
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#if defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimics SSE/AVX, which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
} \
}
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
void RGB565ToYRow_C(const uint8_t *src_rgb565, uint8_t *dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f;
uint8_t g = STATIC_CAST(
uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r = src_rgb565[1] >> 3;
b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_y[0] = RGBToY(r, g, b);
src_rgb565 += 2;
dst_y += 1;
}
}
void RGB565ToUVRow_C(const uint8_t *src_rgb565,
int src_stride_rgb565,
uint8_t *dst_u,
uint8_t *dst_v,
int width) {
const uint8_t *next_rgb565 = src_rgb565 + src_stride_rgb565;
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
uint8_t g0 = STATIC_CAST(
uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
uint8_t g1 = STATIC_CAST(
uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
uint8_t g2 = STATIC_CAST(
uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
uint8_t g3 = STATIC_CAST(
uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_rgb565 += 4;
next_rgb565 += 4;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
uint8_t g0 = STATIC_CAST(
uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
uint8_t g2 = STATIC_CAST(
uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
}
}
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 24
#undef REPEAT8
#undef SHADE
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
#undef REPEAT8
#undef SHADE
#define SHADE(f, v) clamp255(v + f)
#undef SHADE
#define SHADE(f, v) clamp0(f - v)
#undef SHADE
// Macros to create SIMD specific yuv to rgb conversion constants.
// clang-format off
#if defined(__aarch64__) || defined(__arm__)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
{{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
{YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
0, 0}}
#else
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
{{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif
// clang-format on
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
// TODO(fbarchard): Generate SIMD structures from float matrix.
// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 + U * 2.018
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
#endif
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.601 full range YUV to RGB reference (aka JPEG)
// * R = Y + V * 1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y + U * 1.77200
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
#define UB 113 /* round(1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR 90 /* round(1.40200 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
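These coefficients are the float factors scaled by 64, so a chroma term such as U * 1.772 becomes ((u - 128) * UB) / 64 in the integer path. A rough spot-check, assuming the 64x scaling is undone with a final divide by 64 when the RGB result is packed (illustrative only; the library's own path, via CALC_RGB16 below, also folds in the Y bias):

#include <stdio.h>

int main(void) {
  int y = 200, u = 64;                       /* full-range sample */
  double b_float = y + 1.772 * (u - 128);    /* reference formula above: 86.6 */
  int b_fixed = y + ((u - 128) * 113) / 64;  /* UB = round(1.772 * 64) = 113 -> 87 */
  printf("float B = %.1f, fixed-point B = %d\n", b_float, b_fixed);
  return 0;
}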
// BT.709 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.793
// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
// B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
#endif
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.709 full range YUV to RGB reference
// R = Y + V * 1.5748
// G = Y - U * 0.18732 - V * 0.46812
// B = Y + U * 1.8556
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
#define UB 119 /* round(1.8556 * 64) */
#define UG 12 /* round(0.18732 * 64) */
#define VG 30 /* round(0.46812 * 64) */
#define VR 101 /* round(1.5748 * 64) */
// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.2020 limited range YUV to RGB reference
// R = (Y - 16) * 1.164384 + V * 1.67867
// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
// B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
#endif
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.2020 full range YUV to RGB reference
// R = Y + V * 1.474600
// G = Y - U * 0.164553 - V * 0.571353
// B = Y + U * 1.881400
// KR = 0.2627; KB = 0.0593
#define UB 120 /* round(1.881400 * 64) */
#define UG 11 /* round(0.164553 * 64) */
#define VG 37 /* round(0.571353 * 64) */
#define VR 94 /* round(1.474600 * 64) */
// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
#undef BB
#undef BG
#undef BR
#undef MAKEYUVCONSTANTS
#if defined(__aarch64__) || defined(__arm__)
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVCoeff[0]; \
int vr = yuvconstants->kUVCoeff[1]; \
int ug = yuvconstants->kUVCoeff[2]; \
int vg = yuvconstants->kUVCoeff[3]; \
int yg = yuvconstants->kRGBCoeffBias[0]; \
int bb = yuvconstants->kRGBCoeffBias[1]; \
int bg = yuvconstants->kRGBCoeffBias[2]; \
int br = yuvconstants->kRGBCoeffBias[3]
#define CALC_RGB16 \
int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
int b16 = y1 + (u * ub) - bb; \
int g16 = y1 + bg - (u * ug + v * vg); \
int r16 = y1 + (v * vr) - br
#else
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVToB[0]; \
int ug = yuvconstants->kUVToG[0]; \
int vg = yuvconstants->kUVToG[1]; \
int vr = yuvconstants->kUVToR[1]; \
int yg = yuvconstants->kYToRgb[0]; \
int yb = yuvconstants->kYBiasToRgb[0]
#define CALC_RGB16 \
int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
int8_t ui = (int8_t)u; \
int8_t vi = (int8_t)v; \
ui -= 0x80; \
vi -= 0x80; \
int b16 = y1 + (ui * ub); \
int g16 = y1 - (ui * ug + vi * vg); \
int r16 = y1 + (vi * vr)
#endif
void MirrorRow_C(const uint8_t *src, uint8_t *dst, int width) {
int x;
src += width - 1;
for (x = 0; x < width - 1; x += 2) {
dst[x] = src[0];
dst[x + 1] = src[-1];
src -= 2;
}
if (width & 1) {
dst[width - 1] = src[0];
}
}
// Use scale to convert lsb formats to msb, depending on how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
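For example, with scale 16384 a 10-bit sample is effectively shifted right by 2, so the 10-bit maximum maps to the 8-bit maximum. A standalone check (not part of the file):

#include <assert.h>

static int clamp255_ref(int v) { return v > 255 ? 255 : v; }
#define C16TO8_REF(v, scale) clamp255_ref(((v) * (scale)) >> 16)

int main(void) {
  assert(C16TO8_REF(1023, 16384) == 255); /* 10-bit max -> 255 */
  assert(C16TO8_REF(512, 16384) == 128);  /* 10-bit mid -> 128 */
  assert(C16TO8_REF(4095, 4096) == 255);  /* 12-bit max -> 255 */
  return 0;
}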
void CopyRow_C(const uint8_t *src, uint8_t *dst, int count) {
memcpy(dst, src, count);
}
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
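Each entry holds 1.0 (0x0100) in the upper 16 bits and 0x10000 / a in the lower 16 bits, so un-premultiplying a channel becomes a multiply and shift: (c * (0x10000 / a)) >> 8 is c * 256 / a, within about 1 of the exact c * 255 / a mentioned above. A minimal sketch of how such a table can be used (illustrative; not the library's own row function):

static uint8_t UnattenuateOne(uint8_t c, uint8_t a) {
  uint32_t ia = fixed_invtbl8[a] & 0xffff;  /* 8.8 fixed-point reciprocal of a */
  uint32_t v = ((uint32_t)c * ia) >> 8;     /* roughly c * 256 / a */
  return (uint8_t)(v > 255 ? 255 : v);      /* clamp the slight overshoot */
}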
// Blend 2 rows into 1.
static void HalfRow_C(const uint8_t *src_uv,
ptrdiff_t src_uv_stride,
uint8_t *dst_uv,
int width) {
int x;
for (x = 0; x < width; ++x) {
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
}
}
// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
const uint8_t *src_ptr1 = src_ptr + src_stride;
int x;
assert(source_y_fraction >= 0);
assert(source_y_fraction < 256);
if (y1_fraction == 0) {
memcpy(dst_ptr, src_ptr, width);
return;
}
if (y1_fraction == 128) {
HalfRow_C(src_ptr, src_stride, dst_ptr, width);
return;
}
for (x = 0; x < width; ++x) {
dst_ptr[0] = STATIC_CAST(
uint8_t,
(src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
++src_ptr;
++src_ptr1;
++dst_ptr;
}
}
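For example, a quarter-step blend (source_y_fraction = 64) of bytes 100 and 200 gives (100 * 192 + 200 * 64 + 128) >> 8 = 32128 >> 8 = 125, matching 100 + 0.25 * (200 - 100); the two special cases above simply short-circuit the 0/256 and 128/256 weights to a straight copy and a rounded two-row average.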
// Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif
#undef STATIC_CAST

File diff suppressed because it is too large

View file

@@ -1,946 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "scale.h"
#include <assert.h>
#include <string.h>
#include "cpu_id.h"
#include "planar_functions.h" // For CopyPlane
#include "row.h"
#include "scale_row.h"
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.
static void ScalePlaneDown2(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown2)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width) =
filtering == kFilterNone
? ScaleRowDown2_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_C
: ScaleRowDown2Box_C);
int row_stride = src_stride * 2;
(void) src_width;
(void) src_height;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_SSSE3
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
: ScaleRowDown2Box_Any_SSSE3);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_SSSE3
: (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
: ScaleRowDown2Box_SSSE3);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_AVX2
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
: ScaleRowDown2Box_Any_AVX2);
if (IS_ALIGNED(dst_width, 32)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
: (filtering == kFilterLinear
? ScaleRowDown2Linear_AVX2
: ScaleRowDown2Box_AVX2);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
// TODO(fbarchard): Loop through source height to allow odd height.
for (y = 0; y < dst_height; ++y) {
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += row_stride;
dst_ptr += dst_stride;
}
}
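The Box variants of ScaleRowDown2 average each 2x2 source block with rounding. A plain reference for one output pixel (a sketch of the C kernel's arithmetic, not the kernel itself):

static uint8_t Down2Box_ref(const uint8_t* row0, const uint8_t* row1, int x) {
  /* rounded average of the 2x2 block starting at source column 2 * x */
  return (uint8_t)((row0[2 * x] + row0[2 * x + 1] +
                    row1[2 * x] + row1[2 * x + 1] + 2) >> 2);
}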
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
static void ScalePlaneDown4(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown4)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
int row_stride = src_stride * 4;
(void) src_width;
(void) src_height;
if (!filtering) {
src_ptr += src_stride * 2; // Point to row 2.
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN4_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (y = 0; y < dst_height; ++y) {
ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += row_stride;
dst_ptr += dst_stride;
}
}
// Scale plane down, 3/4
static void ScalePlaneDown34(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown34_0)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void) src_width;
(void) src_height;
assert(dst_width % 3 == 0);
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_C;
ScaleRowDown34_1 = ScaleRowDown34_C;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
}
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
}
if (dst_width % 24 == 0) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
}
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 2;
dst_ptr += dst_stride;
}
// Remainder 1 or 2 rows with last row vertically unfiltered
if ((dst_height % 3) == 2) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
} else if ((dst_height % 3) == 1) {
ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
}
}
// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses a box filter arranged like this
// aaabbbcc -> abc
// aaabbbcc def
// aaabbbcc ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2
static void ScalePlaneDown38(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown38_3)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
assert(dst_width % 3 == 0);
(void) src_width;
(void) src_height;
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_C;
ScaleRowDown38_2 = ScaleRowDown38_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
}
#if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
}
if (dst_width % 12 == 0 && !filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
}
if (dst_width % 6 == 0 && filtering) {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 2;
dst_ptr += dst_stride;
}
// Remainder 1 or 2 rows with last row vertically unfiltered
if ((dst_height % 3) == 2) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
} else if ((dst_height % 3) == 1) {
ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
}
}
#define MIN1(x) ((x) < 1 ? 1 : (x))
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t *src_ptr) {
uint32_t sum = 0u;
int x;
assert(iboxwidth > 0);
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
return sum;
}
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t *src_ptr) {
uint32_t sum = 0u;
int x;
assert(iboxwidth > 0);
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
return sum;
}
static void ScaleAddCols2_C(int dst_width,
int boxheight,
int x,
int dx,
const uint16_t *src_ptr,
uint8_t *dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = dx >> 16;
int boxwidth;
scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
int scaletbl_index = boxwidth - minboxwidth;
assert((scaletbl_index == 0) || (scaletbl_index == 1));
*dst_ptr++ = (uint8_t) (SumPixels(boxwidth, src_ptr + ix) *
scaletbl[scaletbl_index] >>
16);
}
}
static void ScaleAddCols0_C(int dst_width,
int boxheight,
int x,
int dx,
const uint16_t *src_ptr,
uint8_t *dst_ptr) {
int scaleval = 65536 / boxheight;
int i;
(void) dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = (uint8_t) (src_ptr[i] * scaleval >> 16);
}
}
static void ScaleAddCols1_C(int dst_width,
int boxheight,
int x,
int dx,
const uint16_t *src_ptr,
uint8_t *dst_ptr) {
int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = (uint8_t) (SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}
// Scale plane down to any dimensions, with interpolation (box filter).
//
// Same method as SimpleScale, which is fixed point: 16.16 fixed point is
// used to step through the source, and each destination pixel is produced
// by averaging the box of source pixels it covers.
static void ScalePlaneBox(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr) {
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
const int max_y = (src_height << 16);
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
&dx, &dy);
src_width = Abs(src_width);
{
// Allocate a row buffer of uint16_t.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16_t *src_ptr, uint8_t *dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C
: ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
void (*ScaleAddRow)(const uint8_t *src_ptr, uint16_t *dst_ptr,
int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleAddRow = ScaleAddRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_SSE2;
}
}
#endif
#if defined(HAS_SCALEADDROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleAddRow = ScaleAddRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
ScaleAddRow = ScaleAddRow_AVX2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8_t *src = src_ptr + iy * (int64_t) src_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
boxheight = MIN1((y >> 16) - iy);
memset(row16, 0, src_width * 2);
for (k = 0; k < boxheight; ++k) {
ScaleAddRow(src, (uint16_t *) (row16), src_width);
src += src_stride;
}
ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t *) (row16), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row16);
}
}
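/*
 * Illustrative sketch (not libyuv code): how the 16.16 stepping above chooses
 * per-column box widths for a 10 -> 4 scale. dx = (10 << 16) / 4 = 0x28000
 * (2.5), so successive boxes cover 2, 3, 2 and 3 source columns, which is the
 * split that ScaleAddCols2_C normalizes with its two-entry scaletbl.
 */
static void ExampleBoxWidths10To4(int widths[4]) {
  int x = 0;
  int dx = (10 << 16) / 4;
  int i;
  for (i = 0; i < 4; ++i) {
    int ix = x >> 16;
    x += dx;
    widths[i] = (x >> 16) - ix;  // 2, 3, 2, 3
  }
}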
// Scale plane down with bilinear interpolation.
static void ScalePlaneBilinearDown(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width);
const int max_y = (src_height - 1) << 16;
int j;
void (*ScaleFilterCols)(uint8_t *dst_ptr, const uint8_t *src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
void (*InterpolateRow)(uint8_t *dst_ptr, const uint8_t *src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t *src = src_ptr + yi * (int64_t) src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(row, src, src_stride, src_width, yf);
ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
}
dst_ptr += dst_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
}
free_aligned_buffer_64(row);
}
// Scale plane up vertically (width may scale either way) with bilinear interpolation.
static void ScalePlaneBilinearUp(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr,
enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
void (*InterpolateRow)(uint8_t *dst_ptr, const uint8_t *src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleFilterCols)(uint8_t *dst_ptr, const uint8_t *src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_C;
}
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint8_t *src = src_ptr + yi * (int64_t) src_stride;
// Allocate 2 row buffers.
const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, row_size * 2);
uint8_t *rowptr = row;
int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
if (src_height > 1) {
src += src_stride;
}
ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
if (src_height > 2) {
src += src_stride;
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * (int64_t) src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
if ((y + 65536) < max_y) {
src += src_stride;
}
}
}
if (filtering == kFilterLinear) {
InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
}
dst_ptr += dst_stride;
y += dy;
}
free_aligned_buffer_64(row);
}
}
// Scale plane, horizontally up by 2 times.
// Uses linear filter horizontally, nearest vertically.
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of I422 to I444.
static void ScalePlaneUp2_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr) {
void (*ScaleRowUp)(const uint8_t *src_ptr, uint8_t *dst_ptr, int dst_width) =
ScaleRowUp2_Linear_Any_C;
int i;
int y;
int dy;
(void) src_width;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t) src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * (int64_t) src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
}
}
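/*
 * Illustrative check (not libyuv code): for src_height = 3 and dst_height = 5
 * the branch above computes dy = FixedDiv(2, 4) = 0x8000 and starts y at
 * 0x7fff, so the five output rows read source rows 0, 0, 1, 1 and 2, i.e. a
 * centered nearest-neighbor row selection in the vertical direction.
 */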
// Scale plane, up by 2 times.
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of I420 to I444.
static void ScalePlaneUp2_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr) {
void (*Scale2RowUp)(const uint8_t *src_ptr, ptrdiff_t src_stride,
uint8_t *dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_Any_C;
int x;
(void) src_width;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
}
#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
// TODO(fbarchard): Test performance of writing one row of destination at a
// time.
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
}
}
// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScalePlaneSimple(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_ptr,
uint8_t *dst_ptr) {
int i;
void (*ScaleCols)(uint8_t *dst_ptr, const uint8_t *src_ptr, int dst_width,
int x, int dx) = ScaleCols_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
&dx, &dy);
src_width = Abs(src_width);
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t) src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
}
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
void ScalePlane(const uint8_t *src,
int src_stride,
int src_width,
int src_height,
uint8_t *dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t) src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) {
// Straight copy.
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = 0;
int y = 0;
// When scaling down, use the center 2 rows to filter.
// When scaling up, last row of destination uses the last 2 source rows.
if (dst_height <= src_height) {
dy = FixedDiv(src_height, dst_height);
y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter.
} else if (src_height > 1 && dst_height > 1) {
dy = FixedDiv1(src_height, dst_height);
}
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
return;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
// Scale down.
if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
// optimized, 3/4
ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
return;
}
if (2 * dst_width == src_width && 2 * dst_height == src_height) {
// optimized, 1/2
ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
return;
}
// 3/8 rounded up for odd sized chroma height.
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
// optimized, 3/8
ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
(filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
return;
}
}
if (filtering == kFilterBox && dst_height * 2 < src_height) {
ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst);
return;
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if (filtering && dst_height > src_height) {
ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
if (filtering) {
ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst);
}
LIBYUV_API
int I420Scale(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
return 0;
}
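/*
 * Illustrative usage sketch (not part of the library): downscale a contiguous
 * 640x480 I420 frame to 320x240 with bilinear filtering. Plane offsets assume
 * tightly packed Y, U, V planes; real callers should pass their own strides.
 */
static int ExampleScaleI420(const uint8_t *src, uint8_t *dst) {
  const int sw = 640, sh = 480, dw = 320, dh = 240;
  const uint8_t *src_y = src;
  const uint8_t *src_u = src_y + sw * sh;
  const uint8_t *src_v = src_u + (sw / 2) * (sh / 2);
  uint8_t *dst_y = dst;
  uint8_t *dst_u = dst_y + dw * dh;
  uint8_t *dst_v = dst_u + (dw / 2) * (dh / 2);
  return I420Scale(src_y, sw, src_u, sw / 2, src_v, sw / 2, sw, sh,
                   dst_y, dw, dst_u, dw / 2, dst_v, dw / 2, dw, dh,
                   kFilterBilinear);
}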

View file

@ -1,53 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_H_
#define INCLUDE_LIBYUV_SCALE_H_
#include "basic_types.h"
// Supported filtering.
typedef enum FilterMode {
kFilterNone = 0, // Point sample; Fastest.
kFilterLinear = 1, // Filter horizontally only.
kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
kFilterBox = 3 // Highest quality.
} FilterModeEnum;
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce an even better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I420Scale(const uint8_t *src_y,
int src_stride_y,
const uint8_t *src_u,
int src_stride_u,
const uint8_t *src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t *dst_y,
int dst_stride_y,
uint8_t *dst_u,
int dst_stride_u,
uint8_t *dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
#endif // INCLUDE_LIBYUV_SCALE_H_

View file

@ -1,632 +0,0 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "scale_row.h"
// Fixed scale down.
// Mask may be non-power of 2, so use MOD
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
int dst_width) { \
int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
int n = dst_width - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, r); \
}
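/*
 * Illustrative sketch (not libyuv code): the remainder split that SDANY
 * performs, written out for MASK = 15 (a 16-pixel SIMD row). For
 * dst_width = 100 the SIMD path handles n = 96 pixels and the C fallback
 * finishes r = 4.
 */
static void ExampleRemainderSplit(int dst_width, int *n, int *r) {
  *r = (int)((unsigned int)dst_width % 16);
  *n = dst_width - *r;
}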
// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
int dst_width) { \
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
int n = (dst_width - 1) - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, r + 1); \
}
#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3,
ScaleRowDown2Linear_SSSE3,
ScaleRowDown2Linear_C,
2,
1,
15)
SDANY(ScaleRowDown2Box_Any_SSSE3,
ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_C,
2,
1,
15)
SDODD(ScaleRowDown2Box_Odd_SSSE3,
ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_Odd_C,
2,
1,
15)
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
SDANY(ScaleUVRowDown2Box_Any_SSSE3,
ScaleUVRowDown2Box_SSSE3,
ScaleUVRowDown2Box_C,
2,
2,
3)
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
SDANY(ScaleUVRowDown2Box_Any_AVX2,
ScaleUVRowDown2Box_AVX2,
ScaleUVRowDown2Box_C,
2,
2,
7)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_AVX2,
ScaleRowDown2Linear_AVX2,
ScaleRowDown2Linear_C,
2,
1,
31)
SDANY(ScaleRowDown2Box_Any_AVX2,
ScaleRowDown2Box_AVX2,
ScaleRowDown2Box_C,
2,
1,
31)
SDODD(ScaleRowDown2Box_Odd_AVX2,
ScaleRowDown2Box_AVX2,
ScaleRowDown2Box_Odd_C,
2,
1,
31)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3,
ScaleRowDown4Box_SSSE3,
ScaleRowDown4Box_C,
4,
1,
7)
#endif
#ifdef HAS_SCALEROWDOWN4_AVX2
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_AVX2,
ScaleRowDown4Box_AVX2,
ScaleRowDown4Box_C,
4,
1,
15)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3,
ScaleRowDown34_SSSE3,
ScaleRowDown34_C,
4 / 3,
1,
23)
SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
ScaleRowDown34_0_Box_SSSE3,
ScaleRowDown34_0_Box_C,
4 / 3,
1,
23)
SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
ScaleRowDown34_1_Box_SSSE3,
ScaleRowDown34_1_Box_C,
4 / 3,
1,
23)
#endif
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
ScaleRowDown38_SSSE3,
ScaleRowDown38_C,
8 / 3,
1,
11)
SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
ScaleRowDown38_3_Box_SSSE3,
ScaleRowDown38_3_Box_C,
8 / 3,
1,
5)
SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
ScaleRowDown38_2_Box_SSSE3,
ScaleRowDown38_2_Box_C,
8 / 3,
1,
5)
#endif
#undef SDANY
// Scale down by even scale factor.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
uint8_t* dst_ptr, int dst_width) { \
int r = dst_width & MASK; \
int n = dst_width & ~MASK; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
dst_ptr + n * BPP, r); \
}
#ifdef SASIMDONLY
// This also works and uses memcpy and SIMD instead of C, but is slower on ARM
// Add rows box filter scale down. Using macro from row_any
#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint16_t dst_temp[32]); \
SIMD_ALIGNED(uint8_t src_temp[32]); \
memset(dst_temp, 0, 32 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \
ANY_SIMD(src_temp, dst_temp, MASK + 1); \
memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \
}
#ifdef HAS_SCALEADDROW_SSE2
SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
#endif
#undef SAROW
#else
// Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
int n = src_width & ~MASK; \
if (n > 0) { \
SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
} \
SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
}
#ifdef HAS_SCALEADDROW_SSE2
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
#undef SAANY
#endif // SASIMDONLY
// Scale up horizontally 2 times using linear filter.
#define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
dst_ptr[0] = src_ptr[0]; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(src_ptr, dst_ptr + 1, n); \
} \
C(src_ptr + (n / 2), dst_ptr + n + 1, r); \
} \
dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \
}
// Even the C versions need to be wrapped, because boundary pixels have to
// be handled differently
SUH2LANY(ScaleRowUp2_Linear_Any_C,
ScaleRowUp2_Linear_C,
ScaleRowUp2_Linear_C,
0,
uint8_t)
SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
ScaleRowUp2_Linear_16_C,
ScaleRowUp2_Linear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
ScaleRowUp2_Linear_SSE2,
ScaleRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
ScaleRowUp2_Linear_SSSE3,
ScaleRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
ScaleRowUp2_Linear_12_SSSE3,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
ScaleRowUp2_Linear_16_SSE2,
ScaleRowUp2_Linear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
ScaleRowUp2_Linear_AVX2,
ScaleRowUp2_Linear_C,
31,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
ScaleRowUp2_Linear_12_AVX2,
ScaleRowUp2_Linear_16_C,
31,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
ScaleRowUp2_Linear_16_AVX2,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#undef SUH2LANY
// Scale up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \
ptrdiff_t dst_stride, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
const PTYPE* sa = src_ptr; \
const PTYPE* sb = src_ptr + src_stride; \
PTYPE* da = dst_ptr; \
PTYPE* db = dst_ptr + dst_stride; \
da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \
db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(sa, sb - sa, da + 1, db - da, n); \
} \
C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \
} \
da[dst_width - 1] = \
(3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \
db[dst_width - 1] = \
(sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \
}
SU2BLANY(ScaleRowUp2_Bilinear_Any_C,
ScaleRowUp2_Bilinear_C,
ScaleRowUp2_Bilinear_C,
0,
uint8_t)
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
ScaleRowUp2_Bilinear_16_C,
ScaleRowUp2_Bilinear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
ScaleRowUp2_Bilinear_SSE2,
ScaleRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
ScaleRowUp2_Bilinear_12_SSSE3,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2,
ScaleRowUp2_Bilinear_16_SSE2,
ScaleRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
ScaleRowUp2_Bilinear_SSSE3,
ScaleRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
ScaleRowUp2_Bilinear_AVX2,
ScaleRowUp2_Bilinear_C,
31,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
ScaleRowUp2_Bilinear_12_AVX2,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
ScaleRowUp2_Bilinear_16_AVX2,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#undef SU2BLANY
// Scale bi-planar plane up horizontally 2 times using linear filter.
#define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
dst_ptr[0] = src_ptr[0]; \
dst_ptr[1] = src_ptr[1]; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(src_ptr, dst_ptr + 2, n); \
} \
C(src_ptr + n, dst_ptr + 2 * n + 2, r); \
} \
dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \
dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \
}
SBUH2LANY(ScaleUVRowUp2_Linear_Any_C,
ScaleUVRowUp2_Linear_C,
ScaleUVRowUp2_Linear_C,
0,
uint8_t)
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
ScaleUVRowUp2_Linear_16_C,
ScaleUVRowUp2_Linear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
ScaleUVRowUp2_Linear_SSSE3,
ScaleUVRowUp2_Linear_C,
7,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
ScaleUVRowUp2_Linear_AVX2,
ScaleUVRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
ScaleUVRowUp2_Linear_16_SSE41,
ScaleUVRowUp2_Linear_16_C,
3,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
ScaleUVRowUp2_Linear_16_AVX2,
ScaleUVRowUp2_Linear_16_C,
7,
uint16_t)
#endif
#undef SBUH2LANY
// Scale bi-planar plane up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \
ptrdiff_t dst_stride, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
const PTYPE* sa = src_ptr; \
const PTYPE* sb = src_ptr + src_stride; \
PTYPE* da = dst_ptr; \
PTYPE* db = dst_ptr + dst_stride; \
da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \
db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \
da[1] = (3 * sa[1] + sb[1] + 2) >> 2; \
db[1] = (sa[1] + 3 * sb[1] + 2) >> 2; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(sa, sb - sa, da + 2, db - da, n); \
} \
C(sa + n, sb - sa, da + 2 * n + 2, db - da, r); \
} \
da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] + \
sb[((dst_width + 1) & ~1) - 2] + 2) >> \
2; \
db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] + \
3 * sb[((dst_width + 1) & ~1) - 2] + 2) >> \
2; \
da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] + \
sb[((dst_width + 1) & ~1) - 1] + 2) >> \
2; \
db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] + \
3 * sb[((dst_width + 1) & ~1) - 1] + 2) >> \
2; \
}
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C,
ScaleUVRowUp2_Bilinear_C,
ScaleUVRowUp2_Bilinear_C,
0,
uint8_t)
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
ScaleUVRowUp2_Bilinear_16_C,
ScaleUVRowUp2_Bilinear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
ScaleUVRowUp2_Bilinear_SSSE3,
ScaleUVRowUp2_Bilinear_C,
7,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
ScaleUVRowUp2_Bilinear_AVX2,
ScaleUVRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
ScaleUVRowUp2_Bilinear_16_SSE41,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
ScaleUVRowUp2_Bilinear_16_AVX2,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#undef SBU2BLANY

View file

@ -1,930 +0,0 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "scale.h"
#include <assert.h>
#include "cpu_id.h"
#include "row.h"
#include "scale_row.h"
#define STATIC_CAST(type, expr) (type)(expr)
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
return (-(v >= 255) | v) & 255;
}
// Use scale to convert lsb formats to msb, depending on how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
// CPU agnostic row functions
void ScaleRowDown2_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
int x;
(void) src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src_ptr[1];
dst[1] = src_ptr[3];
dst += 2;
src_ptr += 4;
}
if (dst_width & 1) {
dst[0] = src_ptr[1];
}
}
void ScaleRowDown2Linear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
const uint8_t *s = src_ptr;
int x;
(void) src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + 1) >> 1;
dst[1] = (s[2] + s[3] + 1) >> 1;
dst += 2;
s += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + 1) >> 1;
}
}
void ScaleRowDown2Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
}
}
void ScaleRowDown2Box_Odd_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
int x;
dst_width -= 1;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst += 1;
s += 2;
t += 2;
}
dst[0] = (s[0] + t[0] + 1) >> 1;
}
void ScaleRowDown4_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
int x;
(void) src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src_ptr[2];
dst[1] = src_ptr[6];
dst += 2;
src_ptr += 8;
}
if (dst_width & 1) {
dst[0] = src_ptr[2];
}
}
void ScaleRowDown4Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
intptr_t stride = src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
src_ptr[stride * 3 + 3] + 8) >>
4;
dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
src_ptr[stride * 3 + 7] + 8) >>
4;
dst += 2;
src_ptr += 8;
}
if (dst_width & 1) {
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
src_ptr[stride * 3 + 3] + 8) >>
4;
}
}
void ScaleRowDown34_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
int x;
(void) src_stride;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
dst[0] = src_ptr[0];
dst[1] = src_ptr[1];
dst[2] = src_ptr[3];
dst += 3;
src_ptr += 4;
}
}
// Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *d,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
d += 3;
s += 4;
t += 4;
}
}
// Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *d,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
d += 3;
s += 4;
t += 4;
}
}
// Sample position: (O is src sample position, X is dst sample position)
//
// v dst_ptr at here v stop at here
// X O X X O X X O X X O X X O X
// ^ src_ptr at here
void ScaleRowUp2_Linear_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
}
}
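/*
 * Illustrative check (not libyuv code): with source samples 8 and 16 the loop
 * above produces (8 * 3 + 16 + 2) >> 2 = 10 and (8 + 16 * 3 + 2) >> 2 = 14,
 * i.e. output pixels sitting 1/4 and 3/4 of the way between the two sources,
 * matching the sample-position diagram above.
 */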
// Sample position: (O is src sample position, X is dst sample position)
//
// src_ptr at here
// X v X X X X X X X X X
// O O O O O
// X X X X X X X X X X
// ^ dst_ptr at here ^ stop at here
// X X X X X X X X X X
// O O O O O
// X X X X X X X X X X
void ScaleRowUp2_Bilinear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
uint8_t *d = dst_ptr;
uint8_t *e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[2 * x + 0] =
(s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
d[2 * x + 1] =
(s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 0] =
(s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 1] =
(s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
}
}
// Only suitable for at most 14 bit range.
void ScaleRowUp2_Linear_16_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
}
}
// Only suitable for at most 12bit range.
void ScaleRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint16_t *s = src_ptr;
const uint16_t *t = src_ptr + src_stride;
uint16_t *d = dst_ptr;
uint16_t *e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[2 * x + 0] =
(s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
d[2 * x + 1] =
(s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 0] =
(s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 1] =
(s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
}
}
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
(uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
(uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
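/*
 * Illustrative check (not libyuv code): with a = 100, b = 200 and f = 0x8000
 * (one half in 16.16), the ARM form gives
 *   100 + ((0x8000 * 100 + 0x8000) >> 16) = 100 + 50 = 150,
 * and the Intel form, with f >> 9 = 64, gives
 *   100 + ((64 * 100 + 0x40) >> 7) = 100 + 50 = 150,
 * both the expected midpoint of a and b.
 */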
void ScaleFilterCols_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
x += dx;
xi = x >> 16;
a = src_ptr[xi];
b = src_ptr[xi + 1];
dst_ptr[1] = BLENDER(a, b, x & 0xffff);
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
int xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
}
}
void ScaleFilterCols64_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x32,
int dx) {
int64_t x = (int64_t) (x32);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
x += dx;
xi = x >> 16;
a = src_ptr[xi];
b = src_ptr[xi + 1];
dst_ptr[1] = BLENDER(a, b, x & 0xffff);
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
}
}
#undef BLENDER
// Same as 8 bit arm blender but return is cast to uint16_t
#define BLENDER(a, b, f) \
(uint16_t)( \
(int)(a) + \
(int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
#undef BLENDER
void ScaleRowDown38_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width) {
int x;
(void) src_stride;
assert(dst_width % 3 == 0);
for (x = 0; x < dst_width; x += 3) {
dst[0] = src_ptr[0];
dst[1] = src_ptr[3];
dst[2] = src_ptr[6];
dst += 3;
src_ptr += 8;
}
}
// 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] =
(src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
(65536 / 9) >>
16;
dst_ptr[1] =
(src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
(65536 / 9) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
(65536 / 6) >>
16;
src_ptr += 8;
dst_ptr += 3;
}
}
// 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2]) *
(65536 / 6) >>
16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5]) *
(65536 / 6) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
(65536 / 4) >>
16;
src_ptr += 8;
dst_ptr += 3;
}
}
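/*
 * Illustrative check (not libyuv code): 65536 / 9 = 7281, so a 9-pixel sum of
 * 9 * 200 = 1800 becomes (1800 * 7281) >> 16 = 199. The box average is thus
 * computed with one multiply and a shift instead of a divide; truncation of
 * the constant can round the result down by one.
 */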
void ScaleAddRow_C(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width) {
int x;
assert(src_width > 0);
for (x = 0; x < src_width - 1; x += 2) {
dst_ptr[0] += src_ptr[0];
dst_ptr[1] += src_ptr[1];
src_ptr += 2;
dst_ptr += 2;
}
if (src_width & 1) {
dst_ptr[0] += src_ptr[0];
}
}
// UV scale row functions
// same as ARGB but 2 channels
void ScaleUVRowDown2_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width) {
int x;
(void) src_stride;
for (x = 0; x < dst_width; ++x) {
dst_uv[0] = src_uv[2]; // Store the 2nd UV
dst_uv[1] = src_uv[3];
src_uv += 4;
dst_uv += 2;
}
}
void ScaleUVRowDown2Linear_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width) {
int x;
(void) src_stride;
for (x = 0; x < dst_width; ++x) {
dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
src_uv += 4;
dst_uv += 2;
}
}
void ScaleUVRowDown2Box_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width) {
int x;
for (x = 0; x < dst_width; ++x) {
dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
src_uv[src_stride + 2] + 2) >>
2;
dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
src_uv[src_stride + 3] + 2) >>
2;
src_uv += 4;
dst_uv += 2;
}
}
void ScaleUVRowDownEven_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
int src_stepx,
uint8_t *dst_uv,
int dst_width) {
const uint16_t *src = (const uint16_t *) (src_uv);
uint16_t *dst = (uint16_t *) (dst_uv);
(void) src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src[0];
dst[1] = src[src_stepx];
src += src_stepx * 2;
dst += 2;
}
if (dst_width & 1) {
dst[0] = src[0];
}
}
// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst_ptr[0] = src_ptr[x >> 16];
x += dx;
dst_ptr[1] = src_ptr[x >> 16];
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
dst_ptr[0] = src_ptr[x >> 16];
}
}
// Scales a single row of pixels up by 2x using point sampling.
void ScaleColsUp2_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx) {
int j;
(void) x;
(void) dx;
for (j = 0; j < dst_width - 1; j += 2) {
dst_ptr[1] = dst_ptr[0] = src_ptr[0];
src_ptr += 1;
dst_ptr += 2;
}
if (dst_width & 1) {
dst_ptr[0] = src_ptr[0];
}
}
void ScaleUVRowUp2_Linear_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[4 * x + 0] =
(src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
dst_ptr[4 * x + 1] =
(src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
dst_ptr[4 * x + 2] =
(src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
dst_ptr[4 * x + 3] =
(src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
}
}
void ScaleUVRowUp2_Bilinear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint8_t *s = src_ptr;
const uint8_t *t = src_ptr + src_stride;
uint8_t *d = dst_ptr;
uint8_t *e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 1 + 8) >>
4;
d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 1 + 8) >>
4;
d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
t[2 * x + 2] * 3 + 8) >>
4;
d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
t[2 * x + 2] * 3 + 8) >>
4;
e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 9 + 8) >>
4;
e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 9 + 8) >>
4;
}
}
void ScaleUVRowUp2_Linear_16_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[4 * x + 0] =
(src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
dst_ptr[4 * x + 1] =
(src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
dst_ptr[4 * x + 2] =
(src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
dst_ptr[4 * x + 3] =
(src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
}
}
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint16_t *s = src_ptr;
const uint16_t *t = src_ptr + src_stride;
uint16_t *d = dst_ptr;
uint16_t *e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 1 + 8) >>
4;
d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 1 + 8) >>
4;
d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
t[2 * x + 2] * 3 + 8) >>
4;
d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
t[2 * x + 2] * 3 + 8) >>
4;
e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 9 + 8) >>
4;
e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 9 + 8) >>
4;
}
}
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
(uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
void ScaleUVFilterCols_C(uint8_t *dst_uv,
const uint8_t *src_uv,
int dst_width,
int x,
int dx) {
const uint16_t *src = (const uint16_t *) (src_uv);
uint16_t *dst = (uint16_t *) (dst_uv);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int xi = x >> 16;
int xf = (x >> 9) & 0x7f;
uint16_t a = src[xi];
uint16_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
x += dx;
xi = x >> 16;
xf = (x >> 9) & 0x7f;
a = src[xi];
b = src[xi + 1];
dst[1] = BLENDER(a, b, xf);
x += dx;
dst += 2;
}
if (dst_width & 1) {
int xi = x >> 16;
int xf = (x >> 9) & 0x7f;
uint16_t a = src[xi];
uint16_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
}
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
// Scale plane vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_argb,
uint8_t *dst_argb,
int x,
int y,
int dy,
int bpp, // bytes per pixel. 4 for ARGB.
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
void (*InterpolateRow)(uint8_t *dst_argb, const uint8_t *src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
int j;
assert(bpp >= 1 && bpp <= 4);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
if (y > max_y) {
y = max_y;
}
yi = y >> 16;
yf = filtering ? ((y >> 8) & 255) : 0;
InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
dst_width_bytes, yf);
dst_argb += dst_stride;
y += dy;
}
}
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (src_width < 0) {
src_width = -src_width;
}
if (src_height < 0) {
src_height = -src_height;
}
if (filtering == kFilterBox) {
// If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
filtering = kFilterBilinear;
}
}
if (filtering == kFilterBilinear) {
if (src_height == 1) {
filtering = kFilterLinear;
}
// TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
if (dst_height == src_height || dst_height * 3 == src_height) {
filtering = kFilterLinear;
}
// TODO(fbarchard): Remove the 1 pixel wide filter restriction, which is
// there to avoid reading 2 pixels horizontally, which can cause a memory
// exception.
if (src_width == 1) {
filtering = kFilterNone;
}
}
if (filtering == kFilterLinear) {
if (src_width == 1) {
filtering = kFilterNone;
}
// TODO(fbarchard): Detect any odd scale factor and reduce to None.
if (dst_width == src_width || dst_width * 3 == src_width) {
filtering = kFilterNone;
}
}
return filtering;
}
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
// Compute slope values for stepping.
void ScaleSlope(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering,
int *x,
int *y,
int *dx,
int *dy) {
assert(x != NULL);
assert(y != NULL);
assert(dx != NULL);
assert(dy != NULL);
assert(src_width != 0);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
// Check for 1 pixel and avoid FixedDiv overflow.
if (dst_width == 1 && src_width >= 32768) {
dst_width = src_width;
}
if (dst_height == 1 && src_height >= 32768) {
dst_height = src_height;
}
if (filtering == kFilterBox) {
// Scale step for box filtering covers all source pixels equally.
*dx = FixedDiv(Abs(src_width), dst_width);
*dy = FixedDiv(src_height, dst_height);
*x = 0;
*y = 0;
} else if (filtering == kFilterBilinear) {
// Scale step for bilinear sampling renders last pixel once for upsample.
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
if (dst_height <= src_height) {
*dy = FixedDiv(src_height, dst_height);
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
} else if (src_height > 1 && dst_height > 1) {
*dy = FixedDiv1(src_height, dst_height);
*y = 0;
}
} else if (filtering == kFilterLinear) {
// Scale step for bilinear sampling renders last pixel once for upsample.
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
*dy = FixedDiv(src_height, dst_height);
*y = *dy >> 1;
} else {
// Scale step for point sampling duplicates all pixels equally.
*dx = FixedDiv(Abs(src_width), dst_width);
*dy = FixedDiv(src_height, dst_height);
*x = CENTERSTART(*dx, 0);
*y = CENTERSTART(*dy, 0);
}
// Negative src_width means horizontally mirror.
if (src_width < 0) {
*x += (dst_width - 1) * *dx;
*dx = -*dx;
// src_width = -src_width; // Caller must do this.
}
}
#undef CENTERSTART
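/*
 * Illustrative check (not libyuv code): a bilinear 100 -> 50 downscale gives
 * dx = FixedDiv(100, 50) = 0x20000 (2.0) and
 * x = CENTERSTART(0x20000, -32768) = 0x10000 - 0x8000 = 0x8000 (0.5),
 * so the first output sample is centered between source columns 0 and 1.
 */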

File diff suppressed because it is too large

View file

@ -1,768 +0,0 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "basic_types.h"
#include "scale.h"
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
#define HAS_SCALEADDROW_SSE2
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#endif
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2_LINEAR_SSE2
#define HAS_SCALEROWUP2_LINEAR_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_SSE2
#define HAS_SCALEROWUP2_BILINEAR_SSSE3
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
#endif
// The following are available for gcc/clang x86 platforms, but
// require clang 3.4 or gcc 4.7.
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2_LINEAR_AVX2
#define HAS_SCALEROWUP2_BILINEAR_AVX2
#define HAS_SCALEROWUP2_LINEAR_12_AVX2
#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
#define HAS_SCALEROWUP2_LINEAR_16_AVX2
#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
// Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t *src_argb,
uint8_t *dst_argb,
int x,
int y,
int dy,
int bpp,
enum FilterMode filtering);
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_X86(int num, int div);
int FixedDiv1_X86(int num, int div);
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86
#endif
// Compute slope values for stepping.
void ScaleSlope(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering,
int *x,
int *y,
int *dx,
int *dy);
void ScaleRowDown2_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown2Linear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown2Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown4_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown4Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown34_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown34_0_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *d,
int dst_width);
void ScaleRowDown34_1_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *d,
int dst_width);
void ScaleRowUp2_Linear_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleCols_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int,
int);
void ScaleFilterCols_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx);
void ScaleFilterCols64_C(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x32,
int dx);
void ScaleRowDown38_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst,
int dst_width);
void ScaleRowDown38_3_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleAddRow_C(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);
void ScaleUVRowDown2_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowDown2Box_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowDownEven_C(const uint8_t *src_uv,
ptrdiff_t src_stride,
int src_stepx,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowUp2_Linear_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_C(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_C(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_C(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_C(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// Specialized scalers for x86.
void ScaleRowDown2_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Linear_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4Box_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Linear_SSE2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSE2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_SSE2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_SSSE3(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSE2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSE2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_SSSE3(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowDown2_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleAddRow_SSE2(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);
void ScaleAddRow_AVX2(const uint8_t *src_ptr, uint16_t *dst_ptr, int src_width);
void ScaleAddRow_Any_SSE2(const uint8_t *src_ptr,
uint16_t *dst_ptr,
int src_width);
void ScaleAddRow_Any_AVX2(const uint8_t *src_ptr,
uint16_t *dst_ptr,
int src_width);
void ScaleFilterCols_SSSE3(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_SSE2(uint8_t *dst_ptr,
const uint8_t *src_ptr,
int dst_width,
int x,
int dx);
// UV Row functions
void ScaleUVRowDown2Box_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowDown2Box_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_uv,
int dst_width);
void ScaleUVRowDown2Box_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_SSSE3(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_SSSE3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_AVX2(const uint8_t *src_ptr,
uint8_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_AVX2(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_AVX2(const uint16_t *src_ptr,
uint16_t *dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_AVX2(const uint16_t *src_ptr,
ptrdiff_t src_stride,
uint16_t *dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
#endif // INCLUDE_LIBYUV_SCALE_ROW_H_
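The HAS_* macros above only gate which row kernels get compiled; callers still pick among the declared variants. A rough sketch of that selection, assuming the header is reachable as scale_row.h and ignoring the run-time CPU-feature checks the real library also performs:

#include "scale_row.h" /* assumed include path for the declarations above */

typedef void (*ScaleRowDown2Fn)(const uint8_t *src_ptr, ptrdiff_t src_stride,
                                uint8_t *dst, int dst_width);

static ScaleRowDown2Fn PickScaleRowDown2(void) {
  ScaleRowDown2Fn fn = ScaleRowDown2_C; /* portable fallback */
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  fn = ScaleRowDown2_SSSE3;             /* compiled in for x86 builds */
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  fn = ScaleRowDown2_AVX2;              /* compiled in when AVX2 is usable */
#endif
  return fn;
}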

View file

@@ -1,16 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1875
#endif // INCLUDE_LIBYUV_VERSION_H_

View file

@@ -1,50 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "video_common.h"
struct FourCCAliasEntry {
uint32_t alias;
uint32_t canonical;
};
#define NUM_ALIASES 18
static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
{FOURCC_IYUV, FOURCC_I420},
{FOURCC_YU12, FOURCC_I420},
{FOURCC_YU16, FOURCC_I422},
{FOURCC_YU24, FOURCC_I444},
{FOURCC_YUYV, FOURCC_YUY2},
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
{FOURCC_HDYC, FOURCC_UYVY},
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG},
{FOURCC_BA81, FOURCC_BGGR}, // deprecated.
{FOURCC_RGB3, FOURCC_RAW},
{FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
{FOURCC_CM24, FOURCC_RAW}, // kCMPixelFormat_24RGB
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
};
LIBYUV_API
uint32_t CanonicalFourCC(uint32_t fourcc) {
int i;
for (i = 0; i < NUM_ALIASES; ++i) {
if (kFourCCAliases[i].alias == fourcc) {
return kFourCCAliases[i].canonical;
}
}
// Not an alias, so return it as-is.
return fourcc;
}
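A small usage sketch for the alias table above, assuming video_common.h is on the include path: YUYV and YUY2 name the same packed 4:2:2 layout, so CanonicalFourCC folds the former into the latter.

#include <stdio.h>
#include "video_common.h" /* assumed include path */

int main(void) {
  uint32_t c = CanonicalFourCC(FOURCC_YUYV);
  /* Prints the same value twice: YUYV is an alias of YUY2. */
  printf("canonical: 0x%08x, FOURCC_YUY2: 0x%08x\n", c, (uint32_t)FOURCC_YUY2);
  return 0;
}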

View file

@@ -1,212 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
#include "basic_types.h"
//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a FourCC code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
#ifdef __cplusplus
#define FOURCC(a, b, c, d) \
((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
(static_cast<uint32_t>(c) << 16) | /* NOLINT */ \
(static_cast<uint32_t>(d) << 24)) /* NOLINT */
#else
#define FOURCC(a, b, c, d) \
(((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \
((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
#endif
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
enum FourCC {
// 10 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_I010 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 420
FOURCC_I210 = FOURCC('I', '2', '1', '0'), // bt.601 10 bit 422
// 1 Secondary YUV format: row biplanar. deprecated.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 2 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 14 Auxiliary YUV variations: 3 with U and V planes swapped, 1 Alias.
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 =
FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J422 =
FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J444 =
FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J400 =
FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_F420 = FOURCC('F', '4', '2', '0'), // bt.709 full, unofficial fourcc
FOURCC_F422 = FOURCC('F', '4', '2', '2'), // bt.709 full, unofficial fourcc
FOURCC_F444 = FOURCC('F', '4', '4', '4'), // bt.709 full, unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc
FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc
FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc
FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc
FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc
FOURCC_F010 = FOURCC('F', '0', '1', '0'), // bt.709 full range 10 bit 420
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 420
FOURCC_U010 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 420
FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422
FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422
FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422
FOURCC_P010 = FOURCC('P', '0', '1', '0'),
FOURCC_P210 = FOURCC('P', '2', '1', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// deprecated formats. Not supported, but defined for backward compatibility.
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
// Match any fourcc.
FOURCC_ANY = -1,
};
enum FourCCBpp {
// Canonical fourcc codes used in our code.
FOURCC_BPP_I420 = 12,
FOURCC_BPP_I422 = 16,
FOURCC_BPP_I444 = 24,
FOURCC_BPP_I411 = 12,
FOURCC_BPP_I400 = 8,
FOURCC_BPP_NV21 = 12,
FOURCC_BPP_NV12 = 12,
FOURCC_BPP_YUY2 = 16,
FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12, // deprecated
FOURCC_BPP_Q420 = 12,
FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32,
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_AR30 = 32,
FOURCC_BPP_AB30 = 32,
FOURCC_BPP_AR64 = 64,
FOURCC_BPP_AB64 = 64,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
FOURCC_BPP_RGBO = 16,
FOURCC_BPP_R444 = 16,
FOURCC_BPP_RGGB = 8,
FOURCC_BPP_BGGR = 8,
FOURCC_BPP_GRBG = 8,
FOURCC_BPP_GBRG = 8,
FOURCC_BPP_YV12 = 12,
FOURCC_BPP_YV16 = 16,
FOURCC_BPP_YV24 = 24,
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_H422 = 16,
FOURCC_BPP_I010 = 15,
FOURCC_BPP_I210 = 20,
FOURCC_BPP_H010 = 15,
FOURCC_BPP_H210 = 20,
FOURCC_BPP_P010 = 15,
FOURCC_BPP_P210 = 20,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
FOURCC_BPP_YU16 = 16,
FOURCC_BPP_YU24 = 24,
FOURCC_BPP_YUYV = 16,
FOURCC_BPP_YUVS = 16,
FOURCC_BPP_HDYC = 16,
FOURCC_BPP_2VUY = 16,
FOURCC_BPP_JPEG = 1,
FOURCC_BPP_DMB1 = 1,
FOURCC_BPP_BA81 = 8,
FOURCC_BPP_RGB3 = 24,
FOURCC_BPP_BGR3 = 24,
FOURCC_BPP_CM32 = 32,
FOURCC_BPP_CM24 = 24,
// Match any fourcc.
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_
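To make the packing above concrete: FOURCC() stores the four ASCII bytes little-endian, and FourCCBpp gives bits per pixel, so a frame's byte size is width * height * bpp / 8. A small self-contained check, with the macro copied from the C branch above:

#include <stdint.h>
#include <stdio.h>

#define FOURCC(a, b, c, d)                                          \
  (((uint32_t)(a)) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | \
   ((uint32_t)(d) << 24))

int main(void) {
  /* 'I'=0x49 '4'=0x34 '2'=0x32 '0'=0x30 -> 0x30323449 */
  printf("FOURCC_I420 = 0x%08x\n", FOURCC('I', '4', '2', '0'));
  /* I420 is 12 bits per pixel: a 32x32 frame occupies 32*32*12/8 = 1536 bytes. */
  printf("32x32 I420 frame = %d bytes\n", 32 * 32 * 12 / 8);
  return 0;
}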

View file

@@ -115,6 +115,9 @@ func TestYuvPredefined(t *testing.T) {
frame := RawFrame{Data: im, Stride: 32, W: 32, H: 32}
a := pc.Process(frame, 0, PixFmt(libyuv.FourccAbgr))
v := libyuv.Version()
t.Logf("%v", v)
if len(a) != len(should) {
t.Fatalf("diffrent size a: %v, o: %v", len(a), len(should))
}