summary | refs | log | tree | commit | diff
path: root/package/xbmc
diff options
context:
space:
mode:
author: Waldemar Brodkorb <wbx@openadk.org> 2014-06-25 17:16:55 +0200
committer: Waldemar Brodkorb <wbx@openadk.org> 2014-06-25 17:16:55 +0200
commit: 65d1906183283a885b61bb87c023938f29145fe0 (patch)
tree: a1a7ca582307a90cbb1bf229edcda2a8f786f0d5 /package/xbmc
parent: 5fccf9a1157ab2a4f2159ab475852ef8a37f9b5b (diff)
add support for hifiberry from Openelec git
Diffstat (limited to 'package/xbmc')
-rw-r--r-- package/xbmc/Makefile | 4
-rw-r--r-- package/xbmc/patches/xbmc-gotham_rbp_backports.patch | 20665
2 files changed, 20666 insertions, 3 deletions
diff --git a/package/xbmc/Makefile b/package/xbmc/Makefile
index 3c792e3ad..dd0c50c46 100644
--- a/package/xbmc/Makefile
+++ b/package/xbmc/Makefile
@@ -5,7 +5,7 @@ include $(ADK_TOPDIR)/rules.mk
PKG_NAME:= xbmc
PKG_VERSION:= 13.1
-PKG_RELEASE:= 1
+PKG_RELEASE:= 2
PKG_MD5SUM:= 9ce6b6ac89b6aa0b111a1acdf3606e06
PKG_DESCR:= software media player
PKG_SECTION:= mm/video
@@ -64,7 +64,6 @@ AUTOTOOL_STYLE:= autoreconf
CONFIGURE_ENV+= DESTDIR='${WRKINST}' \
TEXTUREPACKER_NATIVE_ROOT='$(STAGING_HOST_DIR)/usr'
CONFIGURE_ARGS+= --disable-optical-drive \
- --disable-optmizations \
--disable-mysql \
--disable-avahi \
--disable-rsxs \
@@ -81,7 +80,6 @@ CONFIGURE_ARGS+= --disable-optical-drive \
--disable-wayland \
--disable-pulse \
--disable-mid \
- --with-ffmpeg \
--enable-alsa \
--enable-libmp3lame \
--enable-libvorbisenc \
diff --git a/package/xbmc/patches/xbmc-gotham_rbp_backports.patch b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch
new file mode 100644
index 000000000..9a4772437
--- /dev/null
+++ b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch
@@ -0,0 +1,20665 @@
+From 1353d8feca19f2f84019797942d70864054db1b0 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:46 +0100
+Subject: [PATCH 01/94] h264_parser: Initialize the h264dsp context in the
+ parser as well
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Each AVStream struct for an H.264 elementary stream actually has two
+copies of the H264DSPContext struct (and in fact all the other members
+of H264Context as well):
+
+((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp
+((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp
+
+but only the first of these was actually being initialised. This
+prevented the addition of platform-specific implementations of
+parser-related functions.
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/h264_parser.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index aff9ba1..a732f79 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -386,6 +386,7 @@ static int init(AVCodecParserContext *s)
+ H264Context *h = s->priv_data;
+ h->thread_context[0] = h;
+ h->slice_context_count = 1;
++ ff_h264dsp_init(&h->h264dsp, 8, 1);
+ return 0;
+ }
+
+--
+1.9.3
+
+
+From 7ea2cb68f6fb1149fce70854e36ed6357a267238 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:47 +0100
+Subject: [PATCH 02/94] h264dsp: Factorize code into a new function,
+ h264_find_start_code_candidate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This performs the start code search which was previously part of
+h264_find_frame_end() - the most CPU intensive part of the function.
+
+By itself, this results in a performance regression:
+ Before After
+ Mean StdDev Mean StdDev Change
+Overall time 2925.6 26.2 3068.5 31.7 -4.7%
+
+but this can more than be made up for by platform-optimised
+implementations of the function.
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/h264_parser.c | 20 +++-----------------
+ lib/ffmpeg/libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++
+ lib/ffmpeg/libavcodec/h264dsp.h | 9 +++++++++
+ 3 files changed, 41 insertions(+), 17 deletions(-)
+
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index a732f79..972aace 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -62,23 +62,9 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si
+ }
+
+ if(state==7){
+-#if HAVE_FAST_UNALIGNED
+- /* we check i<buf_size instead of i+3/7 because its simpler
+- * and there should be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end
+- */
+-# if HAVE_FAST_64BIT
+- while(i<next_avc && !((~*(const uint64_t*)(buf+i) & (*(const uint64_t*)(buf+i) - 0x0101010101010101ULL)) & 0x8080808080808080ULL))
+- i+=8;
+-# else
+- while(i<next_avc && !((~*(const uint32_t*)(buf+i) & (*(const uint32_t*)(buf+i) - 0x01010101U)) & 0x80808080U))
+- i+=4;
+-# endif
+-#endif
+- for(; i<next_avc; i++){
+- if(!buf[i]){
+- state=2;
+- break;
+- }
++ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
++ if (i < buf_size)
++ state = 2;
+ }
+ }else if(state<=2){
+ if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5
+diff --git a/lib/ffmpeg/libavcodec/h264dsp.c b/lib/ffmpeg/libavcodec/h264dsp.c
+index da9e417..b7d61cd 100644
+--- a/lib/ffmpeg/libavcodec/h264dsp.c
++++ b/lib/ffmpeg/libavcodec/h264dsp.c
+@@ -60,6 +60,34 @@
+ #include "h264addpx_template.c"
+ #undef BIT_DEPTH
+
++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
++{
++ int i = 0;
++#if HAVE_FAST_UNALIGNED
++ /* we check i < size instead of i + 3 / 7 because it is
++ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
++ * bytes at the end.
++ */
++#if HAVE_FAST_64BIT
++ while (i < size &&
++ !((~*(const uint64_t *)(buf + i) &
++ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
++ 0x8080808080808080ULL))
++ i += 8;
++#else
++ while (i < size &&
++ !((~*(const uint32_t *)(buf + i) &
++ (*(const uint32_t *)(buf + i) - 0x01010101U)) &
++ 0x80808080U))
++ i += 4;
++#endif
++#endif
++ for (; i < size; i++)
++ if (!buf[i])
++ break;
++ return i;
++}
++
+ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
+ {
+ #undef FUNC
+@@ -146,6 +174,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
+ H264_DSP(8);
+ break;
+ }
++ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
+
+ if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
+ if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
+diff --git a/lib/ffmpeg/libavcodec/h264dsp.h b/lib/ffmpeg/libavcodec/h264dsp.h
+index 98ea15c..1be4804 100644
+--- a/lib/ffmpeg/libavcodec/h264dsp.h
++++ b/lib/ffmpeg/libavcodec/h264dsp.h
+@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
+ /* bypass-transform */
+ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+ void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
++
++ /**
++ * Search buf from the start for up to size bytes. Return the index
++ * of a zero byte, or >= size if not found. Ideally, use lookahead
++ * to filter out any zero bytes that are known to not be followed by
++ * one or more further zero bytes and a one byte. Better still, filter
++ * out any bytes that form the trailing_zero_8bits syntax element too.
++ */
++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
+ } H264DSPContext;
+
+ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+--
+1.9.3
+
+
+From 458ff4b6c1855c529f563dbbd15e35aaab50adae Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:48 +0100
+Subject: [PATCH 03/94] arm: Add assembly version of
+ h264_find_start_code_candidate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+ Before After
+ Mean StdDev Mean StdDev Change
+This function 508.8 23.4 185.4 9.0 +174.4%
+Overall 3068.5 31.7 2752.1 29.4 +11.5%
+
+In combination with the preceding patch:
+ Before After
+ Mean StdDev Mean StdDev Change
+Overall 2925.6 26.2 2752.1 29.4 +6.3%
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/arm/Makefile | 1 +
+ lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++
+ lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c | 4 +
+ lib/ffmpeg/libavcodec/h264_parser.c | 1 -
+ 4 files changed, 258 insertions(+), 1 deletion(-)
+ create mode 100644 lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+
+diff --git a/lib/ffmpeg/libavcodec/arm/Makefile b/lib/ffmpeg/libavcodec/arm/Makefile
+index 7390a8b..480000b71 100644
+--- a/lib/ffmpeg/libavcodec/arm/Makefile
++++ b/lib/ffmpeg/libavcodec/arm/Makefile
+@@ -9,6 +9,7 @@ OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o \
+ OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \
+
+ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
+
+ OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \
+ arm/flacdsp_arm.o \
+diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+new file mode 100644
+index 0000000..c4f12a6
+--- /dev/null
++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+@@ -0,0 +1,253 @@
++/*
++ * Copyright (c) 2013 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of Libav.
++ *
++ * Libav is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * Libav is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with Libav; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++
++RESULT .req a1
++BUF .req a1
++SIZE .req a2
++PATTERN .req a3
++PTR .req a4
++DAT0 .req v1
++DAT1 .req v2
++DAT2 .req v3
++DAT3 .req v4
++TMP0 .req v5
++TMP1 .req v6
++TMP2 .req ip
++TMP3 .req lr
++
++#define PRELOAD_DISTANCE 4
++
++.macro innerloop4
++ ldr DAT0, [PTR], #4
++ subs SIZE, SIZE, #4 @ C flag survives rest of macro
++ sub TMP0, DAT0, PATTERN, lsr #14
++ bic TMP0, TMP0, DAT0
++ ands TMP0, TMP0, PATTERN
++.endm
++
++.macro innerloop16 decrement, do_preload
++ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
++ .ifnc "\do_preload",""
++ pld [PTR, #PRELOAD_DISTANCE*32]
++ .endif
++ .ifnc "\decrement",""
++ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
++ .endif
++ sub TMP0, DAT0, PATTERN, lsr #14
++ sub TMP1, DAT1, PATTERN, lsr #14
++ bic TMP0, TMP0, DAT0
++ bic TMP1, TMP1, DAT1
++ sub TMP2, DAT2, PATTERN, lsr #14
++ sub TMP3, DAT3, PATTERN, lsr #14
++ ands TMP0, TMP0, PATTERN
++ bic TMP2, TMP2, DAT2
++ it eq
++ andseq TMP1, TMP1, PATTERN
++ bic TMP3, TMP3, DAT3
++ itt eq
++ andseq TMP2, TMP2, PATTERN
++ andseq TMP3, TMP3, PATTERN
++.endm
++
++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
++function ff_h264_find_start_code_candidate_armv6, export=1
++ push {v1-v6,lr}
++ mov PTR, BUF
++ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
++ @ before using code that does preloads
++ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
++ blo 60f
++
++ @ Get to word-alignment, 1 byte at a time
++ tst PTR, #3
++ beq 2f
++1: ldrb DAT0, [PTR], #1
++ sub SIZE, SIZE, #1
++ teq DAT0, #0
++ beq 90f
++ tst PTR, #3
++ bne 1b
++2: @ Get to 4-word alignment, 1 word at a time
++ ldr PATTERN, =0x80008000
++ setend be
++ tst PTR, #12
++ beq 4f
++3: innerloop4
++ bne 91f
++ tst PTR, #12
++ bne 3b
++4: @ Get to cacheline (8-word) alignment
++ tst PTR, #16
++ beq 5f
++ innerloop16 16
++ bne 93f
++5: @ Check complete cachelines, with preloading
++ @ We need to stop when there are still (PRELOAD_DISTANCE+1)
++ @ complete cachelines to go
++ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
++6: innerloop16 , do_preload
++ bne 93f
++ innerloop16 32
++ bne 93f
++ bcs 6b
++ @ Preload trailing part-cacheline, if any
++ tst SIZE, #31
++ beq 7f
++ pld [PTR, #(PRELOAD_DISTANCE+1)*32]
++ @ Check remaining data without doing any more preloads. First
++ @ do in chunks of 4 words:
++7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
++ bmi 9f
++8: innerloop16 16
++ bne 93f
++ bcs 8b
++ @ Then in words:
++9: adds SIZE, SIZE, #16 - 4
++ bmi 11f
++10: innerloop4
++ bne 91f
++ bcs 10b
++11: setend le
++ @ Check second byte of final halfword
++ ldrb DAT0, [PTR, #-1]
++ teq DAT0, #0
++ beq 90f
++ @ Check any remaining bytes
++ tst SIZE, #3
++ beq 13f
++12: ldrb DAT0, [PTR], #1
++ sub SIZE, SIZE, #1
++ teq DAT0, #0
++ beq 90f
++ tst SIZE, #3
++ bne 12b
++ @ No candidate found
++13: sub RESULT, PTR, BUF
++ b 99f
++
++60: @ Small buffer - simply check by looping over bytes
++ subs SIZE, SIZE, #1
++ bcc 99f
++61: ldrb DAT0, [PTR], #1
++ subs SIZE, SIZE, #1
++ teq DAT0, #0
++ beq 90f
++ bcs 61b
++ @ No candidate found
++ sub RESULT, PTR, BUF
++ b 99f
++
++90: @ Found a candidate at the preceding byte
++ sub RESULT, PTR, BUF
++ sub RESULT, RESULT, #1
++ b 99f
++
++91: @ Found a candidate somewhere in the preceding 4 bytes
++ sub RESULT, PTR, BUF
++ sub RESULT, RESULT, #4
++ sub TMP0, DAT0, #0x20000
++ bics TMP0, TMP0, DAT0
++ itt pl
++ ldrbpl DAT0, [PTR, #-3]
++ addpl RESULT, RESULT, #2
++ bpl 92f
++ teq RESULT, #0
++ beq 98f @ don't look back a byte if found at first byte in buffer
++ ldrb DAT0, [PTR, #-5]
++92: teq DAT0, #0
++ it eq
++ subeq RESULT, RESULT, #1
++ b 98f
++
++93: @ Found a candidate somewhere in the preceding 16 bytes
++ sub RESULT, PTR, BUF
++ sub RESULT, RESULT, #16
++ teq TMP0, #0
++ beq 95f @ not in first 4 bytes
++ sub TMP0, DAT0, #0x20000
++ bics TMP0, TMP0, DAT0
++ itt pl
++ ldrbpl DAT0, [PTR, #-15]
++ addpl RESULT, RESULT, #2
++ bpl 94f
++ teq RESULT, #0
++ beq 98f @ don't look back a byte if found at first byte in buffer
++ ldrb DAT0, [PTR, #-17]
++94: teq DAT0, #0
++ it eq
++ subeq RESULT, RESULT, #1
++ b 98f
++95: add RESULT, RESULT, #4
++ teq TMP1, #0
++ beq 96f @ not in next 4 bytes
++ sub TMP1, DAT1, #0x20000
++ bics TMP1, TMP1, DAT1
++ itee mi
++ ldrbmi DAT0, [PTR, #-13]
++ ldrbpl DAT0, [PTR, #-11]
++ addpl RESULT, RESULT, #2
++ teq DAT0, #0
++ it eq
++ subeq RESULT, RESULT, #1
++ b 98f
++96: add RESULT, RESULT, #4
++ teq TMP2, #0
++ beq 97f @ not in next 4 bytes
++ sub TMP2, DAT2, #0x20000
++ bics TMP2, TMP2, DAT2
++ itee mi
++ ldrbmi DAT0, [PTR, #-9]
++ ldrbpl DAT0, [PTR, #-7]
++ addpl RESULT, RESULT, #2
++ teq DAT0, #0
++ it eq
++ subeq RESULT, RESULT, #1
++ b 98f
++97: add RESULT, RESULT, #4
++ sub TMP3, DAT3, #0x20000
++ bics TMP3, TMP3, DAT3
++ itee mi
++ ldrbmi DAT0, [PTR, #-5]
++ ldrbpl DAT0, [PTR, #-3]
++ addpl RESULT, RESULT, #2
++ teq DAT0, #0
++ it eq
++ subeq RESULT, RESULT, #1
++ @ drop through to 98f
++98: setend le
++99: pop {v1-v6,pc}
++.endfunc
++
++ .unreq RESULT
++ .unreq BUF
++ .unreq SIZE
++ .unreq PATTERN
++ .unreq PTR
++ .unreq DAT0
++ .unreq DAT1
++ .unreq DAT2
++ .unreq DAT3
++ .unreq TMP0
++ .unreq TMP1
++ .unreq TMP2
++ .unreq TMP3
+diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+index 785b604..2804e56 100644
+--- a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+@@ -24,6 +24,8 @@
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/h264dsp.h"
+
++int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
++
+ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+ void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+@@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
+ {
+ int cpu_flags = av_get_cpu_flags();
+
++ if (have_armv6(cpu_flags))
++ c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
+ if (have_neon(cpu_flags))
+ ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+ }
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index 972aace..363843c 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -65,7 +65,6 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si
+ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+ if (i < buf_size)
+ state = 2;
+- }
+ }else if(state<=2){
+ if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5
+ else if(buf[i]) state = 7;
+--
+1.9.3
+
+
+From 5841d5b69f0df2f286c0a8e419deb16d927e864e Mon Sep 17 00:00:00 2001
+From: popcornmix <popcornmix@gmail.com>
+Date: Mon, 19 Aug 2013 22:48:05 +0100
+Subject: [PATCH 04/94] [ffmpeg] Backport of h264_find_start_code_candidate
+ optimisation
+
+---
+ ...-Initialize-the-h264dsp-context-in-the-pa.patch | 39 +++
+ ...torize-code-into-a-new-function-h264_find.patch | 134 +++++++++
+ ...embly-version-of-h264_find_start_code_can.patch | 322 +++++++++++++++++++++
+ 3 files changed, 495 insertions(+)
+ create mode 100644 lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+ create mode 100644 lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+ create mode 100644 lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+
+diff --git a/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+new file mode 100644
+index 0000000..263578d
+--- /dev/null
++++ b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+@@ -0,0 +1,39 @@
++From 7a82022ee2f9b1fad991ace0936901e7419444be Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:46 +0100
++Subject: [PATCH 1/3] h264_parser: Initialize the h264dsp context in the
++ parser as well
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++Each AVStream struct for an H.264 elementary stream actually has two
++copies of the H264DSPContext struct (and in fact all the other members
++of H264Context as well):
++
++((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp
++((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp
++
++but only the first of these was actually being initialised. This
++prevented the addition of platform-specific implementations of
++parser-related functions.
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/h264_parser.c | 1 +
++ 1 file changed, 1 insertion(+)
++
++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
++index 2ed155c..da2a5f9 100644
++--- a/libavcodec/h264_parser.c
+++++ b/libavcodec/h264_parser.c
++@@ -417,6 +417,7 @@ static av_cold int init(AVCodecParserContext *s)
++ H264Context *h = s->priv_data;
++ h->thread_context[0] = h;
++ h->slice_context_count = 1;
+++ ff_h264dsp_init(&h->h264dsp, 8, 1);
++ return 0;
++ }
++
++--
++1.7.9.5
+diff --git a/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+new file mode 100644
+index 0000000..0151d85
+--- /dev/null
++++ b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+@@ -0,0 +1,134 @@
++From 218d6844b37d339ffbf2044ad07d8be7767e2734 Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:47 +0100
++Subject: [PATCH 2/3] h264dsp: Factorize code into a new function,
++ h264_find_start_code_candidate
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++This performs the start code search which was previously part of
++h264_find_frame_end() - the most CPU intensive part of the function.
++
++By itself, this results in a performance regression:
++ Before After
++ Mean StdDev Mean StdDev Change
++Overall time 2925.6 26.2 3068.5 31.7 -4.7%
++
++but this can more than be made up for by platform-optimised
++implementations of the function.
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/h264_parser.c | 27 +++------------------------
++ libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++
++ libavcodec/h264dsp.h | 9 +++++++++
++ 3 files changed, 41 insertions(+), 24 deletions(-)
++
++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
++index da2a5f9..ef5da98 100644
++--- a/libavcodec/h264_parser.c
+++++ b/libavcodec/h264_parser.c
++@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
++
++ for (i = 0; i < buf_size; i++) {
++ if (state == 7) {
++-#if HAVE_FAST_UNALIGNED
++- /* we check i < buf_size instead of i + 3 / 7 because it is
++- * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
++- * bytes at the end.
++- */
++-#if HAVE_FAST_64BIT
++- while (i < buf_size &&
++- !((~*(const uint64_t *)(buf + i) &
++- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
++- 0x8080808080808080ULL))
++- i += 8;
++-#else
++- while (i < buf_size &&
++- !((~*(const uint32_t *)(buf + i) &
++- (*(const uint32_t *)(buf + i) - 0x01010101U)) &
++- 0x80808080U))
++- i += 4;
++-#endif
++-#endif
++- for (; i < buf_size; i++)
++- if (!buf[i]) {
++- state = 2;
++- break;
++- }
+++ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+++ if (i < buf_size)
+++ state = 2;
++ } else if (state <= 2) {
++ if (buf[i] == 1)
++ state ^= 5; // 2->7, 1->4, 0->5
++diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
++index 3ca6abe..a901dbb 100644
++--- a/libavcodec/h264dsp.c
+++++ b/libavcodec/h264dsp.c
++@@ -53,6 +53,34 @@
++ #include "h264addpx_template.c"
++ #undef BIT_DEPTH
++
+++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
+++{
+++ int i = 0;
+++#if HAVE_FAST_UNALIGNED
+++ /* we check i < size instead of i + 3 / 7 because it is
+++ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+++ * bytes at the end.
+++ */
+++#if HAVE_FAST_64BIT
+++ while (i < size &&
+++ !((~*(const uint64_t *)(buf + i) &
+++ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+++ 0x8080808080808080ULL))
+++ i += 8;
+++#else
+++ while (i < size &&
+++ !((~*(const uint32_t *)(buf + i) &
+++ (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+++ 0x80808080U))
+++ i += 4;
+++#endif
+++#endif
+++ for (; i < size; i++)
+++ if (!buf[i])
+++ break;
+++ return i;
+++}
+++
++ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++ const int chroma_format_idc)
++ {
++@@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++ H264_DSP(8);
++ break;
++ }
+++ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
++
++ if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
++ if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
++diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
++index 1f9f8fe..6249ba7 100644
++--- a/libavcodec/h264dsp.h
+++++ b/libavcodec/h264dsp.h
++@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
++ /* bypass-transform */
++ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
++ void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+++
+++ /**
+++ * Search buf from the start for up to size bytes. Return the index
+++ * of a zero byte, or >= size if not found. Ideally, use lookahead
+++ * to filter out any zero bytes that are known to not be followed by
+++ * one or more further zero bytes and a one byte. Better still, filter
+++ * out any bytes that form the trailing_zero_8bits syntax element too.
+++ */
+++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
++ } H264DSPContext;
++
++ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++--
++1.7.9.5
+diff --git a/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+new file mode 100644
+index 0000000..cdc2d1e
+--- /dev/null
++++ b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+@@ -0,0 +1,322 @@
++From 45e10e5c8d3df09c80a4d80483bff2712367f3fa Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:48 +0100
++Subject: [PATCH 3/3] arm: Add assembly version of
++ h264_find_start_code_candidate
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++ Before After
++ Mean StdDev Mean StdDev Change
++This function 508.8 23.4 185.4 9.0 +174.4%
++Overall 3068.5 31.7 2752.1 29.4 +11.5%
++
++In combination with the preceding patch:
++ Before After
++ Mean StdDev Mean StdDev Change
++Overall 2925.6 26.2 2752.1 29.4 +6.3%
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/arm/Makefile | 1 +
++ libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++++++++++++
++ libavcodec/arm/h264dsp_init_arm.c | 4 +
++ 3 files changed, 258 insertions(+)
++ create mode 100644 libavcodec/arm/h264dsp_armv6.S
++
++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
++index e941aaa..9c64b36 100644
++--- a/libavcodec/arm/Makefile
+++++ b/libavcodec/arm/Makefile
++@@ -45,6 +45,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \
++ arm/simple_idct_armv6.o \
++
++ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
+++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
++ ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
++ arm/hpeldsp_armv6.o
++ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
++diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S
++new file mode 100644
++index 0000000..c4f12a6
++--- /dev/null
+++++ b/libavcodec/arm/h264dsp_armv6.S
++@@ -0,0 +1,253 @@
+++/*
+++ * Copyright (c) 2013 RISC OS Open Ltd
+++ * Author: Ben Avison <bavison@riscosopen.org>
+++ *
+++ * This file is part of Libav.
+++ *
+++ * Libav is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2.1 of the License, or (at your option) any later version.
+++ *
+++ * Libav is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with Libav; if not, write to the Free Software
+++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+++ */
+++
+++#include "libavutil/arm/asm.S"
+++
+++RESULT .req a1
+++BUF .req a1
+++SIZE .req a2
+++PATTERN .req a3
+++PTR .req a4
+++DAT0 .req v1
+++DAT1 .req v2
+++DAT2 .req v3
+++DAT3 .req v4
+++TMP0 .req v5
+++TMP1 .req v6
+++TMP2 .req ip
+++TMP3 .req lr
+++
+++#define PRELOAD_DISTANCE 4
+++
+++.macro innerloop4
+++ ldr DAT0, [PTR], #4
+++ subs SIZE, SIZE, #4 @ C flag survives rest of macro
+++ sub TMP0, DAT0, PATTERN, lsr #14
+++ bic TMP0, TMP0, DAT0
+++ ands TMP0, TMP0, PATTERN
+++.endm
+++
+++.macro innerloop16 decrement, do_preload
+++ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
+++ .ifnc "\do_preload",""
+++ pld [PTR, #PRELOAD_DISTANCE*32]
+++ .endif
+++ .ifnc "\decrement",""
+++ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
+++ .endif
+++ sub TMP0, DAT0, PATTERN, lsr #14
+++ sub TMP1, DAT1, PATTERN, lsr #14
+++ bic TMP0, TMP0, DAT0
+++ bic TMP1, TMP1, DAT1
+++ sub TMP2, DAT2, PATTERN, lsr #14
+++ sub TMP3, DAT3, PATTERN, lsr #14
+++ ands TMP0, TMP0, PATTERN
+++ bic TMP2, TMP2, DAT2
+++ it eq
+++ andseq TMP1, TMP1, PATTERN
+++ bic TMP3, TMP3, DAT3
+++ itt eq
+++ andseq TMP2, TMP2, PATTERN
+++ andseq TMP3, TMP3, PATTERN
+++.endm
+++
+++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
+++function ff_h264_find_start_code_candidate_armv6, export=1
+++ push {v1-v6,lr}
+++ mov PTR, BUF
+++ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
+++ @ before using code that does preloads
+++ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
+++ blo 60f
+++
+++ @ Get to word-alignment, 1 byte at a time
+++ tst PTR, #3
+++ beq 2f
+++1: ldrb DAT0, [PTR], #1
+++ sub SIZE, SIZE, #1
+++ teq DAT0, #0
+++ beq 90f
+++ tst PTR, #3
+++ bne 1b
+++2: @ Get to 4-word alignment, 1 word at a time
+++ ldr PATTERN, =0x80008000
+++ setend be
+++ tst PTR, #12
+++ beq 4f
+++3: innerloop4
+++ bne 91f
+++ tst PTR, #12
+++ bne 3b
+++4: @ Get to cacheline (8-word) alignment
+++ tst PTR, #16
+++ beq 5f
+++ innerloop16 16
+++ bne 93f
+++5: @ Check complete cachelines, with preloading
+++ @ We need to stop when there are still (PRELOAD_DISTANCE+1)
+++ @ complete cachelines to go
+++ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
+++6: innerloop16 , do_preload
+++ bne 93f
+++ innerloop16 32
+++ bne 93f
+++ bcs 6b
+++ @ Preload trailing part-cacheline, if any
+++ tst SIZE, #31
+++ beq 7f
+++ pld [PTR, #(PRELOAD_DISTANCE+1)*32]
+++ @ Check remaining data without doing any more preloads. First
+++ @ do in chunks of 4 words:
+++7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
+++ bmi 9f
+++8: innerloop16 16
+++ bne 93f
+++ bcs 8b
+++ @ Then in words:
+++9: adds SIZE, SIZE, #16 - 4
+++ bmi 11f
+++10: innerloop4
+++ bne 91f
+++ bcs 10b
+++11: setend le
+++ @ Check second byte of final halfword
+++ ldrb DAT0, [PTR, #-1]
+++ teq DAT0, #0
+++ beq 90f
+++ @ Check any remaining bytes
+++ tst SIZE, #3
+++ beq 13f
+++12: ldrb DAT0, [PTR], #1
+++ sub SIZE, SIZE, #1
+++ teq DAT0, #0
+++ beq 90f
+++ tst SIZE, #3
+++ bne 12b
+++ @ No candidate found
+++13: sub RESULT, PTR, BUF
+++ b 99f
+++
+++60: @ Small buffer - simply check by looping over bytes
+++ subs SIZE, SIZE, #1
+++ bcc 99f
+++61: ldrb DAT0, [PTR], #1
+++ subs SIZE, SIZE, #1
+++ teq DAT0, #0
+++ beq 90f
+++ bcs 61b
+++ @ No candidate found
+++ sub RESULT, PTR, BUF
+++ b 99f
+++
+++90: @ Found a candidate at the preceding byte
+++ sub RESULT, PTR, BUF
+++ sub RESULT, RESULT, #1
+++ b 99f
+++
+++91: @ Found a candidate somewhere in the preceding 4 bytes
+++ sub RESULT, PTR, BUF
+++ sub RESULT, RESULT, #4
+++ sub TMP0, DAT0, #0x20000
+++ bics TMP0, TMP0, DAT0
+++ itt pl
+++ ldrbpl DAT0, [PTR, #-3]
+++ addpl RESULT, RESULT, #2
+++ bpl 92f
+++ teq RESULT, #0
+++ beq 98f @ don't look back a byte if found at first byte in buffer
+++ ldrb DAT0, [PTR, #-5]
+++92: teq DAT0, #0
+++ it eq
+++ subeq RESULT, RESULT, #1
+++ b 98f
+++
+++93: @ Found a candidate somewhere in the preceding 16 bytes
+++ sub RESULT, PTR, BUF
+++ sub RESULT, RESULT, #16
+++ teq TMP0, #0
+++ beq 95f @ not in first 4 bytes
+++ sub TMP0, DAT0, #0x20000
+++ bics TMP0, TMP0, DAT0
+++ itt pl
+++ ldrbpl DAT0, [PTR, #-15]
+++ addpl RESULT, RESULT, #2
+++ bpl 94f
+++ teq RESULT, #0
+++ beq 98f @ don't look back a byte if found at first byte in buffer
+++ ldrb DAT0, [PTR, #-17]
+++94: teq DAT0, #0
+++ it eq
+++ subeq RESULT, RESULT, #1
+++