add support for HiFiBerry from OpenELEC git

author: Waldemar Brodkorb <wbx@openadk.org> 2014-06-25 17:16:55 +0200
committer: Waldemar Brodkorb <wbx@openadk.org> 2014-06-25 17:16:55 +0200
commit: 65d1906183283a885b61bb87c023938f29145fe0 (patch)
tree: a1a7ca582307a90cbb1bf229edcda2a8f786f0d5 /package/xbmc
parent: 5fccf9a1157ab2a4f2159ab475852ef8a37f9b5b (diff)
2 files changed, 20666 insertions, 3 deletions
diff --git a/package/xbmc/Makefile b/package/xbmc/Makefile
index 3c792e3ad..dd0c50c46 100644
--- a/package/xbmc/Makefile
+++ b/package/xbmc/Makefile
@@ -5,7 +5,7 @@ include $(ADK_TOPDIR)/rules.mk
 
 PKG_NAME:=		xbmc
 PKG_VERSION:=		13.1
-PKG_RELEASE:=		1
+PKG_RELEASE:=		2
 PKG_MD5SUM:=		9ce6b6ac89b6aa0b111a1acdf3606e06
 PKG_DESCR:=		software media player
 PKG_SECTION:=		mm/video
@@ -64,7 +64,6 @@ AUTOTOOL_STYLE:=	autoreconf
 CONFIGURE_ENV+=		DESTDIR='${WRKINST}' \
 			TEXTUREPACKER_NATIVE_ROOT='$(STAGING_HOST_DIR)/usr'
 CONFIGURE_ARGS+=	--disable-optical-drive \
-			--disable-optmizations \
 			--disable-mysql \
 			--disable-avahi \
 			--disable-rsxs \
@@ -81,7 +80,6 @@ CONFIGURE_ARGS+=	--disable-optical-drive \
 			--disable-wayland \
 			--disable-pulse \
 			--disable-mid \
-			--with-ffmpeg \
 			--enable-alsa \
 			--enable-libmp3lame \
 			--enable-libvorbisenc \
diff --git a/package/xbmc/patches/xbmc-gotham_rbp_backports.patch b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch
new file mode 100644
index 000000000..9a4772437
--- /dev/null
+++ b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch
@@ -0,0 +1,20665 @@
+From 1353d8feca19f2f84019797942d70864054db1b0 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:46 +0100
+Subject: [PATCH 01/94] h264_parser: Initialize the h264dsp context in the
+ parser as well
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Each AVStream struct for an H.264 elementary stream actually has two
+copies of the H264DSPContext struct (and in fact all the other members
+of H264Context as well):
+
+((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp
+((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp
+
+but only the first of these was actually being initialised. This
+prevented the addition of platform-specific implementations of
+parser-related functions.
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/h264_parser.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index aff9ba1..a732f79 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -386,6 +386,7 @@ static int init(AVCodecParserContext *s)
+     H264Context *h = s->priv_data;
+     h->thread_context[0] = h;
+     h->slice_context_count = 1;
++    ff_h264dsp_init(&h->h264dsp, 8, 1);
+     return 0;
+ }
+ 
+-- 
+1.9.3
+
+
+From 7ea2cb68f6fb1149fce70854e36ed6357a267238 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:47 +0100
+Subject: [PATCH 02/94] h264dsp: Factorize code into a new function,
+ h264_find_start_code_candidate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This performs the start code search which was previously part of
+h264_find_frame_end() - the most CPU intensive part of the function.
+
+By itself, this results in a performance regression:
+              Before          After
+              Mean   StdDev   Mean   StdDev  Change
+Overall time  2925.6 26.2     3068.5 31.7    -4.7%
+
+but this can more than be made up for by platform-optimised
+implementations of the function.
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/h264_parser.c | 20 +++-----------------
+ lib/ffmpeg/libavcodec/h264dsp.c     | 29 +++++++++++++++++++++++++++++
+ lib/ffmpeg/libavcodec/h264dsp.h     |  9 +++++++++
+ 3 files changed, 41 insertions(+), 17 deletions(-)
+
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index a732f79..972aace 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -62,23 +62,9 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si
+         }
+ 
+         if(state==7){
+-#if HAVE_FAST_UNALIGNED
+-        /* we check i<buf_size instead of i+3/7 because its simpler
+-         * and there should be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end
+-         */
+-#    if HAVE_FAST_64BIT
+-            while(i<next_avc && !((~*(const uint64_t*)(buf+i) & (*(const uint64_t*)(buf+i) - 0x0101010101010101ULL)) & 0x8080808080808080ULL))
+-                i+=8;
+-#    else
+-            while(i<next_avc && !((~*(const uint32_t*)(buf+i) & (*(const uint32_t*)(buf+i) - 0x01010101U)) & 0x80808080U))
+-                i+=4;
+-#    endif
+-#endif
+-            for(; i<next_avc; i++){
+-                if(!buf[i]){
+-                    state=2;
+-                    break;
+-                }
++            i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
++            if (i < buf_size)
++                state = 2;
+             }
+         }else if(state<=2){
+             if(buf[i]==1)   state^= 5; //2->7, 1->4, 0->5
+diff --git a/lib/ffmpeg/libavcodec/h264dsp.c b/lib/ffmpeg/libavcodec/h264dsp.c
+index da9e417..b7d61cd 100644
+--- a/lib/ffmpeg/libavcodec/h264dsp.c
++++ b/lib/ffmpeg/libavcodec/h264dsp.c
+@@ -60,6 +60,34 @@
+ #include "h264addpx_template.c"
+ #undef BIT_DEPTH
+ 
++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
++{
++    int i = 0;
++#if HAVE_FAST_UNALIGNED
++    /* we check i < size instead of i + 3 / 7 because it is
++     * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
++     * bytes at the end.
++     */
++#if HAVE_FAST_64BIT
++    while (i < size &&
++            !((~*(const uint64_t *)(buf + i) &
++                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
++                    0x8080808080808080ULL))
++        i += 8;
++#else
++    while (i < size &&
++            !((~*(const uint32_t *)(buf + i) &
++                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
++                    0x80808080U))
++        i += 4;
++#endif
++#endif
++    for (; i < size; i++)
++        if (!buf[i])
++            break;
++    return i;
++}
++
+ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
+ {
+ #undef FUNC
+@@ -146,6 +174,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
+         H264_DSP(8);
+         break;
+     }
++    c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
+ 
+     if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
+     if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
+diff --git a/lib/ffmpeg/libavcodec/h264dsp.h b/lib/ffmpeg/libavcodec/h264dsp.h
+index 98ea15c..1be4804 100644
+--- a/lib/ffmpeg/libavcodec/h264dsp.h
++++ b/lib/ffmpeg/libavcodec/h264dsp.h
+@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
+     /* bypass-transform */
+     void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+     void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
++
++    /**
++     * Search buf from the start for up to size bytes. Return the index
++     * of a zero byte, or >= size if not found. Ideally, use lookahead
++     * to filter out any zero bytes that are known to not be followed by
++     * one or more further zero bytes and a one byte. Better still, filter
++     * out any bytes that form the trailing_zero_8bits syntax element too.
++     */
++    int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
+ } H264DSPContext;
+ 
+ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+-- 
+1.9.3
+
+
+From 458ff4b6c1855c529f563dbbd15e35aaab50adae Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 5 Aug 2013 13:12:48 +0100
+Subject: [PATCH 03/94] arm: Add assembly version of
+ h264_find_start_code_candidate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+               Before          After
+               Mean   StdDev   Mean   StdDev  Change
+This function   508.8 23.4      185.4  9.0    +174.4%
+Overall        3068.5 31.7     2752.1 29.4     +11.5%
+
+In combination with the preceding patch:
+                Before          After
+                Mean   StdDev   Mean   StdDev  Change
+Overall         2925.6 26.2     2752.1 29.4     +6.3%
+
+Signed-off-by: Martin Storsjö <martin@martin.st>
+---
+ lib/ffmpeg/libavcodec/arm/Makefile           |   1 +
+ lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S    | 253 +++++++++++++++++++++++++++
+ lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c |   4 +
+ lib/ffmpeg/libavcodec/h264_parser.c          |   1 -
+ 4 files changed, 258 insertions(+), 1 deletion(-)
+ create mode 100644 lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+
+diff --git a/lib/ffmpeg/libavcodec/arm/Makefile b/lib/ffmpeg/libavcodec/arm/Makefile
+index 7390a8b..480000b71 100644
+--- a/lib/ffmpeg/libavcodec/arm/Makefile
++++ b/lib/ffmpeg/libavcodec/arm/Makefile
+@@ -9,6 +9,7 @@ OBJS-$(CONFIG_AAC_DECODER)             += arm/sbrdsp_init_arm.o         \
+ OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
+ 
+ ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
++ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
+ 
+ OBJS-$(CONFIG_FLAC_DECODER)            += arm/flacdsp_init_arm.o        \
+                                           arm/flacdsp_arm.o             \
+diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+new file mode 100644
+index 0000000..c4f12a6
+--- /dev/null
++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S
+@@ -0,0 +1,253 @@
++/*
++ * Copyright (c) 2013 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of Libav.
++ *
++ * Libav is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * Libav is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with Libav; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++
++RESULT  .req    a1
++BUF     .req    a1
++SIZE    .req    a2
++PATTERN .req    a3
++PTR     .req    a4
++DAT0    .req    v1
++DAT1    .req    v2
++DAT2    .req    v3
++DAT3    .req    v4
++TMP0    .req    v5
++TMP1    .req    v6
++TMP2    .req    ip
++TMP3    .req    lr
++
++#define PRELOAD_DISTANCE 4
++
++.macro innerloop4
++        ldr     DAT0, [PTR], #4
++        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
++        sub     TMP0, DAT0, PATTERN, lsr #14
++        bic     TMP0, TMP0, DAT0
++        ands    TMP0, TMP0, PATTERN
++.endm
++
++.macro innerloop16  decrement, do_preload
++        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
++ .ifnc "\do_preload",""
++        pld     [PTR, #PRELOAD_DISTANCE*32]
++ .endif
++ .ifnc "\decrement",""
++        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
++ .endif
++        sub     TMP0, DAT0, PATTERN, lsr #14
++        sub     TMP1, DAT1, PATTERN, lsr #14
++        bic     TMP0, TMP0, DAT0
++        bic     TMP1, TMP1, DAT1
++        sub     TMP2, DAT2, PATTERN, lsr #14
++        sub     TMP3, DAT3, PATTERN, lsr #14
++        ands    TMP0, TMP0, PATTERN
++        bic     TMP2, TMP2, DAT2
++        it      eq
++        andseq  TMP1, TMP1, PATTERN
++        bic     TMP3, TMP3, DAT3
++        itt     eq
++        andseq  TMP2, TMP2, PATTERN
++        andseq  TMP3, TMP3, PATTERN
++.endm
++
++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
++function ff_h264_find_start_code_candidate_armv6, export=1
++        push    {v1-v6,lr}
++        mov     PTR, BUF
++        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
++        @ before using code that does preloads
++        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
++        blo     60f
++
++        @ Get to word-alignment, 1 byte at a time
++        tst     PTR, #3
++        beq     2f
++1:      ldrb    DAT0, [PTR], #1
++        sub     SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        tst     PTR, #3
++        bne     1b
++2:      @ Get to 4-word alignment, 1 word at a time
++        ldr     PATTERN, =0x80008000
++        setend  be
++        tst     PTR, #12
++        beq     4f
++3:      innerloop4
++        bne     91f
++        tst     PTR, #12
++        bne     3b
++4:      @ Get to cacheline (8-word) alignment
++        tst     PTR, #16
++        beq     5f
++        innerloop16  16
++        bne     93f
++5:      @ Check complete cachelines, with preloading
++        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
++        @ complete cachelines to go
++        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
++6:      innerloop16  , do_preload
++        bne     93f
++        innerloop16  32
++        bne     93f
++        bcs     6b
++        @ Preload trailing part-cacheline, if any
++        tst     SIZE, #31
++        beq     7f
++        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
++        @ Check remaining data without doing any more preloads. First
++        @ do in chunks of 4 words:
++7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
++        bmi     9f
++8:      innerloop16  16
++        bne     93f
++        bcs     8b
++        @ Then in words:
++9:      adds    SIZE, SIZE, #16 - 4
++        bmi     11f
++10:     innerloop4
++        bne     91f
++        bcs     10b
++11:     setend  le
++        @ Check second byte of final halfword
++        ldrb    DAT0, [PTR, #-1]
++        teq     DAT0, #0
++        beq     90f
++        @ Check any remaining bytes
++        tst     SIZE, #3
++        beq     13f
++12:     ldrb    DAT0, [PTR], #1
++        sub     SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        tst     SIZE, #3
++        bne     12b
++        @ No candidate found
++13:     sub     RESULT, PTR, BUF
++        b       99f
++
++60:     @ Small buffer - simply check by looping over bytes
++        subs    SIZE, SIZE, #1
++        bcc     99f
++61:     ldrb    DAT0, [PTR], #1
++        subs    SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        bcs     61b
++        @ No candidate found
++        sub     RESULT, PTR, BUF
++        b       99f
++
++90:     @ Found a candidate at the preceding byte
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #1
++        b       99f
++
++91:     @ Found a candidate somewhere in the preceding 4 bytes
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #4
++        sub     TMP0, DAT0, #0x20000
++        bics    TMP0, TMP0, DAT0
++        itt     pl
++        ldrbpl  DAT0, [PTR, #-3]
++        addpl   RESULT, RESULT, #2
++        bpl     92f
++        teq     RESULT, #0
++        beq     98f @ don't look back a byte if found at first byte in buffer
++        ldrb    DAT0, [PTR, #-5]
++92:     teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++
++93:     @ Found a candidate somewhere in the preceding 16 bytes
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #16
++        teq     TMP0, #0
++        beq     95f @ not in first 4 bytes
++        sub     TMP0, DAT0, #0x20000
++        bics    TMP0, TMP0, DAT0
++        itt     pl
++        ldrbpl  DAT0, [PTR, #-15]
++        addpl   RESULT, RESULT, #2
++        bpl     94f
++        teq     RESULT, #0
++        beq     98f @ don't look back a byte if found at first byte in buffer
++        ldrb    DAT0, [PTR, #-17]
++94:     teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++95:     add     RESULT, RESULT, #4
++        teq     TMP1, #0
++        beq     96f @ not in next 4 bytes
++        sub     TMP1, DAT1, #0x20000
++        bics    TMP1, TMP1, DAT1
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-13]
++        ldrbpl  DAT0, [PTR, #-11]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++96:     add     RESULT, RESULT, #4
++        teq     TMP2, #0
++        beq     97f @ not in next 4 bytes
++        sub     TMP2, DAT2, #0x20000
++        bics    TMP2, TMP2, DAT2
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-9]
++        ldrbpl  DAT0, [PTR, #-7]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++97:     add     RESULT, RESULT, #4
++        sub     TMP3, DAT3, #0x20000
++        bics    TMP3, TMP3, DAT3
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-5]
++        ldrbpl  DAT0, [PTR, #-3]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        @ drop through to 98f
++98:     setend  le
++99:     pop     {v1-v6,pc}
++.endfunc
++
++        .unreq  RESULT
++        .unreq  BUF
++        .unreq  SIZE
++        .unreq  PATTERN
++        .unreq  PTR
++        .unreq  DAT0
++        .unreq  DAT1
++        .unreq  DAT2
++        .unreq  DAT3
++        .unreq  TMP0
++        .unreq  TMP1
++        .unreq  TMP2
++        .unreq  TMP3
+diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+index 785b604..2804e56 100644
+--- a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+@@ -24,6 +24,8 @@
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/h264dsp.h"
+ 
++int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
++
+ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+                                      int beta, int8_t *tc0);
+ void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+@@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
+ {
+     int cpu_flags = av_get_cpu_flags();
+ 
++    if (have_armv6(cpu_flags))
++        c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
+     if (have_neon(cpu_flags))
+         ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+ }
+diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c
+index 972aace..363843c 100644
+--- a/lib/ffmpeg/libavcodec/h264_parser.c
++++ b/lib/ffmpeg/libavcodec/h264_parser.c
+@@ -65,7 +65,6 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si
+             i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+             if (i < buf_size)
+                 state = 2;
+-            }
+         }else if(state<=2){
+             if(buf[i]==1)   state^= 5; //2->7, 1->4, 0->5
+             else if(buf[i]) state = 7;
+-- 
+1.9.3
+
+
+From 5841d5b69f0df2f286c0a8e419deb16d927e864e Mon Sep 17 00:00:00 2001
+From: popcornmix <popcornmix@gmail.com>
+Date: Mon, 19 Aug 2013 22:48:05 +0100
+Subject: [PATCH 04/94] [ffmpeg] Backport of h264_find_start_code_candidate
+ optimisation
+
+---
+ ...-Initialize-the-h264dsp-context-in-the-pa.patch |  39 +++
+ ...torize-code-into-a-new-function-h264_find.patch | 134 +++++++++
+ ...embly-version-of-h264_find_start_code_can.patch | 322 +++++++++++++++++++++
+ 3 files changed, 495 insertions(+)
+ create mode 100644 lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+ create mode 100644 lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+ create mode 100644 lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+
+diff --git a/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+new file mode 100644
+index 0000000..263578d
+--- /dev/null
++++ b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch
+@@ -0,0 +1,39 @@
++From 7a82022ee2f9b1fad991ace0936901e7419444be Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:46 +0100
++Subject: [PATCH 1/3] h264_parser: Initialize the h264dsp context in the
++ parser as well
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++Each AVStream struct for an H.264 elementary stream actually has two
++copies of the H264DSPContext struct (and in fact all the other members
++of H264Context as well):
++
++((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp
++((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp
++
++but only the first of these was actually being initialised. This
++prevented the addition of platform-specific implementations of
++parser-related functions.
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/h264_parser.c |    1 +
++ 1 file changed, 1 insertion(+)
++
++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
++index 2ed155c..da2a5f9 100644
++--- a/libavcodec/h264_parser.c
+++++ b/libavcodec/h264_parser.c
++@@ -417,6 +417,7 @@ static av_cold int init(AVCodecParserContext *s)
++     H264Context *h = s->priv_data;
++     h->thread_context[0]   = h;
++     h->slice_context_count = 1;
+++    ff_h264dsp_init(&h->h264dsp, 8, 1);
++     return 0;
++ }
++
++--
++1.7.9.5
+diff --git a/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+new file mode 100644
+index 0000000..0151d85
+--- /dev/null
++++ b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch
+@@ -0,0 +1,134 @@
++From 218d6844b37d339ffbf2044ad07d8be7767e2734 Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:47 +0100
++Subject: [PATCH 2/3] h264dsp: Factorize code into a new function,
++ h264_find_start_code_candidate
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++This performs the start code search which was previously part of
++h264_find_frame_end() - the most CPU intensive part of the function.
++
++By itself, this results in a performance regression:
++              Before          After
++              Mean   StdDev   Mean   StdDev  Change
++Overall time  2925.6 26.2     3068.5 31.7    -4.7%
++
++but this can more than be made up for by platform-optimised
++implementations of the function.
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/h264_parser.c |   27 +++------------------------
++ libavcodec/h264dsp.c     |   29 +++++++++++++++++++++++++++++
++ libavcodec/h264dsp.h     |    9 +++++++++
++ 3 files changed, 41 insertions(+), 24 deletions(-)
++
++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
++index da2a5f9..ef5da98 100644
++--- a/libavcodec/h264_parser.c
+++++ b/libavcodec/h264_parser.c
++@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
++
++     for (i = 0; i < buf_size; i++) {
++         if (state == 7) {
++-#if HAVE_FAST_UNALIGNED
++-            /* we check i < buf_size instead of i + 3 / 7 because it is
++-             * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
++-             * bytes at the end.
++-             */
++-#if HAVE_FAST_64BIT
++-            while (i < buf_size &&
++-                   !((~*(const uint64_t *)(buf + i) &
++-                      (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
++-                      0x8080808080808080ULL))
++-                i += 8;
++-#else
++-            while (i < buf_size &&
++-                   !((~*(const uint32_t *)(buf + i) &
++-                      (*(const uint32_t *)(buf + i) - 0x01010101U)) &
++-                      0x80808080U))
++-                i += 4;
++-#endif
++-#endif
++-            for (; i < buf_size; i++)
++-                if (!buf[i]) {
++-                    state = 2;
++-                    break;
++-                }
+++            i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+++            if (i < buf_size)
+++                state = 2;
++         } else if (state <= 2) {
++             if (buf[i] == 1)
++                 state ^= 5;            // 2->7, 1->4, 0->5
++diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
++index 3ca6abe..a901dbb 100644
++--- a/libavcodec/h264dsp.c
+++++ b/libavcodec/h264dsp.c
++@@ -53,6 +53,34 @@
++ #include "h264addpx_template.c"
++ #undef BIT_DEPTH
++
+++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
+++{
+++    int i = 0;
+++#if HAVE_FAST_UNALIGNED
+++    /* we check i < size instead of i + 3 / 7 because it is
+++     * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+++     * bytes at the end.
+++     */
+++#if HAVE_FAST_64BIT
+++    while (i < size &&
+++            !((~*(const uint64_t *)(buf + i) &
+++                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+++                    0x8080808080808080ULL))
+++        i += 8;
+++#else
+++    while (i < size &&
+++            !((~*(const uint32_t *)(buf + i) &
+++                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+++                    0x80808080U))
+++        i += 4;
+++#endif
+++#endif
+++    for (; i < size; i++)
+++        if (!buf[i])
+++            break;
+++    return i;
+++}
+++
++ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++                              const int chroma_format_idc)
++ {
++@@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++         H264_DSP(8);
++         break;
++     }
+++    c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
++
++     if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
++     if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
++diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
++index 1f9f8fe..6249ba7 100644
++--- a/libavcodec/h264dsp.h
+++++ b/libavcodec/h264dsp.h
++@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
++     /* bypass-transform */
++     void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
++     void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+++
+++    /**
+++     * Search buf from the start for up to size bytes. Return the index
+++     * of a zero byte, or >= size if not found. Ideally, use lookahead
+++     * to filter out any zero bytes that are known to not be followed by
+++     * one or more further zero bytes and a one byte. Better still, filter
+++     * out any bytes that form the trailing_zero_8bits syntax element too.
+++     */
+++    int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
++ } H264DSPContext;
++
++ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
++--
++1.7.9.5
+diff --git a/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+new file mode 100644
+index 0000000..cdc2d1e
+--- /dev/null
++++ b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch
+@@ -0,0 +1,322 @@
++From 45e10e5c8d3df09c80a4d80483bff2712367f3fa Mon Sep 17 00:00:00 2001
++From: Ben Avison <bavison@riscosopen.org>
++Date: Mon, 5 Aug 2013 13:12:48 +0100
++Subject: [PATCH 3/3] arm: Add assembly version of
++ h264_find_start_code_candidate
++MIME-Version: 1.0
++Content-Type: text/plain; charset=UTF-8
++Content-Transfer-Encoding: 8bit
++
++               Before          After
++               Mean   StdDev   Mean   StdDev  Change
++This function   508.8 23.4      185.4  9.0    +174.4%
++Overall        3068.5 31.7     2752.1 29.4     +11.5%
++
++In combination with the preceding patch:
++                Before          After
++                Mean   StdDev   Mean   StdDev  Change
++Overall         2925.6 26.2     2752.1 29.4     +6.3%
++
++Signed-off-by: Martin Storsjö <martin@martin.st>
++---
++ libavcodec/arm/Makefile           |    1 +
++ libavcodec/arm/h264dsp_armv6.S    |  253 +++++++++++++++++++++++++++++++++++++
++ libavcodec/arm/h264dsp_init_arm.c |    4 +
++ 3 files changed, 258 insertions(+)
++ create mode 100644 libavcodec/arm/h264dsp_armv6.S
++
++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
++index e941aaa..9c64b36 100644
++--- a/libavcodec/arm/Makefile
+++++ b/libavcodec/arm/Makefile
++@@ -45,6 +45,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL)           += arm/dsputil_init_armv6.o      \
++                                           arm/simple_idct_armv6.o       \
++
++ ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
+++ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
++ ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
++                                           arm/hpeldsp_armv6.o
++ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
++diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S
++new file mode 100644
++index 0000000..c4f12a6
++--- /dev/null
+++++ b/libavcodec/arm/h264dsp_armv6.S
++@@ -0,0 +1,253 @@
+++/*
+++ * Copyright (c) 2013 RISC OS Open Ltd
+++ * Author: Ben Avison <bavison@riscosopen.org>
+++ *
+++ * This file is part of Libav.
+++ *
+++ * Libav is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2.1 of the License, or (at your option) any later version.
+++ *
+++ * Libav is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with Libav; if not, write to the Free Software
+++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+++ */
+++
+++#include "libavutil/arm/asm.S"
+++
+++RESULT  .req    a1
+++BUF     .req    a1
+++SIZE    .req    a2
+++PATTERN .req    a3
+++PTR     .req    a4
+++DAT0    .req    v1
+++DAT1    .req    v2
+++DAT2    .req    v3
+++DAT3    .req    v4
+++TMP0    .req    v5
+++TMP1    .req    v6
+++TMP2    .req    ip
+++TMP3    .req    lr
+++
+++#define PRELOAD_DISTANCE 4
+++
+++.macro innerloop4
+++        ldr     DAT0, [PTR], #4
+++        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
+++        sub     TMP0, DAT0, PATTERN, lsr #14
+++        bic     TMP0, TMP0, DAT0
+++        ands    TMP0, TMP0, PATTERN
+++.endm
+++
+++.macro innerloop16  decrement, do_preload
+++        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3} @ test four words: load 16 bytes and advance PTR
+++ .ifnc "\do_preload",""
+++        pld     [PTR, #PRELOAD_DISTANCE*32] @ emitted only when the do_preload argument is non-empty
+++ .endif
+++ .ifnc "\decrement",""
+++        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
+++ .endif
+++        sub     TMP0, DAT0, PATTERN, lsr #14 @ same sub/bic/and test as innerloop4, interleaved across the four words
+++        sub     TMP1, DAT1, PATTERN, lsr #14
+++        bic     TMP0, TMP0, DAT0
+++        bic     TMP1, TMP1, DAT1
+++        sub     TMP2, DAT2, PATTERN, lsr #14
+++        sub     TMP3, DAT3, PATTERN, lsr #14
+++        ands    TMP0, TMP0, PATTERN @ Z clear (ne) if the first word holds a candidate
+++        bic     TMP2, TMP2, DAT2
+++        it      eq
+++        andseq  TMP1, TMP1, PATTERN @ later words are tested only while every earlier result was zero
+++        bic     TMP3, TMP3, DAT3
+++        itt     eq
+++        andseq  TMP2, TMP2, PATTERN
+++        andseq  TMP3, TMP3, PATTERN @ macro ends with ne if any of the four words holds a candidate
+++.endm
+++
+++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
+++function ff_h264_find_start_code_candidate_armv6, export=1
+++        push    {v1-v6,lr}
+++        mov     PTR, BUF
+++        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
+++        @ before using code that does preloads
+++        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
+++        blo     60f
+++
+++        @ Get to word-alignment, 1 byte at a time
+++        tst     PTR, #3
+++        beq     2f
+++1:      ldrb    DAT0, [PTR], #1
+++        sub     SIZE, SIZE, #1
+++        teq     DAT0, #0
+++        beq     90f
+++        tst     PTR, #3
+++        bne     1b
+++2:      @ Get to 4-word alignment, 1 word at a time
+++        ldr     PATTERN, =0x80008000
+++        setend  be
+++        tst     PTR, #12
+++        beq     4f
+++3:      innerloop4
+++        bne     91f
+++        tst     PTR, #12
+++        bne     3b
+++4:      @ Get to cacheline (8-word) alignment
+++        tst     PTR, #16
+++        beq     5f
+++        innerloop16  16
+++        bne     93f
+++5:      @ Check complete cachelines, with preloading
+++        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
+++        @ complete cachelines to go
+++        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
+++6:      innerloop16  , do_preload
+++        bne     93f
+++        innerloop16  32
+++        bne     93f
+++        bcs     6b
+++        @ Preload trailing part-cacheline, if any
+++        tst     SIZE, #31
+++        beq     7f
+++        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
+++        @ Check remaining data without doing any more preloads. First
+++        @ do in chunks of 4 words:
+++7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
+++        bmi     9f
+++8:      innerloop16  16
+++        bne     93f
+++        bcs     8b
+++        @ Then in words:
+++9:      adds    SIZE, SIZE, #16 - 4
+++        bmi     11f
+++10:     innerloop4
+++        bne     91f
+++        bcs     10b
+++11:     setend  le
+++        @ Check second byte of final halfword
+++        ldrb    DAT0, [PTR, #-1]
+++        teq     DAT0, #0
+++        beq     90f
+++        @ Check any remaining bytes
+++        tst     SIZE, #3
+++        beq     13f
+++12:     ldrb    DAT0, [PTR], #1
+++        sub     SIZE, SIZE, #1
+++        teq     DAT0, #0
+++        beq     90f
+++        tst     SIZE, #3
+++        bne     12b
+++        @ No candidate found
+++13:     sub     RESULT, PTR, BUF
+++        b       99f
+++
+++60:     @ Small buffer - simply check by looping over bytes
+++        subs    SIZE, SIZE, #1
+++        bcc     99f
+++61:     ldrb    DAT0, [PTR], #1
+++        subs    SIZE, SIZE, #1
+++        teq     DAT0, #0
+++        beq     90f
+++        bcs     61b
+++        @ No candidate found
+++        sub     RESULT, PTR, BUF
+++        b       99f
+++
+++90:     @ Found a candidate at the preceding byte
+++        sub     RESULT, PTR, BUF
+++        sub     RESULT, RESULT, #1
+++        b       99f
+++
+++91:     @ Found a candidate somewhere in the preceding 4 bytes
+++        sub     RESULT, PTR, BUF
+++        sub     RESULT, RESULT, #4
+++        sub     TMP0, DAT0, #0x20000
+++        bics    TMP0, TMP0, DAT0
+++        itt     pl
+++        ldrbpl  DAT0, [PTR, #-3]
+++        addpl   RESULT, RESULT, #2
+++        bpl     92f
+++        teq     RESULT, #0
+++        beq     98f @ don't look back a byte if found at first byte in buffer
+++        ldrb    DAT0, [PTR, #-5]
+++92:     teq     DAT0, #0
+++        it      eq
+++        subeq   RESULT, RESULT, #1
+++        b       98f
+++
+++93:     @ Found a candidate somewhere in the preceding 16 bytes
+++        sub     RESULT, PTR, BUF
+++        sub     RESULT, RESULT, #16
+++        teq     TMP0, #0
+++        beq     95f @ not in first 4 bytes
+++        sub     TMP0, DAT0, #0x20000
+++        bics    TMP0, TMP0, DAT0
+++        itt     pl
+++        ldrbpl  DAT0, [PTR, #-15]
+++        addpl   RESULT, RESULT, #2
+++        bpl     94f
+++        teq     RESULT, #0
+++        beq     98f @ don't look back a byte if found at first byte in buffer
+++        ldrb    DAT0, [PTR, #-17]
+++94:     teq     DAT0, #0
+++        it      eq
+++        subeq   RESULT, RESULT, #1
+++
author	Waldemar Brodkorb <wbx@openadk.org>	2014-06-25 17:16:55 +0200
committer	Waldemar Brodkorb <wbx@openadk.org>	2014-06-25 17:16:55 +0200
commit	65d1906183283a885b61bb87c023938f29145fe0 (patch)
tree	a1a7ca582307a90cbb1bf229edcda2a8f786f0d5 /package/xbmc
parent	5fccf9a1157ab2a4f2159ab475852ef8a37f9b5b (diff)