[asterisk-commits] kmoore: branch 10 r361956 - in /branches/10: ./ build_tools/ codecs/gsm/ code...
SVN commits to the Asterisk project
asterisk-commits at lists.digium.com
Thu Apr 12 10:01:36 CDT 2012
Author: kmoore
Date: Thu Apr 12 10:01:13 2012
New Revision: 361956
URL: http://svnview.digium.com/svn/asterisk?view=rev&rev=361956
Log:
Simplify build system architecture optimization
This change to the build system rips out any usage of PROC along with
architecture-specific optimizations in favor of using -march=native where it is
supported. This fixes broken builds on 64-bit Intel systems and results in
better optimized code on systems running GCC 4.2+.
Review: https://reviewboard.asterisk.org/r/1852/
(closes issue ASTERISK-19462)
........
Merged revisions 361955 from http://svn.asterisk.org/svn/asterisk/branches/1.8
Modified:
branches/10/ (props changed)
branches/10/Makefile
branches/10/Makefile.rules
branches/10/build_tools/cflags.xml
branches/10/build_tools/menuselect-deps.in
branches/10/codecs/gsm/Makefile
branches/10/codecs/gsm/src/k6opt.s
branches/10/codecs/lpc10/Makefile
branches/10/configure
branches/10/configure.ac
branches/10/makeopts.in
Propchange: branches/10/
------------------------------------------------------------------------------
Binary property 'branch-1.8-merged' - no diff available.
Modified: branches/10/Makefile
URL: http://svnview.digium.com/svn/asterisk/branches/10/Makefile?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/Makefile (original)
+++ branches/10/Makefile Thu Apr 12 10:01:13 2012
@@ -64,7 +64,6 @@
export ASTKEYDIR
export OSARCH # Operating system
-export PROC # Processor type
export NOISY_BUILD # Used in Makefile.rules
export MENUSELECT_CFLAGS # Options selected in menuselect.
@@ -163,39 +162,6 @@
# Create OPTIONS variable, but probably we can assign directly to ASTCFLAGS
OPTIONS=
-ifeq ($(OSARCH),linux-gnu)
- ifeq ($(PROC),x86_64)
- # You must have GCC 3.4 to use k8, otherwise use athlon
- PROC=k8
- #PROC=athlon
- endif
-
- ifeq ($(PROC),sparc64)
- #The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
- #This works for even old (2.96) versions of gcc and provides a small boost either way.
- #A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it.
- #So we go lowest common available by gcc and go a step down, still a step up from
- #the default as we now have a better instruction set to work with. - Belgarath
- PROC=ultrasparc
- OPTIONS+=$(shell if $(CC) -mtune=$(PROC) -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mtune=$(PROC)"; fi)
- OPTIONS+=$(shell if $(CC) -mcpu=v8 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mcpu=v8"; fi)
- OPTIONS+=-fomit-frame-pointer
- endif
-
- ifeq ($(PROC),arm)
- # The Cirrus logic is the only heavily shipping arm processor with a real floating point unit
- ifeq ($(SUB_PROC),maverick)
- OPTIONS+=-fsigned-char -mcpu=ep9312
- else
- ifeq ($(SUB_PROC),xscale)
- OPTIONS+=-fsigned-char -mcpu=xscale
- else
- OPTIONS+=-fsigned-char
- endif
- endif
- endif
-endif
-
ifeq ($(findstring -save-temps,$(_ASTCFLAGS) $(ASTCFLAGS)),)
ifeq ($(findstring -pipe,$(_ASTCFLAGS) $(ASTCFLAGS)),)
_ASTCFLAGS+=-pipe
@@ -224,26 +190,7 @@
_ASTCFLAGS+=-isystem /usr/local/include
endif
-ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
- ifneq ($(AST_MARCH_NATIVE),)
- _ASTCFLAGS+=$(AST_MARCH_NATIVE)
- else
- ifneq ($(PROC),ultrasparc)
- _ASTCFLAGS+=$(shell if $(CC) -march=$(PROC) -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=$(PROC)"; fi)
- endif
- endif
-endif
-
-ifeq ($(PROC),ppc)
- _ASTCFLAGS+=-fsigned-char
-endif
-
ifeq ($(OSARCH),FreeBSD)
- ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
- ifeq ($(PROC),i386)
- _ASTCFLAGS+=-march=i686
- endif
- endif
# -V is understood by BSD Make, not by GNU make.
BSDVERSION=$(shell make -V OSVERSION -f /usr/share/mk/bsd.port.subdir.mk)
_ASTCFLAGS+=$(shell if test $(BSDVERSION) -lt 500016 ; then echo "-D_THREAD_SAFE"; fi)
Modified: branches/10/Makefile.rules
URL: http://svnview.digium.com/svn/asterisk/branches/10/Makefile.rules?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/Makefile.rules (original)
+++ branches/10/Makefile.rules Thu Apr 12 10:01:13 2012
@@ -84,6 +84,10 @@
COMPILE_DOUBLE=yes
endif
+ifeq ($(findstring BUILD_NATIVE,$(MENUSELECT_CFLAGS)),BUILD_NATIVE)
+ _ASTCFLAGS+=-march=native
+endif
+
%.o: %.s
$(ECHO_PREFIX) echo " [AS] $< -> $@"
ifeq ($(COMPILE_DOUBLE),yes)
Modified: branches/10/build_tools/cflags.xml
URL: http://svnview.digium.com/svn/asterisk/branches/10/build_tools/cflags.xml?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/build_tools/cflags.xml (original)
+++ branches/10/build_tools/cflags.xml Thu Apr 12 10:01:13 2012
@@ -80,4 +80,9 @@
<member name="INTEGER_CALLERID" displayname="Use the (less accurate) integer-based method for decoding FSK tones (for embedded systems)">
<support_level>extended</support_level>
</member>
+ <member name="BUILD_NATIVE" displayname="Allow compiler to generate code optimized for the CPU on which the build is performed.">
+ <support_level>core</support_level>
+ <defaultenabled>yes</defaultenabled>
+ <depend>native_arch</depend>
+ </member>
</category>
Modified: branches/10/build_tools/menuselect-deps.in
URL: http://svnview.digium.com/svn/asterisk/branches/10/build_tools/menuselect-deps.in?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/build_tools/menuselect-deps.in (original)
+++ branches/10/build_tools/menuselect-deps.in Thu Apr 12 10:01:13 2012
@@ -66,3 +66,4 @@
WINARCH=@PBX_WINARCH@
ZLIB=@PBX_ZLIB@
TIMERFD=@PBX_TIMERFD@
+NATIVE_ARCH=@AST_NATIVE_ARCH@
Modified: branches/10/codecs/gsm/Makefile
URL: http://svnview.digium.com/svn/asterisk/branches/10/codecs/gsm/Makefile?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/codecs/gsm/Makefile (original)
+++ branches/10/codecs/gsm/Makefile Thu Apr 12 10:01:13 2012
@@ -43,35 +43,6 @@
ifeq ($(shell $(CC) -v 2>&1 | awk '/^gcc version/ { split($$3, v, "."); printf "%s.%s\n", v[1], v[2]; }' ),4.2)
OPTIMIZE=-O2
-endif
-
-# If the compiler's '-march' flag has been specified already, then assume it's a value
-# that is what the user wants (or has been determined by the configure script). If not,
-# do some simple logic to set a decent value
-ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
- ifeq (,$(findstring $(shell uname -s),Darwin SunOS))
- ifeq (,$(strip $(findstring $(PROC) ,"x86_64 amd64 ultrasparc sparc64 arm armv5b armeb ppc powerpc ppc64 ia64 s390 bfin mipsel mips ")))
- ifeq (,$(strip $(findstring $(shell uname -m) ,"ppc ppc64 alpha armv4l s390 ")))
- OPTIMIZE+=-march=$(PROC)
- endif
- endif
- else
- ifneq (,$(findstring $(OSARCH),Darwin))
- ifeq ($(shell if test `/usr/bin/sw_vers -productVersion | cut -c4` -gt 5; then echo 6; else echo 0; fi),6)
- # Snow Leopard/Lion reports i386, even though it's really x86_64
- OPTIMIZE+=-mtune=native
- endif
- endif
- endif
-
- #The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
- #This works for even old (2.96) versions of gcc and provides a small boost either way.
- #A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it.
- #So we go lowest common available by gcc and go a step down, still a step up from
- #the default as we now have a better instruction set to work with. - Belgarath
- ifeq ($(PROC),ultrasparc)
- OPTIMIZE+=-mcpu=v8 -mtune=$(PROC) -O3
- endif
endif
PG =
@@ -224,17 +195,6 @@
$(SRC)/short_term.c \
$(SRC)/table.c
-# add k6-specific code only if not on a non-k6 hardware or proc.
-# XXX Keep a space after each findstring argument
-# XXX should merge with GSM_OBJECTS
-ifeq ($(OSARCH),linux-gnu)
-ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc s390 ))
-ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 s390 bfin mipsel mips ))
-GSM_SOURCES+= $(SRC)/k6opt.s
-endif
-endif
-endif
-
TOAST_SOURCES = $(SRC)/toast.c \
$(SRC)/toast_lin.c \
$(SRC)/toast_ulaw.c \
@@ -279,14 +239,6 @@
$(SRC)/short_term.o \
$(SRC)/table.o
-ifeq ($(OSARCH),linux-gnu)
-ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc ))
-ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 bfin mipsel mips ))
-GSM_OBJECTS+= $(SRC)/k6opt.o
-endif
-endif
-endif
-
TOAST_OBJECTS = $(SRC)/toast.o \
$(SRC)/toast_lin.o \
$(SRC)/toast_ulaw.o \
Modified: branches/10/codecs/gsm/src/k6opt.s
URL: http://svnview.digium.com/svn/asterisk/branches/10/codecs/gsm/src/k6opt.s?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/codecs/gsm/src/k6opt.s (original)
+++ branches/10/codecs/gsm/src/k6opt.s Thu Apr 12 10:01:13 2012
@@ -1,739 +1,0 @@
- .file "k6opt.s"
- .version "01.01"
-/* gcc2_compiled.: */
-.section .rodata
- .align 4
- .type coefs,@object
- .size coefs,24
-coefs:
- .value -134
- .value -374
- .value 0
- .value 2054
- .value 5741
- .value 8192
- .value 5741
- .value 2054
- .value 0
- .value -374
- .value -134
- .value 0
-.text
- .align 4
-/* void Weighting_filter (const short *e, short *x) */
-.globl Weighting_filter
- .type Weighting_filter,@function
-Weighting_filter:
- pushl %ebp
- movl %esp,%ebp
- pushl %edi
- pushl %esi
- pushl %ebx
- movl 12(%ebp),%edi
- movl 8(%ebp),%ebx
- addl $-10,%ebx
- emms
- movl $0x1000,%eax; movd %eax,%mm5 /* for rounding */
- movq coefs,%mm1
- movq coefs+8,%mm2
- movq coefs+16,%mm3
- xorl %esi,%esi
- .p2align 2
-.L21:
- movq (%ebx,%esi,2),%mm0
- pmaddwd %mm1,%mm0
-
- movq 8(%ebx,%esi,2),%mm4
- pmaddwd %mm2,%mm4
- paddd %mm4,%mm0
-
- movq 16(%ebx,%esi,2),%mm4
- pmaddwd %mm3,%mm4
- paddd %mm4,%mm0
-
- movq %mm0,%mm4
- punpckhdq %mm0,%mm4 /* mm4 has high int32 of mm0 dup'd */
- paddd %mm4,%mm0;
-
- paddd %mm5,%mm0 /* add for roundoff */
- psrad $13,%mm0
- packssdw %mm0,%mm0
- movd %mm0,%eax /* ax has result */
- movw %ax,(%edi,%esi,2)
- incl %esi
- cmpl $39,%esi
- jle .L21
- emms
- popl %ebx
- popl %esi
- popl %edi
- leave
- ret
-.Lfe1:
- .size Weighting_filter,.Lfe1-Weighting_filter
-
-.macro ccstep n
-.if \n
- movq \n(%edi),%mm1
- movq \n(%esi),%mm2
-.else
- movq (%edi),%mm1
- movq (%esi),%mm2
-.endif
- pmaddwd %mm2,%mm1
- paddd %mm1,%mm0
-.endm
-
- .align 4
-/* long k6maxcc(const short *wt, const short *dp, short *Nc_out) */
-.globl k6maxcc
- .type k6maxcc,@function
-k6maxcc:
- pushl %ebp
- movl %esp,%ebp
- pushl %edi
- pushl %esi
- pushl %ebx
- emms
- movl 8(%ebp),%edi
- movl 12(%ebp),%esi
- movl $0,%edx /* will be maximum inner-product */
- movl $40,%ebx
- movl %ebx,%ecx /* will be index of max inner-product */
- subl $80,%esi
- .p2align 2
-.L41:
- movq (%edi),%mm0
- movq (%esi),%mm2
- pmaddwd %mm2,%mm0
- ccstep 8
- ccstep 16
- ccstep 24
- ccstep 32
- ccstep 40
- ccstep 48
- ccstep 56
- ccstep 64
- ccstep 72
-
- movq %mm0,%mm1
- punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
- paddd %mm1,%mm0;
- movd %mm0,%eax /* eax has result */
-
- cmpl %edx,%eax
- jle .L40
- movl %eax,%edx
- movl %ebx,%ecx
- .p2align 2
-.L40:
- subl $2,%esi
- incl %ebx
- cmpl $120,%ebx
- jle .L41
- movl 16(%ebp),%eax
- movw %cx,(%eax)
- movl %edx,%eax
- emms
- popl %ebx
- popl %esi
- popl %edi
- leave
- ret
-.Lfe2:
- .size k6maxcc,.Lfe2-k6maxcc
-
-
- .align 4
-/* long k6iprod (const short *p, const short *q, int n) */
-.globl k6iprod
- .type k6iprod,@function
-k6iprod:
- pushl %ebp
- movl %esp,%ebp
- pushl %edi
- pushl %esi
- emms
- pxor %mm0,%mm0
- movl 8(%ebp),%esi
- movl 12(%ebp),%edi
- movl 16(%ebp),%eax
- leal -32(%esi,%eax,2),%edx /* edx = top - 32 */
-
- cmpl %edx,%esi; ja .L202
-
- .p2align 2
-.L201:
- ccstep 0
- ccstep 8
- ccstep 16
- ccstep 24
-
- addl $32,%esi
- addl $32,%edi
- cmpl %edx,%esi; jbe .L201
-
- .p2align 2
-.L202:
- addl $24,%edx /* now edx = top-8 */
- cmpl %edx,%esi; ja .L205
-
- .p2align 2
-.L203:
- ccstep 0
-
- addl $8,%esi
- addl $8,%edi
- cmpl %edx,%esi; jbe .L203
-
- .p2align 2
-.L205:
- addl $4,%edx /* now edx = top-4 */
- cmpl %edx,%esi; ja .L207
-
- movd (%edi),%mm1
- movd (%esi),%mm2
- pmaddwd %mm2,%mm1
- paddd %mm1,%mm0
-
- addl $4,%esi
- addl $4,%edi
-
- .p2align 2
-.L207:
- addl $2,%edx /* now edx = top-2 */
- cmpl %edx,%esi; ja .L209
-
- movswl (%edi),%eax
- movd %eax,%mm1
- movswl (%esi),%eax
- movd %eax,%mm2
- pmaddwd %mm2,%mm1
- paddd %mm1,%mm0
-
- .p2align 2
-.L209:
- movq %mm0,%mm1
- punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
- paddd %mm1,%mm0;
- movd %mm0,%eax /* eax has result */
-
- emms
- popl %esi
- popl %edi
- leave
- ret
-.Lfe3:
- .size k6iprod,.Lfe3-k6iprod
-
-
- .align 4
-/* void k6vsraw P3((short *p, int n, int bits) */
-.globl k6vsraw
- .type k6vsraw,@function
-k6vsraw:
- pushl %ebp
- movl %esp,%ebp
- pushl %esi
- movl 8(%ebp),%esi
- movl 16(%ebp),%ecx
- andl %ecx,%ecx; jle .L399
- movl 12(%ebp),%eax
- leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
- emms
- movd %ecx,%mm3
- movq ones,%mm2
- psllw %mm3,%mm2; psrlw $1,%mm2
- cmpl %edx,%esi; ja .L306
-
- .p2align 2
-.L302: /* 8 words per iteration */
- movq (%esi),%mm0
- movq 8(%esi),%mm1
- paddsw %mm2,%mm0
- psraw %mm3,%mm0;
- paddsw %mm2,%mm1
- psraw %mm3,%mm1;
- movq %mm0,(%esi)
- movq %mm1,8(%esi)
- addl $16,%esi
- cmpl %edx,%esi
- jbe .L302
-
- .p2align 2
-.L306:
- addl $12,%edx /* now edx = top-4 */
- cmpl %edx,%esi; ja .L310
-
- .p2align 2
-.L308: /* do up to 6 words, two at a time */
- movd (%esi),%mm0
- paddsw %mm2,%mm0
- psraw %mm3,%mm0;
- movd %mm0,(%esi)
- addl $4,%esi
- cmpl %edx,%esi
- jbe .L308
-
- .p2align 2
-.L310:
- addl $2,%edx /* now edx = top-2 */
- cmpl %edx,%esi; ja .L315
-
- movzwl (%esi),%eax
- movd %eax,%mm0
- paddsw %mm2,%mm0
- psraw %mm3,%mm0;
- movd %mm0,%eax
- movw %ax,(%esi)
-
- .p2align 2
-.L315:
- emms
-.L399:
- popl %esi
- leave
- ret
-.Lfe4:
- .size k6vsraw,.Lfe4-k6vsraw
-
- .align 4
-/* void k6vsllw P3((short *p, int n, int bits) */
-.globl k6vsllw
- .type k6vsllw,@function
-k6vsllw:
- pushl %ebp
- movl %esp,%ebp
- pushl %esi
- movl 8(%ebp),%esi
- movl 16(%ebp),%ecx
- andl %ecx,%ecx; jle .L499
- movl 12(%ebp),%eax
- leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
- emms
- movd %ecx,%mm3
- cmpl %edx,%esi; ja .L406
-
- .p2align 2
-.L402: /* 8 words per iteration */
- movq (%esi),%mm0
- movq 8(%esi),%mm1
- psllw %mm3,%mm0;
- psllw %mm3,%mm1;
- movq %mm0,(%esi)
- movq %mm1,8(%esi)
- addl $16,%esi
- cmpl %edx,%esi
- jbe .L402
-
- .p2align 2
-.L406:
- addl $12,%edx /* now edx = top-4 */
- cmpl %edx,%esi; ja .L410
-
- .p2align 2
-.L408: /* do up to 6 words, two at a time */
- movd (%esi),%mm0
- psllw %mm3,%mm0;
- movd %mm0,(%esi)
- addl $4,%esi
- cmpl %edx,%esi
- jbe .L408
-
- .p2align 2
-.L410:
- addl $2,%edx /* now edx = top-2 */
- cmpl %edx,%esi; ja .L415
-
- movzwl (%esi),%eax
- movd %eax,%mm0
- psllw %mm3,%mm0;
- movd %mm0,%eax
- movw %ax,(%esi)
-
- .p2align 2
-.L415:
- emms
-.L499:
- popl %esi
- leave
- ret
-.Lfe5:
- .size k6vsllw,.Lfe5-k6vsllw
-
-
-.section .rodata
- .align 4
- .type extremes,@object
- .size extremes,8
-extremes:
- .long 0x80008000
- .long 0x7fff7fff
- .type ones,@object
- .size ones,8
-ones:
- .long 0x00010001
- .long 0x00010001
-
-.text
- .align 4
-/* long k6maxmin (const short *p, int n, short *out) */
-.globl k6maxmin
- .type k6maxmin,@function
-k6maxmin:
- pushl %ebp
- movl %esp,%ebp
- pushl %esi
- emms
- movl 8(%ebp),%esi
- movl 12(%ebp),%eax
- leal -8(%esi,%eax,2),%edx
-
- cmpl %edx,%esi
- jbe .L52
- movd extremes,%mm0
- movd extremes+4,%mm1
- jmp .L58
-
- .p2align 2
-.L52:
- movq (%esi),%mm0 /* mm0 will be max's */
- movq %mm0,%mm1 /* mm1 will be min's */
- addl $8,%esi
- cmpl %edx,%esi
- ja .L56
-
- .p2align 2
-.L54:
- movq (%esi),%mm2
-
- movq %mm2,%mm3
- pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
- movq %mm3,%mm4
- pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
- pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
- por %mm3,%mm4
- movq %mm4,%mm0 /* now mm0 is updated max's */
-
- movq %mm1,%mm3
- pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
- pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
- por %mm3,%mm2
- movq %mm2,%mm1 /* now mm1 is updated min's */
-
- addl $8,%esi
- cmpl %edx,%esi
- jbe .L54
-
- .p2align 2
-.L56: /* merge down the 4-word max/mins to lower 2 words */
-
- movq %mm0,%mm2
- psrlq $32,%mm2
- movq %mm2,%mm3
- pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
- pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
- por %mm3,%mm2
- movq %mm2,%mm0 /* now mm0 is updated max's */
-
- movq %mm1,%mm2
- psrlq $32,%mm2
- movq %mm1,%mm3
- pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
- pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
- por %mm3,%mm2
- movq %mm2,%mm1 /* now mm1 is updated min's */
-
- .p2align 2
-.L58:
- addl $4,%edx /* now dx = top-4 */
- cmpl %edx,%esi
- ja .L62
- /* here, there are >= 2 words of input remaining */
- movd (%esi),%mm2
-
- movq %mm2,%mm3
- pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
- movq %mm3,%mm4
- pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
- pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
- por %mm3,%mm4
- movq %mm4,%mm0 /* now mm0 is updated max's */
-
- movq %mm1,%mm3
- pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
- pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
- por %mm3,%mm2
- movq %mm2,%mm1 /* now mm1 is updated min's */
-
- addl $4,%esi
-
- .p2align 2
-.L62:
- /* merge down the 2-word max/mins to 1 word */
-
- movq %mm0,%mm2
- psrlq $16,%mm2
- movq %mm2,%mm3
- pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
- pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
- por %mm3,%mm2
- movd %mm2,%ecx /* cx is max so far */
-
- movq %mm1,%mm2
- psrlq $16,%mm2
- movq %mm1,%mm3
- pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
- pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
- pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
- por %mm3,%mm2
- movd %mm2,%eax /* ax is min so far */
-
- addl $2,%edx /* now dx = top-2 */
- cmpl %edx,%esi
- ja .L65
-
- /* here, there is one word of input left */
- cmpw (%esi),%cx
- jge .L64
- movw (%esi),%cx
- .p2align 2
-.L64:
- cmpw (%esi),%ax
- jle .L65
- movw (%esi),%ax
-
- .p2align 2
-.L65: /* (finally!) cx is the max, ax the min */
- movswl %cx,%ecx
- movswl %ax,%eax
-
- movl 16(%ebp),%edx /* ptr to output max,min vals */
- andl %edx,%edx; jz .L77
- movw %cx,(%edx) /* max */
- movw %ax,2(%edx) /* min */
- .p2align 2
-.L77:
- /* now calculate max absolute val */
- negl %eax
- cmpl %ecx,%eax
- jge .L81
- movl %ecx,%eax
- .p2align 2
-.L81:
- emms
- popl %esi
- leave
- ret
-.Lfe6:
- .size k6maxmin,.Lfe6-k6maxmin
-
-/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */
- .equiv pm_u0,8
- .equiv pm_rp0,12
- .equiv pm_kn,16
- .equiv pm_s,20
- .equiv lv_u_top,-4
- .equiv lv_s_top,-8
- .equiv lv_rp,-40 /* local version of rp0 with each word twice */
- .align 4
-.globl Short_term_analysis_filteringx
- .type Short_term_analysis_filteringx,@function
-Short_term_analysis_filteringx:
- pushl %ebp
- movl %esp,%ebp
- subl $40,%esp
- pushl %edi
- pushl %esi
-
- movl pm_rp0(%ebp),%esi;
- leal lv_rp(%ebp),%edi;
- cld
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- emms
- movl $0x4000,%eax;
- movd %eax,%mm4;
- punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */
-
- movl pm_u0(%ebp),%eax
- addl $16,%eax
- movl %eax,lv_u_top(%ebp) /* UTOP */
- movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
- movl pm_kn(%ebp),%eax
- leal (%edx,%eax,2),%eax
- movl %eax,lv_s_top(%ebp)
- cmpl %eax,%edx
- jae .L179
- .p2align 2
-.L181:
- leal lv_rp(%ebp),%esi /* RP */
- movl pm_u0(%ebp),%edi /* U */
- movw (%edx),%ax /* (0,DI) */
- roll $16,%eax
- movw (%edx),%ax /* (DI,DI) */
- .p2align 2
-.L185: /* RP is %esi */
- movl %eax,%ecx
- movw (%edi),%ax /* (DI,U) */
- movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
- movw %cx,(%edi)
-
- movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
- rorl $16,%eax
- movd %eax,%mm1 /* mm1 is (0,0,U,DI) */
-
- movq %mm1,%mm0
- pmullw %mm3,%mm0
- pmulhw %mm3,%mm1
- punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
- paddd %mm4,%mm0 /* mm4 is 0x00004000,0x00004000 */
- psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */
- packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
- paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
- movd %mm0,%eax /* (DI,U') */
-
- addl $2,%edi
- addl $4,%esi
- cmpl lv_u_top(%ebp),%edi
- jb .L185
-
- rorl $16,%eax
- movw %ax,(%edx) /* last DI goes to *s */
- addl $2,%edx /* next s */
- cmpl lv_s_top(%ebp),%edx
- jb .L181
- .p2align 2
-.L179:
- emms
- popl %esi
- popl %edi
- leave
- ret
-.Lfe7:
- .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
-
-.end
-
-/* 'as' macro's seem to be case-insensitive */
-.macro STEP n
-.if \n
- movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */
-.else
- movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
-.endif
- movq %mm5,%mm1;
- movd %mm4,%ecx; movw %cx,%ax /* (DI,U) */
- psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4
- psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5
-
- movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
- rorl $16,%eax
- movd %eax,%mm1 /* mm1 is (0,0,U,DI) */
-
- movq %mm1,%mm0
- pmullw %mm3,%mm0
- pmulhw %mm3,%mm1
- punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
- paddd %mm6,%mm0 /* mm6 is 0x00004000,0x00004000 */
- psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */
- packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
- paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
- movd %mm0,%eax /* (DI,U') */
-.endm
-
-/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */
- .equiv pm_u0,8
- .equiv pm_rp0,12
- .equiv pm_kn,16
- .equiv pm_s,20
- .equiv lv_rp_top,-4
- .equiv lv_s_top,-8
- .equiv lv_rp,-40 /* local version of rp0 with each word twice */
- .align 4
-.globl Short_term_analysis_filteringx
- .type Short_term_analysis_filteringx,@function
-Short_term_analysis_filteringx:
- pushl %ebp
- movl %esp,%ebp
- subl $56,%esp
- pushl %edi
- pushl %esi
- pushl %ebx
-
- movl pm_rp0(%ebp),%esi;
- leal lv_rp(%ebp),%edi;
- cld
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- lodsw; stosw; stosw
- movl %edi,lv_rp_top(%ebp)
- emms
-
- movl $0x4000,%eax;
- movd %eax,%mm6;
- punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */
-
- movl pm_u0(%ebp),%ebx
- movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */
- movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
- movl pm_kn(%ebp),%eax
- leal (%edx,%eax,2),%eax
- movl %eax,lv_s_top(%ebp)
- cmpl %eax,%edx
- jae .L179
- .p2align 2
-.L181:
- leal lv_rp(%ebp),%esi /* RP */
- movw (%edx),%ax /* (0,DI) */
- roll $16,%eax
- movw (%edx),%ax /* (DI,DI) */
- movd %eax,%mm0
- .p2align 2
-.L185: /* RP is %esi */
- step 0
- step 4
- step 8
- step 12
-/*
- step 16
- step 20
- step 24
- step 28
-*/
- addl $16,%esi
- cmpl lv_rp_top(%ebp),%esi
- jb .L185
-
- rorl $16,%eax
- movw %ax,(%edx) /* last DI goes to *s */
- addl $2,%edx /* next s */
- cmpl lv_s_top(%ebp),%edx
- jb .L181
-.L179:
- movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */
- emms
- popl %ebx
- popl %esi
- popl %edi
- leave
- ret
-.Lfe7:
- .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
- .ident "GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)"
Modified: branches/10/codecs/lpc10/Makefile
URL: http://svnview.digium.com/svn/asterisk/branches/10/codecs/lpc10/Makefile?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/codecs/lpc10/Makefile (original)
+++ branches/10/codecs/lpc10/Makefile Thu Apr 12 10:01:13 2012
@@ -27,37 +27,6 @@
# fails miserably. Remove it for the time being.
_ASTCFLAGS:=$(_ASTCFLAGS:-Werror=)
-# If the compiler's '-march' flag has been specified already, then assume it's a value
-# that is what the user wants (or has been determined by the configure script). If not,
-# do some simple logic to set a decent value
-ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
- #fix for PPC processors and ALPHA, And UltraSparc too
- ifneq ($(OSARCH),Darwin)
- ifneq ($(findstring BSD,${OSARCH}),BSD)
- ifneq ($(PROC),ppc)
- ifneq ($(PROC),x86_64)
- ifneq ($(PROC),alpha)
- #The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
- #This works for even old (2.96) versions of gcc and provides a small boost either way.
- #A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn.t support it.
- #So we go lowest common available by gcc and go a step down, still a step up from
- #the default as we now have a better instruction set to work with. - Belgarath
- ifeq ($(PROC),ultrasparc)
- CFLAGS+= -mtune=$(PROC) -mcpu=v8 -O3 -fomit-frame-pointer
- else
- ifneq ($(OSARCH),SunOS)
- ifneq ($(OSARCH),arm)
- # CFLAGS+= -march=$(PROC)
- endif
- endif
- endif
- endif
- endif
- endif
- endif
- endif
-endif
-
LIB = $(LIB_TARGET_DIR)/liblpc10.a
.PHONY: all clean
Modified: branches/10/configure.ac
URL: http://svnview.digium.com/svn/asterisk/branches/10/configure.ac?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/configure.ac (original)
+++ branches/10/configure.ac Thu Apr 12 10:01:13 2012
@@ -993,15 +993,20 @@
fi
AC_SUBST(AST_SHADOW_WARNINGS)
-AC_MSG_CHECKING(for -march=native)
+AC_MSG_CHECKING(for -march=native support)
if $(${CC} -march=native -S -o /dev/null -xc /dev/null > /dev/null 2>&1); then
- AC_MSG_RESULT(yes)
- AST_MARCH_NATIVE="-march=native"
+ if test "${CONFIG_CFLAGS}" = ""; then
+ AC_MSG_RESULT(yes)
+ AST_NATIVE_ARCH=1
+ else
+ AC_MSG_RESULT(user CFLAGS present)
+ AST_NATIVE_ARCH=
+ fi
else
AC_MSG_RESULT(no)
- AST_MARCH_NATIVE=
-fi
-AC_SUBST(AST_MARCH_NATIVE)
+ AST_NATIVE_ARCH=
+fi
+AC_SUBST(AST_NATIVE_ARCH)
AC_MSG_CHECKING(for sysinfo)
AC_LINK_IFELSE(
Modified: branches/10/makeopts.in
URL: http://svnview.digium.com/svn/asterisk/branches/10/makeopts.in?view=diff&rev=361956&r1=361955&r2=361956
==============================================================================
--- branches/10/makeopts.in (original)
+++ branches/10/makeopts.in Thu Apr 12 10:01:13 2012
@@ -45,7 +45,6 @@
HOST_VENDOR=@HOST_VENDOR@
HOST_OS=@HOST_OS@
-PROC=@HOST_CPU@
OSARCH=@OSARCH@
OSREV=@PBX_OSREV@
More information about the asterisk-commits
mailing list