Files
android_kernel_samsung_sm8750/arch/x86/lib/bitops_64.c
Alexander van Heukelum 6fd92b63d0 x86: change x86 to use generic find_next_bit
The versions with inline assembly are in fact slower on the machines I
tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD
Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid
crashing the benchmark.

Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size
1...512, for each possible bitmap with one bit set, for each possible
offset: find the position of the first bit starting at offset. If you
follow ;). Times include setup of the bitmap and checking of the
results.

		Athlon		Xeon		Opteron 32/64bit
x86-specific:	0m3.692s	0m2.820s	0m3.196s / 0m2.480s
generic:	0m2.622s	0m1.662s	0m2.100s / 0m1.572s

If the bitmap size is not a multiple of BITS_PER_LONG, and no set
(cleared) bit is found, find_next_bit (find_next_zero_bit) returns a
value outside of the range [0, size]. The generic version always returns
exactly size. The generic version also uses unsigned long everywhere,
while the x86 versions use a mishmash of int, unsigned (int), long and
unsigned long.

Using the generic version does give a slightly bigger kernel, though.

defconfig:	   text    data     bss     dec     hex filename
x86-specific:	4738555  481232  626688 5846475  5935cb vmlinux (32 bit)
generic:	4738621  481232  626688 5846541  59360d vmlinux (32 bit)
x86-specific:	5392395  846568  724424 6963387  6a40bb vmlinux (64 bit)
generic:	5392458  846568  724424 6963450  6a40fa vmlinux (64 bit)

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-04-26 19:21:16 +02:00

108 lines
2.8 KiB
C

#include <linux/bitops.h>
#undef find_first_zero_bit
#undef find_first_bit
static inline long
__find_first_zero_bit(const unsigned long * addr, unsigned long size)
{
long d0, d1, d2;
long res;
/*
* We must test the size in words, not in bits, because
* otherwise incoming sizes in the range -63..-1 will not run
* any scasq instructions, and then the flags used by the je
* instruction will have whatever random value was in place
* before. Nobody should call us like that, but
* find_next_zero_bit() does when offset and size are at the
* same word and it fails to find a zero itself.
*/
size += 63;
size >>= 6;
if (!size)
return 0;
asm volatile(
" repe; scasq\n"
" je 1f\n"
" xorq -8(%%rdi),%%rax\n"
" subq $8,%%rdi\n"
" bsfq %%rax,%%rdx\n"
"1: subq %[addr],%%rdi\n"
" shlq $3,%%rdi\n"
" addq %%rdi,%%rdx"
:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
:"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
[addr] "S" (addr) : "memory");
/*
* Any register would do for [addr] above, but GCC tends to
* prefer rbx over rsi, even though rsi is readily available
* and doesn't have to be saved.
*/
return res;
}
/**
* find_first_zero_bit - find the first zero bit in a memory region
* @addr: The address to start the search at
* @size: The maximum size to search
*
* Returns the bit-number of the first zero bit, not the number of the byte
* containing a bit.
*/
long find_first_zero_bit(const unsigned long * addr, unsigned long size)
{
return __find_first_zero_bit (addr, size);
}
static inline long
__find_first_bit(const unsigned long * addr, unsigned long size)
{
long d0, d1;
long res;
/*
* We must test the size in words, not in bits, because
* otherwise incoming sizes in the range -63..-1 will not run
* any scasq instructions, and then the flags used by the jz
* instruction will have whatever random value was in place
* before. Nobody should call us like that, but
* find_next_bit() does when offset and size are at the same
* word and it fails to find a one itself.
*/
size += 63;
size >>= 6;
if (!size)
return 0;
asm volatile(
" repe; scasq\n"
" jz 1f\n"
" subq $8,%%rdi\n"
" bsfq (%%rdi),%%rax\n"
"1: subq %[addr],%%rdi\n"
" shlq $3,%%rdi\n"
" addq %%rdi,%%rax"
:"=a" (res), "=&c" (d0), "=&D" (d1)
:"0" (0ULL), "1" (size), "2" (addr),
[addr] "r" (addr) : "memory");
return res;
}
/**
* find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
* @size: The maximum size to search
*
* Returns the bit-number of the first set bit, not the number of the byte
* containing a bit.
*/
long find_first_bit(const unsigned long * addr, unsigned long size)
{
return __find_first_bit(addr,size);
}
#include <linux/module.h>
EXPORT_SYMBOL(find_first_bit);
EXPORT_SYMBOL(find_first_zero_bit);