The versions with inline assembly are in fact slower on the machines I tested them on (in userspace): Athlon XP 2800+, P4-like Xeon 2.8GHz, AMD Opteron 270. The i386 version needed a fix similar to 06024f21 to avoid crashing the benchmark.

Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size 1...512, for each possible bitmap with one bit set, for each possible offset: find the position of the first bit starting at offset. If you follow ;). Times include setup of the bitmap and checking of the results.

               Athlon     Xeon       Opteron 32/64bit
x86-specific:  0m3.692s   0m2.820s   0m3.196s / 0m2.480s
generic:       0m2.622s   0m1.662s   0m2.100s / 0m1.572s

If the bitmap size is not a multiple of BITS_PER_LONG, and no set (cleared) bit is found, find_next_bit (find_next_zero_bit) returns a value outside of the range [0, size]. The generic version always returns exactly size. The generic version also uses unsigned long everywhere, while the x86 versions use a mishmash of int, unsigned (int), long and unsigned long.

Using the generic version does give a slightly bigger kernel, though. defconfig:

               text     data    bss     dec      hex     filename
x86-specific:  4738555  481232  626688  5846475  5935cb  vmlinux (32 bit)
generic:       4738621  481232  626688  5846541  59360d  vmlinux (32 bit)
x86-specific:  5392395  846568  724424  6963387  6a40bb  vmlinux (64 bit)
generic:       5392458  846568  724424  6963450  6a40fa  vmlinux (64 bit)

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
108 lines
2.8 KiB
C
108 lines
2.8 KiB
C
#include <linux/bitops.h>
|
|
|
|
#undef find_first_zero_bit
|
|
#undef find_first_bit
|
|
|
|
/*
 * __find_first_zero_bit - locate the lowest cleared bit in a bitmap
 * @addr: first word of the bitmap
 * @size: number of valid bits in the bitmap
 *
 * Scans the bitmap one unsigned long at a time and returns the index of the
 * first zero bit.  If no zero bit exists below @size the function returns
 * exactly @size; it never reports an index inside the padding of a partial
 * last word and never rounds the "not found" result up to a word boundary.
 *
 * The number of words to scan is computed with wrapping unsigned arithmetic,
 * exactly as before: a @size that is really a small negative number
 * (-63..-1 seen as unsigned) wraps to a word count of zero.  Nobody should
 * call us like that, but find_next_zero_bit() does when offset and size are
 * in the same word and it fails to find a zero itself.  In that case, and
 * for @size == 0, no memory is touched and 0 is returned.
 */
static inline long
__find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
	const unsigned long bits_per_word = 8 * sizeof(unsigned long);
	/* Round up to whole words; may wrap to 0 for "negative" sizes. */
	unsigned long nwords = (size + (bits_per_word - 1)) / bits_per_word;
	unsigned long i;

	for (i = 0; i < nwords; i++) {
		/* A set bit in ~word marks a zero bit in the bitmap. */
		unsigned long inverted = ~addr[i];
		unsigned long bit;

		if (!inverted)
			continue;

		/* inverted != 0, so the count of trailing zeros is defined. */
		bit = i * bits_per_word + __builtin_ctzl(inverted);

		/* A hit in the padding of the last word counts as a miss. */
		return bit < size ? bit : size;
	}

	/* Nothing found; keep returning 0 when no word was scanned at all. */
	return nwords ? size : 0;
}
|
|
|
|
/**
 * find_first_zero_bit - find the first zero bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum size to search, counted in bits
 *
 * Out-of-line entry point that forwards to __find_first_zero_bit().
 *
 * Returns the bit-number of the first zero bit, not the number of the byte
 * containing a bit.
 */
long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
	long first_zero = __find_first_zero_bit(addr, size);

	return first_zero;
}
|
|
|
|
/*
 * __find_first_bit - locate the lowest set bit in a bitmap
 * @addr: first word of the bitmap
 * @size: number of valid bits in the bitmap
 *
 * Scans the bitmap one unsigned long at a time and returns the index of the
 * first set bit.  If no set bit exists below @size the function returns
 * exactly @size; it never reports an index inside the padding of a partial
 * last word and never rounds the "not found" result up to a word boundary.
 *
 * The number of words to scan is computed with wrapping unsigned arithmetic,
 * exactly as before: a @size that is really a small negative number
 * (-63..-1 seen as unsigned) wraps to a word count of zero.  Nobody should
 * call us like that, but find_next_bit() does when offset and size are in
 * the same word and it fails to find a one itself.  In that case, and for
 * @size == 0, no memory is touched and 0 is returned.
 */
static inline long
__find_first_bit(const unsigned long *addr, unsigned long size)
{
	const unsigned long bits_per_word = 8 * sizeof(unsigned long);
	/* Round up to whole words; may wrap to 0 for "negative" sizes. */
	unsigned long nwords = (size + (bits_per_word - 1)) / bits_per_word;
	unsigned long i;

	for (i = 0; i < nwords; i++) {
		unsigned long word = addr[i];
		unsigned long bit;

		if (!word)
			continue;

		/* word != 0, so the count of trailing zeros is defined. */
		bit = i * bits_per_word + __builtin_ctzl(word);

		/* A hit in the padding of the last word counts as a miss. */
		return bit < size ? bit : size;
	}

	/* Nothing found; keep returning 0 when no word was scanned at all. */
	return nwords ? size : 0;
}
|
|
|
|
/**
 * find_first_bit - find the first set bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum size to search, counted in bits
 *
 * Out-of-line entry point that forwards to __find_first_bit().
 *
 * Returns the bit-number of the first set bit, not the number of the byte
 * containing a bit.
 */
long find_first_bit(const unsigned long *addr, unsigned long size)
{
	long first_set = __find_first_bit(addr, size);

	return first_set;
}
|
|
|
|
#include <linux/module.h>
|
|
|
|
EXPORT_SYMBOL(find_first_bit);
|
|
EXPORT_SYMBOL(find_first_zero_bit);
|