| Register | FAQ | Calendar | Search | Today's Posts | Mark Forums Read |
|
#1
| |||
| |||
| Below is a piece of bit manipulation code. Here, in function "eval_output_out", there are some macros. Say we have two versions of the same code: 1. variable "b" in "eval_output_out" is initialized 2. variable "b" in "eval_output_out" is NOT initialized. The code expands such that it is independent of the past value of variable "b". I dumped the preprocessed output of the two versions and the only difference was that b is initialized in one and not in the other. QUERY IS: why does version 1 run faster than version 2 with the -O3 option of gcc? If I just dump the assembly of the two versions using "gcc -S", the behavior is as expected: version #1 (with initialization) has one extra instruction to initialize the variable. But when I dump the assembly of the two versions with "gcc -O3 -S", the assembly of version 2 (without initialization) takes 4 extra instructions. Can somebody please explain to me why this is happening? ASSEMBLY OF VERSION #1 [WITH INITIALIZATION] ------------------------------------------------ .globl eval_output_out .type eval_output_out,@function eval_output_out: pushl %ebp movl width_masks+4, %edx movl %esp, %ebp movl 8(%ebp), %eax movl %edx, %ecx notl %edx andl 4(%eax), %edx andl (%eax), %ecx movl %edx, 4(%eax) sall $2, %ecx andl width_masks+4, %ecx orl %ecx, %edx movl %edx, 4(%eax) leave ret ASSEMBLY OF VERSION #2 [WITHOUT INITIALIZATION] ------------------------------------------------ .globl eval_output_out .type eval_output_out,@function eval_output_out: movl width_masks+4, %edx pushl %ebp leal 0(,%edx,4), %eax movl %esp, %ebp notl %eax pushl %ebx movl 8(%ebp), %ecx andl %eax, %ebx movl %edx, %eax andl (%ecx), %eax notl %edx andl 4(%ecx), %edx sall $2, %eax movl %edx, 4(%ecx) orl %eax, %ebx andl width_masks+4, %ebx orl %ebx, %edx movl %edx, 4(%ecx) movl (%esp), %ebx leave ret SOURCE CODE ------------------------------------ static unsigned int width_masks[32] = {0x1,0x3,0x7,0xF,0x1F,0x3F,0x7F, 0xFF,0x1FF,0x3FF,0x7FF,0xFFF,0x1FFF,0x3FFF,0x7FFF, 0xFFFF,0x1FFFF, 
0x3FFFF,0x7FFFF,0xFFFFF,0x1FFFFF,0x3FFFFF,0x7FFFFF,0xFFFFFF,0x1FFFFFF, 0x3FFFFFF,0x7FFFFFF,0xFFFFFFF,0x1FFFFFFF,0x3FFFFFFF,0x7FFFFFFF, 0xFFFFFFFF}; typedef struct _top_test_model{ unsigned int inputs[1]; unsigned int outputs[1]; }top_test_model; #define LOGICAL_RIGHT_SHIFT(x,y) (((y) >= 32)?0x0:(x)>>(y)) #define get_arr_var_bits(reg_int, lsb_row, lsb_col, width) (LOGICAL_RIGHT_SHIFT(((width_masks[width-1] << lsb_col) & reg_int[lsb_row]),(lsb_col))) #define reset_arr_var_bits(reg_int, lsb_row, lsb_col, width) (reg_int[lsb_row] &= ~(width_masks[width-1] << (lsb_col))) #define set_arr_var_bits(reg_int, lsb_row, lsb_col, width, val) (reg_int[lsb_row] = reset_arr_var_bits(reg_int, lsb_row, lsb_col, width) | ((val) << (lsb_col))) #define get_var_bits(reg_int, lsb_col, width) (LOGICAL_RIGHT_SHIFT(((width_masks[width-1] << (lsb_col)) & (reg_int)), (lsb_col))) #define rst_bits(reg_int, lsb_col, width) ((reg_int) &= ~(width_masks[width-1] << (lsb_col))) #define set_var_bits(reg_int, lsb_col, width, val) (reg_int = (rst_bits(reg_int, lsb_col, width)) | ((val) << (lsb_col))) void eval_output_out(top_test_model *model){ unsigned int b; /* take 2 bits starting from bit position 0 from variable model->inputs[0] and set them in 2 bits in variable "b" starting from bit position 2 */ set_var_bits(b,2,2,get_arr_var_bits(model->inputs,0,0,2)); /* take 2 bits starting from bit position 0 from variable b and set them in 2 bits in model->outputs[0] starting from bit position 0 */ set_arr_var_bits(model->outputs,0,0,2,get_var_bits(b,0,2)); } int main (int argc, char *argv){ top_test_model t; eval_output_out(&t); } |
|
#2
| |||
| |||
| sh.vipin schrieb: > Below is a piece of bit manipulation code. here in function > "eval_output_out" there are some macros. > Say we have to versions of the same code > 1. variable "b" in "eval_output_out" is initialized Initialized to which value? > 2. variable "b" in "eval_output_out" is NOT initialized > code expands such that it is independent of past value of variable > "b" I assume the compiler didn't manage to realize that. > QUERY IS : why version 1 runs faster than 2 with -O3 option of gcc. Your code looks slow (and unclear) to me. You can drop the width_masks array and replace it with width_masks[i] = ~((~0) << (i+1)) , which also works in 64-bit mode. You can save shifts if you keep the bits read by get_arr_var_bits into b at the same position. Hendrik vdH |
|
#3
| |||
| |||
| sh.vipin <spamtrap@crayne.org> wrote: > >Below is a piece of bit manipulation code. here in function >"eval_output_out" there are some macros. >Say we have to versions of the same code >1. variable "b" in "eval_output_out" is initialized >2. variable "b" in "eval_output_out" is NOT initialized > >code expands such that it is independent of past value of variable >"b", No, it doesn't. The first line expands to this: (b = (((b) &= ~(width_masks[2-1] << (2)))) | (((((((0)) >= 32)?0x0:(((width_masks[2-1] << 0) & model->inputs[0]))>>((0))))) << (2))); Look at the first three tokens: b = b &= ... That expression depends on the initial value of b. Further, this expression modifies "b" twice without an intervening sequence point. That means the code has undefined behavior in standard C, so it's not meaningful to do any timing analysis with it. -- Tim Roberts, timr@probo.com Providenza & Boekelheide, Inc. |
![]() |
| Thread Tools | |
| Display Modes | |
In an effort to better serve ads to our visitors, cookies are used on objectmix.com. For more information, check out our Privacy Policy.