On Mon, 2021-02-22 at 01:41 -0800, Andrew Pinski wrote:
> On Mon, Feb 22, 2021 at 1:37 AM Michael J. Baars
> <mjbaars1977....@cyberfiber.eu> wrote:
> > On Mon, 2021-02-22 at 01:29 -0800, Andrew Pinski wrote:
> > > On Mon, Feb 22, 2021 at 1:17 AM Michael J. Baars
> > > <mjbaars1977....@cyberfiber.eu> wrote:
> > > > Hi,
> > > > 
> > > > I just wrote this little program to demonstrate a possible flaw in both 
> > > > malloc and calloc.
> > > > 
> > > > If I allocate the simplest memory region from main(), one out of 
> > > > three optimization flags fails.
> > > > If I allocate the same region from a function, three out of three 
> > > > optimization flags fail.
> > > > 
> > > > Does someone know if this really is a flaw, and if so, is it a gcc or a 
> > > > kernel flaw?
> > > 
> > > There is no flaw.  GCC (kernel, glibc) all assume unaligned accesses
> > > on x86 will not cause an exception.
> > 
> > Is this just an assumption or more like a fact? I agree with you that byte 
> > aligned is more or less the same as unaligned.
> 
> It is an assumption that is even made inside GCC.  You can modify GCC
> not to assume that but you need to recompile all libraries and even
> check the assembly code that is included with most programs.
> Why are you enabling the alignment access check anyways?  What are you
> trying to do?
> If you are looking into a performance issue with unaligned accesses,
> may I suggest you look into perf to see if you can see unaligned
> accesses?

Next to performance and correctness, I always try to keep in mind that every 
clock cycle will eventually end up on the energy bill, to avoid that computers 
cost ten times more on the energy bill than they do in the store.

If you look at the power consumption of the Playstation 1 vs that of the 
Playstation 3 for example, you will see that the Playstation 1 uses 10 W 
(10 W / 240 V = 0.041666667 A max), while the Playstation 3 consumes 
240 V * 1.7 A = 408 W. More than 40 times as much power!!!

Code and style always go hand in hand. Try to keep your code as sleek as 
possible and you will see that even an old computer can do a lot more than you 
ever thought possible :)

Thanks,
Mischa.

> Thanks,
> Andrew
> 
> > > Thanks,
> > > Andrew
> > > 
> > > > Regards,
> > > > Mischa.
#include	<stdint.h>

#include	"compression.h"

/*
 * Identity byte table: data_s[i] == i for every i in 0..255.
 * Laid out 16 entries per row, so the row number is the high hex nibble.
 */
uint8_t data_s[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
	0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
	0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
/*
0000000000000000 <compression_encode_prepare1>:
   0:	48 89 f9             	mov    %rdi,%rcx
   3:	31 d2                	xor    %edx,%edx
   5:	b8 00 00 00 01       	mov    $0x1000000,%eax
   a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
  10:	48 83 e8 01          	sub    $0x1,%rax
  14:	75 fa                	jne    10 <compression_encode_prepare1+0x10>
  16:	88 11                	mov    %dl,(%rcx)
  18:	48 83 c2 01          	add    $0x1,%rdx
  1c:	48 83 c1 01          	add    $0x1,%rcx
  20:	48 81 fa 00 01 00 00 	cmp    $0x100,%rdx
  27:	75 dc                	jne    5 <compression_encode_prepare1+0x5>
  29:	c3                   	retq   
  2a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
*/

/*
 * Plain-C variant: store the value i into c->data_t[i] for i = 0..255.
 *
 * The whole 256-byte fill is repeated (1 << 24) times; every pass writes
 * the same values, so the final contents are simply 0x00..0xFF.
 *
 * c: destination structure; its data_t array is overwritten.
 */
void compression_encode_prepare1(struct compression *c)
{
	uint64_t pass = 0;

	while (pass < (1 << 24)) {
		uint64_t idx = 0;

		while (idx < 256) {
			/* the store truncates idx to uint8_t; idx never exceeds 255 */
			c->data_t[idx] = idx;
			idx++;
		}
		pass++;
	}
}

void	compression_encode_prepare2	(struct compression* c)
{
	for	(uint64_t j = 0; j < (1 << 24); j++)
	asm	volatile					\
	(						\
	"	lea		%0   , %%rdi	\n"	\
	"	lea		%1   , %%rsi	\n"	\
	"	mov		$0x20, %%rcx	\n"	\
	"	rep		movsq		\n"	\
		: "=m"		(c->data_t)		\
		: "m"		(   data_s)		\
		: "%rcx", "%rsi", "%rdi"			\
	);
}

/*
 * compression.h -- table structure and the two fill routines that
 * populate it.
 *
 * The include guard deliberately avoids a leading double underscore:
 * identifiers containing "__" are reserved for the implementation.
 */
#ifndef	COMPRESSION_H
#define	COMPRESSION_H

#include	<stdint.h>

/* Holder for a 256-entry byte table. */
struct	compression
{
	uint8_t	data_t[256];	// compression tree indices
};

/*
 * Fill c->data_t so that data_t[i] == i for i = 0..255, using a plain
 * C loop.
 */
extern	void	compression_encode_prepare1	(struct compression* c);

/*
 * Fill c->data_t with a copy of a 256-byte source table, using x86-64
 * inline assembly.
 */
extern	void	compression_encode_prepare2	(struct compression* c);

#endif	/* COMPRESSION_H */

#include	<stdint.h>
#include	<stdio.h>
#include	<time.h>

#include	"compression.h"

int	main()
{
	clock_t	tic, toc;
	
	struct	compression	c;
	
	tic    =	clock();

				compression_encode_prepare1	(&c);
					
	toc    =	clock();
	
	for	(uint64_t i = 0; i < 256; i++) printf("%02hhX ", c.data_t[i]); printf("\n");
	
	printf("elapsed compression & encryption: %fs\n", (double) (toc - tic) / (double) CLOCKS_PER_SEC);	
	
	tic    =	clock();

				compression_encode_prepare2	(&c);
					
	toc    =	clock();
	
	for	(uint64_t i = 0; i < 256; i++) printf("%02hhX ", c.data_t[i]); printf("\n");
	
	printf("elapsed compression & encryption: %fs\n", (double) (toc - tic) / (double) CLOCKS_PER_SEC);	
}
# Build the `main` benchmark binary from main.c and compression.c.
# Same compiler and flags as before (-Ofast -g); objects are rebuilt only
# when their source or compression.h changes.
CC	= gcc
CFLAGS	= -Ofast -g

# `all` names no file, so declare it phony: a stray file called "all"
# must not stop the build.
.PHONY: all

all: main

main: main.o compression.o
	$(CC) $(CFLAGS) -o main main.o compression.o

main.o: main.c compression.h
	$(CC) $(CFLAGS) -c -o main.o main.c

compression.o: compression.c compression.h
	$(CC) $(CFLAGS) -c -o compression.o compression.c

Reply via email to