/* gmem.h */
#ifndef GMEM_H
#define GMEM_H

/* Gujin is a bootloader, it loads a Linux kernel from cold boot or DOS.
 * Copyright (C) 1999-2013 Etienne Lorrain, fingerprint (2D3AF3EA):
 *   2471 DF64 9DEE 41D4 C8DB 9667 E448 FF8C 2D3A F3EA
 * E-Mail: etienne@gujin.org
 * This work is registered with the UK Copyright Service: Registration No:299755
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

/*
 * Include this file after "user.h" because it uses:
 *       UI.parameter.base_address
 *       UI.parameter.readdelta
 *
 * Those graphic primitives are prefixed by "g".
 */

#if ASSEMBLY_TYPE != ASSEMBLY_DSES
//#define USE_TLS	/* to access %gs: prefixed memory, i.e. graphic memory, see also -mtls-direct-seg-refs */
#endif

#ifndef USE_TLS
/*
 * Some asm() statements list "cc", i.e. the flags, as clobbered. GCC currently
 * always treats asm() as clobbering the flags, but it is there just in case...
 *
 * TODO: find a clean way to handle the "cld/std" stuff
 */

/*
 * Because the returned value is always extended to 32 bits by GCC,
 * we define RETURN32BITS (only used when ASSEMBLY_TYPE != ASSEMBLY_DSES):
 */
#define RETURN32BITS

/*
 * The effect on execution speed of this should be measured...
 * Also is it better to interleave "push/add/pop/add" or to
 * do "push/pop/add/add" ?
 */
#define USE_PUSHPOP

/*
 * I still do not know which one is the fastest, but using "lea"
 * increases the code size by adding the addr32 prefix.
 */
#if 0
#define ADD_2_TO_REG(reg)	"	lea 2("#reg"),"#reg"	\n"
#define ADD_4_TO_REG(reg)	"	lea 4("#reg"),"#reg"	\n"
#else
#define ADD_2_TO_REG(reg)	"	add $2,"#reg"		\n"
#define ADD_4_TO_REG(reg)	"	add $4,"#reg"		\n"
#endif

/*
 * Another feature of GCC: all graphic memory primitives are
 * "synchronised" (while optimising) to this nonexistent variable.
 * The asm() primitives below never access it as data; they only name
 * *video_memory as an "m" input operand (reads) or an "=m" output
 * operand (writes), which gives GCC a dependency between them.
 */
extern union aliasclass_videomemory {
    unsigned BPP8  : 8;
    unsigned BPP24 : 24;
    unsigned BPP32 : 32;
    } video_memory[];
#endif /* USE_TLS */

/**
 ** Real simple primitives:
 **/
#if defined (USE_TLS)
/* Graphic memory declared as a thread-local byte array whose assembler
   name is "graphicptr"; the USE_TLS primitives below index into it. */
extern __thread unsigned char graphicptr_array[] asm("graphicptr");
/* Set graphicptr@INDNTPOFF = 0; in the linker file.
   We just hope to get: movz[bw]l  %gs:graphicptr@INDNTPOFF(%ebx[,%ecx]), %eax */

/*
 * gpeekb, USE_TLS flavour: read one byte of graphic memory.
 * "offset" is displaced by UI.parameter.base_address and, when
 * VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned char gpeekb (unsigned offset)
  {
  unsigned where = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif
  return graphicptr_array[where];
  }

/*
 * gpeekw, USE_TLS flavour: read one 16 bits word of graphic memory.
 * "offset" is displaced by UI.parameter.base_address and, when
 * VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned short gpeekw (unsigned offset)
  {
  const unsigned short *cell;
  unsigned where = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif
  cell = (unsigned short *)&graphicptr_array[where];
  return *cell;
  }

/*
 * gpeekl, USE_TLS flavour: read one 32 bits word of graphic memory.
 * "offset" is displaced by UI.parameter.base_address and, when
 * VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned gpeekl (unsigned offset)
  {
  const unsigned *cell;
  unsigned where = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif
  cell = (unsigned *)&graphicptr_array[where];
  return *cell;
  }

/*
 * gpeekll, USE_TLS flavour: read one 64 bits word of graphic memory.
 * "offset" is displaced by UI.parameter.base_address and, when
 * VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned long long gpeekll (unsigned offset)
  {
  const unsigned long long *cell;
  unsigned where = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif
  cell = (unsigned long long *)&graphicptr_array[where];
  return *cell;
  }

/*
 * gpokeb, USE_TLS flavour: write the byte "value" to graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gpokeb (unsigned offset, unsigned char value)
  {
  unsigned where = offset + UI.parameter.base_address;

  graphicptr_array[where] = value;
  }

/*
 * gORb, USE_TLS flavour: OR "value" into the byte of graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gORb (unsigned offset, unsigned char value)
  {
  unsigned where = offset + UI.parameter.base_address;

  graphicptr_array[where] |= value;
  }

/*
 * gANDb, USE_TLS flavour: AND "value" into the byte of graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gANDb (unsigned offset, unsigned char value)
  {
  unsigned where = offset + UI.parameter.base_address;

  graphicptr_array[where] &= value;
  }

/*
 * gpokew, USE_TLS flavour: write the 16 bits "value" to graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gpokew (unsigned offset, unsigned short value)
  {
  unsigned where = offset + UI.parameter.base_address;
  unsigned short *cell = (unsigned short *)&graphicptr_array[where];

  *cell = value;
  }

/*
 * gpokel, USE_TLS flavour: write the 32 bits "value" to graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gpokel (unsigned offset, unsigned value)
  {
  unsigned where = offset + UI.parameter.base_address;
  unsigned *cell = (unsigned *)&graphicptr_array[where];

  *cell = value;
  }

/*
 * gpokell, USE_TLS flavour: write the 64 bits "value" to graphic memory
 * at "offset" (displaced by UI.parameter.base_address).
 */
extern inline void gpokell (unsigned offset, unsigned long long value)
  {
  unsigned where = offset + UI.parameter.base_address;
  unsigned long long *cell = (unsigned long long *)&graphicptr_array[where];

  *cell = value;
  }

#if !(USER_SUPPORT & VESA_2WINDOWS)
/*
 * gxchgb, USE_TLS flavour: read the byte of graphic memory at "offset"
 * (displaced by UI.parameter.base_address), then overwrite it with "val".
 * Returns the byte that was read.
 */
extern inline unsigned char gxchgb (unsigned offset, unsigned char val)
  {
  unsigned where = offset + UI.parameter.base_address;
  unsigned char *cell = (unsigned char *)&graphicptr_array[where];
  /* dummy (but forced, hence the volatile access) read for EGA: */
  unsigned char previous = *(volatile unsigned char *)cell;

  /* ... then write: */
  *cell = val;
  return previous;
  }
#endif

#elif ASSEMBLY_TYPE == ASSEMBLY_DSES
/*
 * gpeekb, ASSEMBLY_DSES flavour: read one byte of graphic memory through
 * a plain pointer. "offset" is displaced by UI.parameter.base_address and,
 * when VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned char gpeekb (unsigned offset)
  {
  unsigned address = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  address += UI.parameter.readdelta;
#endif
  return ((unsigned char *)address)[0];
  }

/*
 * gpeekw, ASSEMBLY_DSES flavour: read one 16 bits word of graphic memory
 * through a plain pointer. "offset" is displaced by UI.parameter.base_address
 * and, when VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned short gpeekw (unsigned offset)
  {
  unsigned address = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  address += UI.parameter.readdelta;
#endif
  return ((unsigned short *)address)[0];
  }

/*
 * gpeekl, ASSEMBLY_DSES flavour: read one 32 bits word of graphic memory
 * through a plain pointer. "offset" is displaced by UI.parameter.base_address
 * and, when VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned gpeekl (unsigned offset)
  {
  unsigned address = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  address += UI.parameter.readdelta;
#endif
  return ((unsigned *)address)[0];
  }

/*
 * gpeekll, ASSEMBLY_DSES flavour: read one 64 bits word of graphic memory
 * through a plain pointer. "offset" is displaced by UI.parameter.base_address
 * and, when VESA_2WINDOWS support is compiled in, by UI.parameter.readdelta.
 */
extern inline unsigned long long gpeekll (unsigned offset)
  {
  unsigned address = offset + UI.parameter.base_address;

#if USER_SUPPORT & VESA_2WINDOWS
  address += UI.parameter.readdelta;
#endif
  return ((unsigned long long *)address)[0];
  }

/*
 * gpokeb, ASSEMBLY_DSES flavour: write the byte "value" at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gpokeb (unsigned offset, unsigned char value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned char *cell = (unsigned char *)address;

  *cell = value;
  }

/*
 * gORb, ASSEMBLY_DSES flavour: OR "value" into the byte at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gORb (unsigned offset, unsigned char value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned char *cell = (unsigned char *)address;

  *cell |= value;
  }

/*
 * gANDb, ASSEMBLY_DSES flavour: AND "value" into the byte at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gANDb (unsigned offset, unsigned char value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned char *cell = (unsigned char *)address;

  *cell &= value;
  }

/*
 * gpokew, ASSEMBLY_DSES flavour: write the 16 bits "value" at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gpokew (unsigned offset, unsigned short value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned short *cell = (unsigned short *)address;

  *cell = value;
  }

/*
 * gpokel, ASSEMBLY_DSES flavour: write the 32 bits "value" at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gpokel (unsigned offset, unsigned value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned *cell = (unsigned *)address;

  *cell = value;
  }

/*
 * gpokell, ASSEMBLY_DSES flavour: write the 64 bits "value" at
 * UI.parameter.base_address + "offset" through a plain pointer.
 */
extern inline void gpokell (unsigned offset, unsigned long long value)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned long long *cell = (unsigned long long *)address;

  *cell = value;
  }

#if !(USER_SUPPORT & VESA_2WINDOWS)
/*
 * gxchgb, ASSEMBLY_DSES flavour: read the byte at
 * UI.parameter.base_address + "offset", then overwrite it with "val".
 * Returns the byte that was read.
 */
extern inline unsigned char gxchgb (unsigned offset, unsigned char val)
  {
  unsigned address = offset + UI.parameter.base_address;
  unsigned char *cell = (unsigned char *)address;
  /* dummy (but forced, hence the volatile access) read for EGA: */
  unsigned char previous = *(volatile unsigned char *)cell;

  /* ... then write: */
  *cell = val;
  return previous;
  }
#endif

#else /* !ASSEMBLY_DSES && !defined (USE_TLS) */

#ifndef RETURN32BITS
/*
 * gpeekb, %gs: flavour returning a plain byte: load one byte from
 * %gs:"offset". When VESA_2WINDOWS support is compiled in, the read is
 * displaced by UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned char gpeekb (unsigned offset)
  {
  unsigned char pixel;
  unsigned where = offset;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif

  asm (" movb %%gs:%[src],%[dst] "
	: [dst] "=q" (pixel)
	: [src] "m" (*(unsigned char *)where), "m" (*video_memory)
	);

  return pixel;
  }

/*
 * gpeekw, %gs: flavour returning a plain 16 bits word: load one word from
 * %gs:"offset". When VESA_2WINDOWS support is compiled in, the read is
 * displaced by UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned short gpeekw (unsigned offset)
  {
  unsigned short pixel;
  unsigned where = offset;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif

  asm (" movw %%gs:%[src],%[dst] "
	: [dst] "=q" (pixel)
	: [src] "m" (*(unsigned short *)where), "m" (*video_memory)
	);

  return pixel;
  }

#else /* RETURN32BITS */

/*
 * gpeekb, %gs: flavour with RETURN32BITS: load one byte from %gs:"offset"
 * and return it zero-extended to 32 bits (movzbl). When VESA_2WINDOWS
 * support is compiled in, the read is displaced by UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned gpeekb (unsigned offset)
  {
  unsigned pixel;
  unsigned where = offset;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif

  asm (" movzbl %%gs:%[src],%[dst] "
	: [dst] "=r" (pixel)
	: [src] "m" (*(unsigned char *)where), "m" (*video_memory)
	);

  return pixel;
  }

/*
 * gpeekw, %gs: flavour with RETURN32BITS: load one 16 bits word from
 * %gs:"offset" and return it zero-extended to 32 bits (movzwl). When
 * VESA_2WINDOWS support is compiled in, the read is displaced by
 * UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned gpeekw (unsigned offset)
  {
  unsigned pixel;
  unsigned where = offset;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif

  asm (" movzwl %%gs:%[src],%[dst] "
	: [dst] "=r" (pixel)
	: [src] "m" (*(unsigned short *)where), "m" (*video_memory)
	);

  return pixel;
  }
#endif /* RETURN32BITS */

/*
 * gpeekl, %gs: flavour: load one 32 bits word from %gs:"offset".
 * When VESA_2WINDOWS support is compiled in, the read is displaced by
 * UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned gpeekl (unsigned offset)
  {
  unsigned pixel;
  unsigned where = offset;

#if USER_SUPPORT & VESA_2WINDOWS
  where += UI.parameter.readdelta;
#endif

  asm (" movl %%gs:%[src],%[dst] "
	: [dst] "=r" (pixel)
	: [src] "m" (*(unsigned *)where), "m" (*video_memory)
	);

  return pixel;
  }

/*
 * gpeekll, %gs: flavour: load one 64 bits word from %gs:"offset" as two
 * 32 bits loads, low half into %eax and high half into %edx.
 * When VESA_2WINDOWS support is compiled in, the read is displaced by
 * UI.parameter.readdelta.
 * *video_memory is only listed as an input so that GCC orders this read
 * against the graphic memory writes.
 */
extern inline unsigned long long gpeekll (unsigned offset)
  {
  unsigned long long returned;

#if USER_SUPPORT & VESA_2WINDOWS
  offset += UI.parameter.readdelta;
#endif

  /*
   * The output has to be earlyclobber ("=&A"): %eax is written by the
   * first movl while the second movl still needs the address of its
   * memory operand. Without the '&', GCC is allowed to keep that address
   * in %eax, and the high half would then be read from a wrong location.
   */
  asm (" movl %%gs:%1,%%eax \n movl %%gs:%2,%%edx "
	: "=&A" (returned)
	: "m" (*(unsigned *)offset), "m" (*(((unsigned *)offset) + 1)), "m" (*video_memory)
	);

  return returned;
  }

/*
 * gpokeb, %gs: flavour: store the byte "value" at %gs:"offset".
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gpokeb (unsigned offset, unsigned char value)
  {
  unsigned char *cell = (unsigned char *)offset;

  asm (" movb %[val],%%gs:%[dst] "
	: "=m" (*video_memory)
	: [val] "qi" (value), [dst] "m" (*cell)
	);
  }

/*
 * gORb, %gs: flavour: OR "value" into the byte at %gs:"offset".
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gORb (unsigned offset, unsigned char value)
  {
  unsigned char *cell = (unsigned char *)offset;

  asm (" orb %[val],%%gs:%[dst] "
	: "=m" (*video_memory)
	: [val] "qi" (value), [dst] "m" (*cell)
	);
  }

/*
 * gANDb, %gs: flavour: AND "value" into the byte at %gs:"offset".
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gANDb (unsigned offset, unsigned char value)
  {
  unsigned char *cell = (unsigned char *)offset;

  asm (" andb %[val],%%gs:%[dst] "
	: "=m" (*video_memory)
	: [val] "qi" (value), [dst] "m" (*cell)
	);
  }

/*
 * gpokew, %gs: flavour: store the 16 bits "value" at %gs:"offset".
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gpokew (unsigned offset, unsigned short value)
  {
  unsigned short *cell = (unsigned short *)offset;

  asm (" movw %[val],%%gs:%[dst] "
	: "=m" (*video_memory)
	: [val] "qi" (value), [dst] "m" (*cell)
	);
  }

/*
 * gpokel, %gs: flavour: store the 32 bits "value" at %gs:"offset".
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gpokel (unsigned offset, unsigned value)
  {
  unsigned *cell = (unsigned *)offset;

  asm (" movl %[val],%%gs:%[dst] "
	: "=m" (*video_memory)
	: [val] "ri" (value), [dst] "m" (*cell)
	);
  }

/*
 * gpokell, %gs: flavour: store the 64 bits "value" at %gs:"offset" as two
 * 32 bits stores, %eax to the low half and %edx to the high half.
 * *video_memory is the declared output so that GCC orders this write
 * against the other graphic memory accesses.
 */
extern inline void gpokell (unsigned offset, unsigned long long value)
  {
  unsigned *halves = (unsigned *)offset;

  asm (" movl %%eax,%%gs:%[low] \n movl %%edx,%%gs:%[high] "
	: "=m" (*video_memory)
	: "A" (value), [low] "m" (halves[0]), [high] "m" (halves[1])
	);
  }

#if !(USER_SUPPORT & VESA_2WINDOWS)
/*
 * gxchgb, %gs: flavour: exchange "val" with the byte at %gs:"offset"
 * using a single xchgb, and return the byte that was in graphic memory.
 */
extern inline unsigned char gxchgb (unsigned offset, unsigned char val)
  {
  unsigned char swapped = val;
  unsigned char *cell = (unsigned char *)offset;

  asm (" xchgb %[reg],%%gs:%[mem]	# dummy read for EGA, then write "
	: [reg] "+q" (swapped), "=m" (*video_memory)
	: [mem] "m" (*cell)
	);

  return swapped;
  }
#endif

#endif /* ASSEMBLY_DSES */

/**
 ** Real complex primitives:
 **/
/*
 * Fill "nb" consecutive 32 bits words of graphic memory with "value".
 *
 *   nb      number of 32 bits words to write; 0 writes nothing.
 *   offset  where to start; "winadr" is subtracted before writing.
 *   winadr  subtracted from "offset" for the accesses, added back to
 *           the returned value.
 *   value   the 32 bits pattern written.
 *
 * Returns "offset" advanced by 4 * nb, i.e. just after the filled area.
 */
extern inline unsigned
rep_gpokel (unsigned nb, unsigned offset, unsigned winadr, unsigned value)
  {
  offset -= winadr;
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  /* "nb++; while (--nb)" loops exactly nb times, including nb == 0 */
  nb++;
  while (--nb) {
      gpokel (offset, value);
      offset += 4;
      }
#elif ASSEMBLY_TYPE == ASSEMBLY_DSES

  offset += UI.parameter.base_address;

  asm (" cld ; rep stosl %3,%a1 "
	: "=m" (*video_memory), "+D" (offset), "+c" (nb)
	: "a" (value)
	: "cc");

  offset -= UI.parameter.base_address;

#elif ASSEMBLY_TYPE == ASSEMBLY_RISC
  /*
   * 80x86 doc: cannot overwrite segment %%es -> %%gs in:
   * " cld \n rep stosl %1,%%gs:(%2) " : : "a" (value), "D" (offset), ...
   *
   * "loopl" decrements %ecx before testing it, so entering the loop with
   * nb == 0 would write 2^32 words: skip the loop in that case, like the
   * C and "rep stosl" variants do.
   */
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	movl	%3,%%gs:%a0		\n"
	ADD_4_TO_REG (%0)
"	loopl	.L%=_startloop		\n"
	: "+r" (offset), "+c" (nb), "=m" (*video_memory)
	: "ri" (value)
	: "cc");
#elif ASSEMBLY_TYPE == ASSEMBLY_CISC
  /* same nb == 0 problem with "loopl" here: nothing to do in that case */
  if (nb) {
      unsigned tmp = offset - 4; /* movl before decrement of %cx */

      offset += 4 * nb;
      asm (
"    .L%=_startloop:						\n"
"	movl	%3,%%gs:(%2,%%ecx,4)	# rep do not work	\n"
"	loopl	.L%=_startloop					\n"
	: "+c" (nb), "=m" (*video_memory)
	: "r" (tmp), "ri" (value)
	: "cc");
      }
#endif
  return offset + winadr;
  }

/*
 * Fill "nb" bytes of graphic memory with a repeating 3 bytes pattern
 * taken from the low 24 bits of "value" (presumably one 24 bits per
 * pixel colour - confirm against the callers).
 *
 *   nb      number of BYTES to write.
 *   offset  where to start; it also selects which of the 3 pattern bytes
 *           comes first. "winadr" is subtracted before writing.
 *   winadr  subtracted from "offset" for the accesses, added back to
 *           the returned value.
 *   value   the pattern, in its low 24 bits.
 *
 * Returns "offset" advanced by nb, i.e. just after the filled area.
 *
 * The fill is done in three steps: up to 3 leading bytes, then 32 bits
 * words, then up to 3 trailing bytes. "color" always holds the next four
 * bytes to write and is rotated after each partial or full write.
 */
extern inline unsigned
rep_g3pokel (unsigned nb, unsigned offset, unsigned winadr, unsigned value)
  {
  unsigned char remainder, index;
  unsigned color;

  /* Which byte of the 3 bytes pattern falls on "offset": */
#if USER_SUPPORT & VESA_SUPPORT
  if (UI.parameter.linelength > UI.parameter.width * 3)
      index = (offset % UI.parameter.linelength) % 3;
    else
#endif
      index = offset % 3;
  index *= 8;
  color = (value << (24 - index)) | (value >> index);

  /*
   * Align the loop main write on 32 bits boundary: that needs
   * (4 - offset % 4) % 4 leading bytes. Writing "offset % 4" bytes
   * instead would leave the main loop misaligned whenever offset % 4
   * is 1 or 3. The bytes written are the same either way, only the
   * split between head, words and tail changes.
   */
  index = MIN (nb, (4 - offset % 4) % 4);
  nb -= index;
  offset -= winadr;
  if (index & 1) {
      gpokeb (offset, color);
      offset++;
      /* one byte consumed: rotate the pattern by one byte */
      color >>= 8;
      color |= (color << 24);
      }
  if (index > 1) {
      gpokew (offset, color);
      offset += 2;
      /* two bytes consumed: with a period of 3 this is one byte backward */
      color <<= 8;
      color |= (color >> 24);
      }

  remainder = nb % 4;
  nb /= 4;
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  nb++; while (USUAL(--nb)) {
      gpokel (offset, color);
      offset += 4;
      /* four bytes consumed, i.e. one more than the period */
      color >>= 8;
      color |= (color << 24);
      }
#else
  /* "loopl" would run 2^32 times if entered with nb == 0 */
  if (nb) {
      unsigned tmp;

#if ASSEMBLY_TYPE == ASSEMBLY_DSES
      offset += UI.parameter.base_address;
#endif
      asm (
"    .L%=_startloop:		\n"
"	movl	%1,%%gs:(%2)	\n"
"	shrl	$8,%1		\n"
	ADD_4_TO_REG (%2)
"	movl	%1,%3		\n"
"	sall	$24,%3		\n"
"	orl	%3,%1		\n"
"	loopl	.L%=_startloop	\n"
	: "+c" (nb), "+r" (color), "+r" (offset),
	  "=&r" (tmp), "=m" (*video_memory)
	: : "cc");
#if ASSEMBLY_TYPE == ASSEMBLY_DSES
      offset -= UI.parameter.base_address;
#endif
      }
#endif

  /* Fill in the last bytes: */
  if (remainder > 1) {
      gpokew (offset, color);
      offset += 2;
      color >>= 16;
      }
  if (remainder & 1)
      gpokeb (offset++, color);

  return offset + winadr;
  }

/*
 * Copy "nb" 32 bits words from normal memory at "src" to graphic memory
 * at "dst". nb == 0 copies nothing.
 *
 * Returns "dst" advanced by 4 * nb, i.e. just after the written area.
 *
 * Note that the ASSEMBLY_CISC variant copies from the last word down to
 * the first one (it indexes with the decrementing %ecx).
 */
extern inline unsigned /* memory -> graphic */
rep_movel_tog (unsigned dst, unsigned *src, unsigned nb)
  {
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  /* "nb++; while (--nb)" loops exactly nb times, including nb == 0 */
  nb++;
  while (--nb) {
      gpokel (dst, *src++);
      dst += 4;
      }
#elif ASSEMBLY_TYPE == ASSEMBLY_DSES
  unsigned tmpdst = dst;

  tmpdst += UI.parameter.base_address;

  dst += 4 * nb;
  asm (
"	cld				\n"
"	rep movsl %a0,%%es:%a1		\n"
"	addr32 nop			\n"
	: "+S" (src), "+D" (tmpdst), "+c" (nb), "=m" (*video_memory)
	: "m" (*memory)
	: "cc");
#elif ASSEMBLY_TYPE == ASSEMBLY_RISC
  /*
   * "loopl" decrements %ecx before testing it, so entering the loop with
   * nb == 0 would copy 2^32 words: skip the loop in that case, like the
   * C and "rep movsl" variants do.
   */

#ifndef USE_PUSHPOP
  unsigned tmp;

  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	movl	%a0,%3			\n"
	ADD_4_TO_REG (%0)
"	movl	%3,%%gs:%a1		\n"
	ADD_4_TO_REG (%1)
"	loopl	.L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=&r" (tmp), "=m" (*video_memory)
	: "m" (*memory)
	: "cc");
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	%a0			\n"
"	popl	%%gs:%a1		\n"
	ADD_4_TO_REG (%0)
	ADD_4_TO_REG (%1)
"	loopl	.L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=m" (*video_memory)
	: "m" (*memory)
	: "cc");
#endif

#elif ASSEMBLY_TYPE == ASSEMBLY_CISC
  /* "- 4" because %ecx goes from nb down to 1 inside the loop */
  unsigned tmpsrc = (unsigned)src - 4, tmpdst = dst - 4;

  dst += 4 * nb;
  /* same nb == 0 problem with "loopl" here: nothing to copy in that case */
#ifndef USE_PUSHPOP
  if (nb) {
      unsigned tmp;

      asm (
"    .L%=_startloop:			\n"
"	movl	(%3,%%ecx,4),%1		\n"
"	movl	%1,%%gs:(%4,%%ecx,4)	\n"
"	loopl	.L%=_startloop		\n"
	: "+c" (nb), "=&r" (tmp), "=m" (*video_memory)
	: "r" (tmpsrc), "r" (tmpdst), "m" (*memory)
	: "cc");
      }
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	(%2,%%ecx,4)		\n"
"	popl	%%gs:(%3,%%ecx,4)	\n"
"	loopl	.L%=_startloop		\n"
	: "+c" (nb), "=m" (*video_memory)
	: "r" (tmpsrc), "r" (tmpdst), "m" (*memory)
	: "cc");
#endif

#endif
  return dst;
  }

/*
 * Copy "nb" 32 bits words from graphic memory at "src" to normal memory
 * at "dst". nb == 0 copies nothing.
 * When VESA_2WINDOWS support is compiled in, the reads are displaced by
 * UI.parameter.readdelta; the returned value is not.
 *
 * Returns "src" advanced by 4 * nb, i.e. just after the area read.
 *
 * Note that the ASSEMBLY_CISC variant copies from the last word down to
 * the first one (it indexes with the decrementing %ecx).
 */
extern inline unsigned /* graphic -> memory */
rep_movel_fromg (unsigned *dst, unsigned src, unsigned nb)
  {
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  /* "nb++; while (--nb)" loops exactly nb times, including nb == 0 */
  nb++;
  while (--nb) {
      *dst++ = gpeekl (src);
      src += 4;
      }
#elif ASSEMBLY_TYPE == ASSEMBLY_DSES
  unsigned tmpsrc = src;

  tmpsrc += UI.parameter.base_address;
#if USER_SUPPORT & VESA_2WINDOWS
  tmpsrc += UI.parameter.readdelta;
#endif

  src += 4 * nb;
  asm (
"	cld				\n"
"	rep movsl %a1,%%es:%a2		\n"
"	addr32 nop			\n"
	: "+S" (tmpsrc), "+D" (dst), "+c" (nb), "=m" (memory)
	: "m" (*video_memory)
	: "cc");

#elif ASSEMBLY_TYPE == ASSEMBLY_RISC

#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  /*
   * "loopl" decrements %ecx before testing it, so entering the loop with
   * nb == 0 would copy 2^32 words: skip the loop in that case, like the
   * C and "rep movsl" variants do.
   */
#ifndef USE_PUSHPOP
  if (nb) {
      unsigned tmp;

      asm (
"    .L%=_startloop:			\n"
"	movl    %%gs:%a0,%3		\n"
	ADD_4_TO_REG (%0)
"	movl    %3,%a1			\n"
	ADD_4_TO_REG (%1)
"	loopl   .L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=&r" (tmp), "=m" (memory)
	: "m" (*video_memory)
	: "cc");
      }
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	%%gs:%a0		\n"
"	popl	%a1			\n"
	ADD_4_TO_REG (%0)
	ADD_4_TO_REG (%1)
"	loopl   .L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=m" (memory)
	: "m" (*video_memory)
	: "cc");
#endif

#if USER_SUPPORT & VESA_2WINDOWS
  /* undo the displacement so that the returned value is a plain offset */
  src -= UI.parameter.readdelta;
#endif

#elif ASSEMBLY_TYPE == ASSEMBLY_CISC
  /* "- 4" because %ecx goes from nb down to 1 inside the loop */
  unsigned tmpsrc = src - 4, tmpdst = (unsigned)dst - 4;

  src += 4 * nb;

#if USER_SUPPORT & VESA_2WINDOWS
  tmpsrc += UI.parameter.readdelta;
#endif

  /* same nb == 0 problem with "loopl" here: nothing to copy in that case */
#ifndef USE_PUSHPOP
  if (nb) {
      unsigned tmp;

      asm (
"    .L%=_startloop:			\n"
"	movl	%%gs:(%3,%%ecx,4),%1	\n"
"	movl	%1,(%4,%%ecx,4)		\n"
"	loopl	.L%=_startloop		\n"
	: "+c" (nb), "=&r" (tmp), "=m" (*memory)
	: "r" (tmpsrc), "r" (tmpdst), "m" (*video_memory)
	: "cc");
      }
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	%%gs:(%2,%%ecx,4)	\n"
"	popl	(%3,%%ecx,4)		\n"
"	loopl	.L%=_startloop		\n"
	: "+c" (nb), "=m" (*memory)
	: "r" (tmpsrc), "r" (tmpdst), "m" (*video_memory)
	: "cc");
#endif

#endif
  return src;
  }

/*
 * Copy "nb" bytes inside graphic memory, from "src" to "dst", going from
 * low to high addresses. nb == 0 copies nothing.
 * When VESA_2WINDOWS support is compiled in, the reads are displaced by
 * UI.parameter.readdelta.
 */
extern inline void /* graphic -> graphic */
rep_moveb_gtog (unsigned dst, unsigned src, unsigned nb)
  {
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  /* "nb++; while (--nb)" loops exactly nb times, including nb == 0 */
  nb++;
  while (--nb)
      gpokeb (dst++, gpeekb (src++));

#elif ASSEMBLY_TYPE == ASSEMBLY_DSES

  src += UI.parameter.base_address;
#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  dst += UI.parameter.base_address;

  asm (
"	cld				\n"
"	rep movsb %a0,%%es:%a1		\n"
"	addr32 nop			\n"
	: "+S" (src), "+D" (dst), "+c" (nb), "=m" (*video_memory)
	: "m" (*video_memory)
	: "cc");
#elif ASSEMBLY_TYPE == ASSEMBLY_RISC
  unsigned char tmp;

#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  /*
   * "loopl" decrements %ecx before testing it, so entering the loop with
   * nb == 0 would copy 2^32 bytes: skip the loop in that case, like the
   * C and "rep movsb" variants do.
   */
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	movb    %%gs:%a0,%3		\n"
"	incl	%0			\n"
"	movb    %3,%%gs:%a1		\n"
"	incl	%1			\n"
"	loopl   .L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=&q" (tmp), "=m" (*video_memory)
	: "m" (*video_memory)
	: "cc");

#elif ASSEMBLY_TYPE == ASSEMBLY_CISC
  /* need to copy in the right order, from low memory to high memory! */
  unsigned cpt = 0;
  unsigned char tmp;

#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  /* "loopl" would run 2^32 times if entered with nb == 0 */
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	movb    %%gs:(%4,%1),%2		\n"
"	movb    %2,%%gs:(%5,%1)		\n"
"	incl	%1			\n"
"	loopl   .L%=_startloop		\n"
	: "+c" (nb), "+r" (cpt), "=&q" (tmp), "=m" (*video_memory)
	: "r" (src), "r" (dst), "m" (*video_memory)
	: "cc");
#endif
  }

/*
 * Copy "nb" 32 bits words inside graphic memory, from "src" to "dst",
 * going from low to high addresses. nb == 0 copies nothing.
 * When VESA_2WINDOWS support is compiled in, the reads are displaced by
 * UI.parameter.readdelta.
 */
extern inline void /* graphic -> graphic */
rep_movel_gtog (unsigned dst, unsigned src, unsigned nb)
  {
#if ASSEMBLY_TYPE == ASSEMBLY_NONE || defined (USE_TLS)
  /* "nb++; while (--nb)" loops exactly nb times, including nb == 0 */
  nb++;
  while (--nb) {
      gpokel (dst, gpeekl (src));
      dst += 4;
      src += 4;
      }

#elif ASSEMBLY_TYPE == ASSEMBLY_DSES

  src += UI.parameter.base_address;
#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  dst += UI.parameter.base_address;

  asm (
"	cld				\n"
"	rep movsl %a0,%%es:%a1		\n"
"	addr32 nop			\n"
	: "+S" (src), "+D" (dst), "+c" (nb), "=m" (*video_memory)
	: "m" (*video_memory)
	: "cc");
#elif ASSEMBLY_TYPE == ASSEMBLY_RISC

#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  /*
   * "loopl" decrements %ecx before testing it, so entering the loop with
   * nb == 0 would copy 2^32 words: skip the loop in that case, like the
   * C and "rep movsl" variants do.
   */
#ifndef USE_PUSHPOP
  if (nb) {
      unsigned tmp;

      asm (
"    .L%=_startloop:			\n"
"	movl    %%gs:%a0,%3		\n"
	ADD_4_TO_REG(%0)
"	movl    %3,%%gs:%a1		\n"
	ADD_4_TO_REG(%1)
"	loopl   .L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=&r" (tmp), "=m" (*video_memory)
	: "m" (*video_memory)
	: "cc");
      }
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	%%gs:%a0		\n"
"	popl	%%gs:%a1		\n"
	ADD_4_TO_REG(%0)
	ADD_4_TO_REG(%1)
"	loopl   .L%=_startloop		\n"
	: "+r" (src), "+r" (dst), "+c" (nb), "=m" (*video_memory)
	: "m" (*video_memory)
	: "cc");

#endif

#elif ASSEMBLY_TYPE == ASSEMBLY_CISC
  /* need to copy in the right order, from low memory to high memory! */
  unsigned cpt = 0;

#if USER_SUPPORT & VESA_2WINDOWS
  src += UI.parameter.readdelta;
#endif

  /* "loopl" would run 2^32 times if entered with nb == 0 */
#ifndef USE_PUSHPOP
  if (nb) {
      unsigned tmp;

      asm (
"    .L%=_startloop:			\n"
"	movl    %%gs:(%4,%1),%2		\n"
"	movl    %2,%%gs:(%5,%1)		\n"
	ADD_4_TO_REG(%1)
"	loopl   .L%=_startloop		\n"
	: "+c" (nb), "+r" (cpt), "=&r" (tmp), "=m" (*video_memory)
	: "r" (src), "r" (dst), "m" (*video_memory)
	: "cc");
      }
#else
  if (nb)
      asm (
"    .L%=_startloop:			\n"
"	pushl	%%gs:(%3,%1)		\n"
"	popl	%%gs:(%4,%1)		\n"
	ADD_4_TO_REG(%1)
"	loopl   .L%=_startloop		\n"
	: "+c" (nb), "+r" (cpt), "=m" (*video_memory)
	: "r" (src), "r" (dst), "m" (*video_memory)
	: "cc");
#endif

#endif
  }

#endif /* GMEM_H */

