/* ior.c - inclusive-or two unsigned integers

   AUTHOR: Gregory Pietsch
   
   DESCRIPTION:
   
   This function inclusive-ors a with b and stores the result in c.

   This file has been put into the public domain by its author.
   
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
*/

#include "ordering.h"
#include "mpi.h"

#ifndef _OPTIMIZED_FOR_SIZE

/* Nonzero if any of X, Y, or Z is not aligned on an "unsigned long"
   boundary.  The test is only compiled in when _ALIGN is defined;
   otherwise the macro is the constant 0 and its arguments are not
   evaluated.  Every argument is parenthesized before it is cast, so
   an expression such as "p + 1" is converted as a whole.
   NOTE(review): using sizeof (unsigned long) - 1 as a mask assumes
   that size is a power of two -- confirm for unusual targets.  */
#ifdef _ALIGN
#define UNALIGNED(X,Y,Z) \
  ((((unsigned long)(X))&(sizeof(unsigned long)-1))\
  |(((unsigned long)(Y))&(sizeof(unsigned long)-1))\
  |(((unsigned long)(Z))&(sizeof(unsigned long)-1)))
#else
#define UNALIGNED(X,Y,Z) (0)
#endif

/* How many bytes are operated on in each iteration of the word loop.  */
#define LITTLEBLOCKSIZE (sizeof(unsigned long))

/* How many bytes are operated on in each iteration of the 4X unrolled
   loop.  */
#define BIGBLOCKSIZE (sizeof(unsigned long)<<2)

/* Threshold for punting to the byte loop: true when LEN is smaller than
   one big block.  BIGBLOCKSIZE has the unsigned type of sizeof, so on
   typical platforms a signed LEN is converted to unsigned for the
   comparison and a negative LEN is NOT reported as too small.  */
#define TOO_SMALL(len) ((len)<BIGBLOCKSIZE)

#endif /* _OPTIMIZED_FOR_SIZE */

/* Store the bitwise inclusive OR of A and B into C and return C.

   C, A and B are each treated as buffers of PREC bytes.  A PREC that
   is zero or negative leaves C untouched.

   Unless _OPTIMIZED_FOR_SIZE is defined, a buffer of at least
   BIGBLOCKSIZE bytes that passes the UNALIGNED check is processed an
   "unsigned long" at a time (four words per iteration, then single
   words).  Whatever is left over -- or the whole buffer when the fast
   path is not taken -- is handled one byte at a time.

   NOTE(review): the word loops read and write the operands through
   "unsigned long *".  This presumes the caller's storage may legally
   be accessed that way (alignment when _ALIGN is not defined, and
   effective type) -- confirm for the targets this is built for.  */
_MPI_T
_Ior (_MPI_T c, _MPI_T a, _MPI_T b, int prec)
{
  unsigned char *usc = c, *usa = a, *usb = b;
  unsigned long left;		/* Bytes still to be processed.  */
#ifndef _OPTIMIZED_FOR_SIZE
  unsigned long *pc, *pa, *pb;
#endif

  /* A negative count would be converted to a huge unsigned value by
     the block-size comparisons below, and would never count down to
     zero in the byte loop, so the loops would run far past the end of
     the buffers.  Treat any non-positive count as "nothing to do".  */
  if (prec <= 0)
    return c;

  /* PREC is known to be positive here, so the conversion is exact and
     all remaining size arithmetic is done on unsigned values.  */
  left = (unsigned long) prec;

#ifndef _OPTIMIZED_FOR_SIZE
  /* If the size is small, or any of usc, usa, or usb is unaligned,
     punt into the byte loop.  This should be rare.  */
  if (!TOO_SMALL (left) && !UNALIGNED (usc, usa, usb))
    {
      pc = (unsigned long *) usc;
      pa = (unsigned long *) usa;
      pb = (unsigned long *) usb;

      /* Operate on a big block (four words) at a time if possible.  */
      while (left >= BIGBLOCKSIZE)
	{
	  *pc++ = *pa++ | *pb++;
	  *pc++ = *pa++ | *pb++;
	  *pc++ = *pa++ | *pb++;
	  *pc++ = *pa++ | *pb++;
	  left -= BIGBLOCKSIZE;
	}

      /* Operate on a little block (one word) at a time if possible.  */
      while (left >= LITTLEBLOCKSIZE)
	{
	  *pc++ = *pa++ | *pb++;
	  left -= LITTLEBLOCKSIZE;
	}

      /* Pick up any residual with the byte loop.  */
      usc = (unsigned char *) pc;
      usa = (unsigned char *) pa;
      usb = (unsigned char *) pb;
    }
#endif

  /* Byte loop: handles the tail of the fast path, or everything when
     the fast path was skipped or compiled out.  */
  while (left--)
    *usc++ = *usa++ | *usb++;
  return c;
}

/* Remove the helper macros defined above for the word-at-a-time path so
   that their names are no longer defined past this point.  They only
   exist when _OPTIMIZED_FOR_SIZE is not defined, hence the same guard.  */
#ifndef _OPTIMIZED_FOR_SIZE
#undef UNALIGNED
#undef LITTLEBLOCKSIZE
#undef BIGBLOCKSIZE
#undef TOO_SMALL
#endif /* _OPTIMIZED_FOR_SIZE */

/* END OF FILE */
